diff --git "a/gemma3_FFN_PF_lut6_chunk_01of01.mlmodelc/model.mil" "b/gemma3_FFN_PF_lut6_chunk_01of01.mlmodelc/model.mil" deleted file mode 100644--- "a/gemma3_FFN_PF_lut6_chunk_01of01.mlmodelc/model.mil" +++ /dev/null @@ -1,27686 +0,0 @@ -program(1.3) -[buildInfo = dict({{"coremlc-component-MIL", "3510.2.1"}, {"coremlc-version", "3500.32.1"}})] -{ - func infer(tensor causal_mask, tensor current_pos, tensor hidden_states, state> model_model_kv_cache_global, state> model_model_kv_cache_local, tensor position_ids) { - tensor model_model_layers_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(884864))))[name = string("model_model_layers_0_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_0_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(901312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1122560))))[name = string("model_model_layers_0_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_0_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1126720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1347968))))[name = string("model_model_layers_0_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1352128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2236928))))[name = string("model_model_layers_1_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_1_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2253376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2474624))))[name = string("model_model_layers_1_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_1_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2478784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2700032))))[name = string("model_model_layers_1_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2704192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3588992))))[name = string("model_model_layers_2_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_2_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3605440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3826688))))[name = string("model_model_layers_2_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_2_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3830848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4052096))))[name = string("model_model_layers_2_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4056256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4941056))))[name = string("model_model_layers_3_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_3_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4957504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5178752))))[name = string("model_model_layers_3_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_3_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5182912))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5404160))))[name = string("model_model_layers_3_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5408320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6293120))))[name = string("model_model_layers_4_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_4_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6309568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6530816))))[name = string("model_model_layers_4_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_4_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6534976))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6756224))))[name = string("model_model_layers_4_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_5_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6760384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7645184))))[name = string("model_model_layers_5_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_5_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7661632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7882880))))[name = string("model_model_layers_5_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_5_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7887040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8108288))))[name = string("model_model_layers_5_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_6_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8112448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8997248))))[name = string("model_model_layers_6_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_6_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9013696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9234944))))[name = string("model_model_layers_6_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_6_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9239104))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9460352))))[name = string("model_model_layers_6_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_7_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9464512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10349312))))[name = string("model_model_layers_7_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_7_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10365760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10587008))))[name = string("model_model_layers_7_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_7_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10591168))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10812416))))[name = string("model_model_layers_7_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_8_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10816576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11701376))))[name = string("model_model_layers_8_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_8_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11717824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11939072))))[name = string("model_model_layers_8_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_8_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11943232))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12164480))))[name = string("model_model_layers_8_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_9_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12168640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13053440))))[name = string("model_model_layers_9_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_9_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13069888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13291136))))[name = string("model_model_layers_9_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_9_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13295296))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13516544))))[name = string("model_model_layers_9_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_10_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13520704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14405504))))[name = string("model_model_layers_10_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_10_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14421952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14643200))))[name = string("model_model_layers_10_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_10_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14647360))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14868608))))[name = string("model_model_layers_10_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_11_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14872768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15757568))))[name = string("model_model_layers_11_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_11_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15774016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15995264))))[name = string("model_model_layers_11_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_11_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15999424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16220672))))[name = string("model_model_layers_11_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_12_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16224832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17109632))))[name = string("model_model_layers_12_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_12_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17126080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17347328))))[name = string("model_model_layers_12_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_12_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17351488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17572736))))[name = string("model_model_layers_12_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_13_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17576896))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18461696))))[name = string("model_model_layers_13_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_13_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18478144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18699392))))[name = string("model_model_layers_13_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_13_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18703552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18924800))))[name = string("model_model_layers_13_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_14_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18928960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19813760))))[name = string("model_model_layers_14_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_14_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19830208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20051456))))[name = string("model_model_layers_14_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_14_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20055616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20276864))))[name = string("model_model_layers_14_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_15_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20281024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21165824))))[name = string("model_model_layers_15_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_15_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21182272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21403520))))[name = string("model_model_layers_15_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_15_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21407680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21628928))))[name = string("model_model_layers_15_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_16_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21633088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22517888))))[name = string("model_model_layers_16_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_16_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22534336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22755584))))[name = string("model_model_layers_16_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_16_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22759744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22980992))))[name = string("model_model_layers_16_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_17_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22985152))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23869952))))[name = string("model_model_layers_17_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_17_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23886400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24107648))))[name = string("model_model_layers_17_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_17_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24111808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24333056))))[name = string("model_model_layers_17_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_18_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24337216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25222016))))[name = string("model_model_layers_18_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_18_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25238464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25459712))))[name = string("model_model_layers_18_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_18_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25463872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25685120))))[name = string("model_model_layers_18_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_19_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25689280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26574080))))[name = string("model_model_layers_19_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_19_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26590528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26811776))))[name = string("model_model_layers_19_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_19_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26815936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27037184))))[name = string("model_model_layers_19_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_20_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27041344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27926144))))[name = string("model_model_layers_20_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_20_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27942592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28163840))))[name = string("model_model_layers_20_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_20_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28168000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28389248))))[name = string("model_model_layers_20_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_21_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28393408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29278208))))[name = string("model_model_layers_21_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_21_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29294656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29515904))))[name = string("model_model_layers_21_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_21_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29520064))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29741312))))[name = string("model_model_layers_21_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_22_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29745472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30630272))))[name = string("model_model_layers_22_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_22_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30646720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30867968))))[name = string("model_model_layers_22_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_22_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30872128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31093376))))[name = string("model_model_layers_22_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_23_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31097536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31982336))))[name = string("model_model_layers_23_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_23_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31998784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32220032))))[name = string("model_model_layers_23_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_23_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32224192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32445440))))[name = string("model_model_layers_23_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_24_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32449600))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33334400))))[name = string("model_model_layers_24_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_24_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33350848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33572096))))[name = string("model_model_layers_24_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_24_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33576256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33797504))))[name = string("model_model_layers_24_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_25_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33801664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34686464))))[name = string("model_model_layers_25_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_25_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34702912))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34924160))))[name = string("model_model_layers_25_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_25_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34928320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35149568))))[name = string("model_model_layers_25_self_attn_v_proj_weight_palettized")]; - int32 var_1660_batch_dims_0 = const()[name = string("op_1660_batch_dims_0"), val = int32(0)]; - bool var_1660_validate_indices_0 = const()[name = string("op_1660_validate_indices_0"), val = bool(false)]; - tensor var_1652_to_fp16 = const()[name = string("op_1652_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35153728)))]; - string current_pos_to_int16_dtype_0 = const()[name = string("current_pos_to_int16_dtype_0"), val = string("int16")]; - string cast_266_dtype_0 = const()[name = string("cast_266_dtype_0"), val = string("int32")]; - int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; - tensor current_pos_to_int16 = cast(dtype = current_pos_to_int16_dtype_0, x = current_pos)[name = string("cast_5")]; - tensor cast_266 = cast(dtype = cast_266_dtype_0, x = current_pos_to_int16)[name = string("cast_4")]; - tensor greater_equal_0 = greater_equal(x = cast_266, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; - int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(8192)]; - tensor add_0 = add(x = cast_266, y = slice_by_index_0)[name = string("add_0")]; - tensor select_0 = select(a = cast_266, b = add_0, cond = greater_equal_0)[name = string("select_0")]; - string select_0_to_int16_dtype_0 = const()[name = string("select_0_to_int16_dtype_0"), val = string("int16")]; - string cast_0_dtype_0 = const()[name = string("cast_0_dtype_0"), val = string("int32")]; - int32 greater_equal_0_y_0_1 = const()[name = string("greater_equal_0_y_0_1"), val = int32(0)]; - tensor select_0_to_int16 = cast(dtype = select_0_to_int16_dtype_0, x = select_0)[name = string("cast_3")]; - tensor cast_0 = cast(dtype = cast_0_dtype_0, x = select_0_to_int16)[name = string("cast_2")]; - tensor greater_equal_0_1 = greater_equal(x = cast_0, y = greater_equal_0_y_0_1)[name = string("greater_equal_0_1")]; - int32 slice_by_index_0_1 = const()[name = string("slice_by_index_0_1"), val = int32(8192)]; - tensor add_0_1 = add(x = cast_0, y = slice_by_index_0_1)[name = string("add_0_1")]; - tensor select_0_1 = select(a = cast_0, b = add_0_1, cond = greater_equal_0_1)[name = string("select_0_1")]; - int32 op_1660_cast_fp16_cast_uint16_cast_uint16_axis_0 = const()[name = string("op_1660_cast_fp16_cast_uint16_cast_uint16_axis_0"), val = int32(1)]; - tensor op_1660_cast_fp16_cast_uint16_cast_uint16 = gather(axis = op_1660_cast_fp16_cast_uint16_cast_uint16_axis_0, batch_dims = var_1660_batch_dims_0, indices = select_0_1, validate_indices = var_1660_validate_indices_0, x = var_1652_to_fp16)[name = string("op_1660_cast_fp16_cast_uint16_cast_uint16")]; - tensor var_1665 = const()[name = string("op_1665"), val = tensor([1, 1, 1, -1])]; - tensor sin_1_cast_fp16 = reshape(shape = var_1665, x = op_1660_cast_fp16_cast_uint16_cast_uint16)[name = string("sin_1_cast_fp16")]; - int32 var_1675_axis_0 = const()[name = string("op_1675_axis_0"), val = int32(1)]; - int32 var_1675_batch_dims_0 = const()[name = string("op_1675_batch_dims_0"), val = int32(0)]; - bool var_1675_validate_indices_0 = const()[name = string("op_1675_validate_indices_0"), val = bool(false)]; - tensor var_1667_to_fp16 = const()[name = string("op_1667_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39348096)))]; - string current_pos_to_uint16_dtype_0 = const()[name = string("current_pos_to_uint16_dtype_0"), val = string("uint16")]; - tensor current_pos_to_uint16 = cast(dtype = current_pos_to_uint16_dtype_0, x = current_pos)[name = string("cast_1")]; - tensor var_1675_cast_fp16_cast_uint16 = gather(axis = var_1675_axis_0, batch_dims = var_1675_batch_dims_0, indices = current_pos_to_uint16, validate_indices = var_1675_validate_indices_0, x = var_1667_to_fp16)[name = string("op_1675_cast_fp16_cast_uint16")]; - tensor var_1680 = const()[name = string("op_1680"), val = tensor([1, 1, 1, -1])]; - tensor cos_1_cast_fp16 = reshape(shape = var_1680, x = var_1675_cast_fp16_cast_uint16)[name = string("cos_1_cast_fp16")]; - int32 var_1701 = const()[name = string("op_1701"), val = int32(-1)]; - fp16 const_0_promoted_to_fp16 = const()[name = string("const_0_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_1703_cast_fp16 = mul(x = hidden_states, y = const_0_promoted_to_fp16)[name = string("op_1703_cast_fp16")]; - bool input_1_interleave_0 = const()[name = string("input_1_interleave_0"), val = bool(false)]; - tensor input_1_cast_fp16 = concat(axis = var_1701, interleave = input_1_interleave_0, values = (hidden_states, var_1703_cast_fp16))[name = string("input_1_cast_fp16")]; - tensor normed_1_axes_0 = const()[name = string("normed_1_axes_0"), val = tensor([-1])]; - fp16 var_1698_to_fp16 = const()[name = string("op_1698_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_1_cast_fp16 = layer_norm(axes = normed_1_axes_0, epsilon = var_1698_to_fp16, x = input_1_cast_fp16)[name = string("normed_1_cast_fp16")]; - tensor normed_3_begin_0 = const()[name = string("normed_3_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_3_end_0 = const()[name = string("normed_3_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_3_end_mask_0 = const()[name = string("normed_3_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_3_cast_fp16 = slice_by_index(begin = normed_3_begin_0, end = normed_3_end_0, end_mask = normed_3_end_mask_0, x = normed_1_cast_fp16)[name = string("normed_3_cast_fp16")]; - tensor var_1717_to_fp16 = const()[name = string("op_1717_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43542464)))]; - tensor hidden_states_3_cast_fp16 = mul(x = normed_3_cast_fp16, y = var_1717_to_fp16)[name = string("hidden_states_3_cast_fp16")]; - tensor var_1722 = const()[name = string("op_1722"), val = tensor([0, 2, 1])]; - tensor var_1725_axes_0 = const()[name = string("op_1725_axes_0"), val = tensor([2])]; - tensor var_1723_cast_fp16 = transpose(perm = var_1722, x = hidden_states_3_cast_fp16)[name = string("transpose_155")]; - tensor var_1725_cast_fp16 = expand_dims(axes = var_1725_axes_0, x = var_1723_cast_fp16)[name = string("op_1725_cast_fp16")]; - string var_1741_pad_type_0 = const()[name = string("op_1741_pad_type_0"), val = string("valid")]; - tensor var_1741_strides_0 = const()[name = string("op_1741_strides_0"), val = tensor([1, 1])]; - tensor var_1741_pad_0 = const()[name = string("op_1741_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1741_dilations_0 = const()[name = string("op_1741_dilations_0"), val = tensor([1, 1])]; - int32 var_1741_groups_0 = const()[name = string("op_1741_groups_0"), val = int32(1)]; - tensor var_1741 = conv(dilations = var_1741_dilations_0, groups = var_1741_groups_0, pad = var_1741_pad_0, pad_type = var_1741_pad_type_0, strides = var_1741_strides_0, weight = model_model_layers_0_self_attn_q_proj_weight_palettized, x = var_1725_cast_fp16)[name = string("op_1741")]; - tensor var_1746 = const()[name = string("op_1746"), val = tensor([1, 4, 1, 256])]; - tensor var_1747 = reshape(shape = var_1746, x = var_1741)[name = string("op_1747")]; - string var_1763_pad_type_0 = const()[name = string("op_1763_pad_type_0"), val = string("valid")]; - tensor var_1763_strides_0 = const()[name = string("op_1763_strides_0"), val = tensor([1, 1])]; - tensor var_1763_pad_0 = const()[name = string("op_1763_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1763_dilations_0 = const()[name = string("op_1763_dilations_0"), val = tensor([1, 1])]; - int32 var_1763_groups_0 = const()[name = string("op_1763_groups_0"), val = int32(1)]; - tensor var_1763 = conv(dilations = var_1763_dilations_0, groups = var_1763_groups_0, pad = var_1763_pad_0, pad_type = var_1763_pad_type_0, strides = var_1763_strides_0, weight = model_model_layers_0_self_attn_k_proj_weight_palettized, x = var_1725_cast_fp16)[name = string("op_1763")]; - tensor var_1768 = const()[name = string("op_1768"), val = tensor([1, 1, 1, 256])]; - tensor var_1769 = reshape(shape = var_1768, x = var_1763)[name = string("op_1769")]; - string var_1785_pad_type_0 = const()[name = string("op_1785_pad_type_0"), val = string("valid")]; - tensor var_1785_strides_0 = const()[name = string("op_1785_strides_0"), val = tensor([1, 1])]; - tensor var_1785_pad_0 = const()[name = string("op_1785_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1785_dilations_0 = const()[name = string("op_1785_dilations_0"), val = tensor([1, 1])]; - int32 var_1785_groups_0 = const()[name = string("op_1785_groups_0"), val = int32(1)]; - tensor var_1785 = conv(dilations = var_1785_dilations_0, groups = var_1785_groups_0, pad = var_1785_pad_0, pad_type = var_1785_pad_type_0, strides = var_1785_strides_0, weight = model_model_layers_0_self_attn_v_proj_weight_palettized, x = var_1725_cast_fp16)[name = string("op_1785")]; - tensor var_1790 = const()[name = string("op_1790"), val = tensor([1, 1, 1, 256])]; - tensor var_1791 = reshape(shape = var_1790, x = var_1785)[name = string("op_1791")]; - int32 var_1806 = const()[name = string("op_1806"), val = int32(-1)]; - fp16 const_4_promoted = const()[name = string("const_4_promoted"), val = fp16(-0x1p+0)]; - tensor var_1808 = mul(x = var_1747, y = const_4_promoted)[name = string("op_1808")]; - bool input_5_interleave_0 = const()[name = string("input_5_interleave_0"), val = bool(false)]; - tensor input_5 = concat(axis = var_1806, interleave = input_5_interleave_0, values = (var_1747, var_1808))[name = string("input_5")]; - tensor normed_5_axes_0 = const()[name = string("normed_5_axes_0"), val = tensor([-1])]; - fp16 var_1803_to_fp16 = const()[name = string("op_1803_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_5_cast_fp16 = layer_norm(axes = normed_5_axes_0, epsilon = var_1803_to_fp16, x = input_5)[name = string("normed_5_cast_fp16")]; - tensor normed_7_begin_0 = const()[name = string("normed_7_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_7_end_0 = const()[name = string("normed_7_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_7_end_mask_0 = const()[name = string("normed_7_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_7 = slice_by_index(begin = normed_7_begin_0, end = normed_7_end_0, end_mask = normed_7_end_mask_0, x = normed_5_cast_fp16)[name = string("normed_7")]; - tensor var_1822_to_fp16 = const()[name = string("op_1822_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43544832)))]; - tensor q_1_cast_fp16 = mul(x = normed_7, y = var_1822_to_fp16)[name = string("q_1_cast_fp16")]; - int32 var_1833 = const()[name = string("op_1833"), val = int32(-1)]; - fp16 const_8_promoted = const()[name = string("const_8_promoted"), val = fp16(-0x1p+0)]; - tensor var_1835 = mul(x = var_1769, y = const_8_promoted)[name = string("op_1835")]; - bool input_7_interleave_0 = const()[name = string("input_7_interleave_0"), val = bool(false)]; - tensor input_7 = concat(axis = var_1833, interleave = input_7_interleave_0, values = (var_1769, var_1835))[name = string("input_7")]; - tensor normed_9_axes_0 = const()[name = string("normed_9_axes_0"), val = tensor([-1])]; - fp16 var_1830_to_fp16 = const()[name = string("op_1830_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_9_cast_fp16 = layer_norm(axes = normed_9_axes_0, epsilon = var_1830_to_fp16, x = input_7)[name = string("normed_9_cast_fp16")]; - tensor normed_11_begin_0 = const()[name = string("normed_11_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_11_end_0 = const()[name = string("normed_11_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_11_end_mask_0 = const()[name = string("normed_11_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_11 = slice_by_index(begin = normed_11_begin_0, end = normed_11_end_0, end_mask = normed_11_end_mask_0, x = normed_9_cast_fp16)[name = string("normed_11")]; - tensor var_1849_to_fp16 = const()[name = string("op_1849_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43545408)))]; - tensor k_1_cast_fp16 = mul(x = normed_11, y = var_1849_to_fp16)[name = string("k_1_cast_fp16")]; - tensor var_1851_cast_fp16 = mul(x = q_1_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1851_cast_fp16")]; - tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_1_cast_fp16 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = q_1_cast_fp16)[name = string("x1_1_cast_fp16")]; - tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_1_cast_fp16 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = q_1_cast_fp16)[name = string("x2_1_cast_fp16")]; - fp16 const_14_promoted_to_fp16 = const()[name = string("const_14_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_1872_cast_fp16 = mul(x = x2_1_cast_fp16, y = const_14_promoted_to_fp16)[name = string("op_1872_cast_fp16")]; - int32 var_1874 = const()[name = string("op_1874"), val = int32(-1)]; - bool var_1875_interleave_0 = const()[name = string("op_1875_interleave_0"), val = bool(false)]; - tensor var_1875_cast_fp16 = concat(axis = var_1874, interleave = var_1875_interleave_0, values = (var_1872_cast_fp16, x1_1_cast_fp16))[name = string("op_1875_cast_fp16")]; - tensor var_1876_cast_fp16 = mul(x = var_1875_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1876_cast_fp16")]; - tensor query_states_1_cast_fp16 = add(x = var_1851_cast_fp16, y = var_1876_cast_fp16)[name = string("query_states_1_cast_fp16")]; - tensor var_1879_cast_fp16 = mul(x = k_1_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1879_cast_fp16")]; - tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_3_cast_fp16 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = k_1_cast_fp16)[name = string("x1_3_cast_fp16")]; - tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_3_cast_fp16 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = k_1_cast_fp16)[name = string("x2_3_cast_fp16")]; - fp16 const_17_promoted_to_fp16 = const()[name = string("const_17_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_1900_cast_fp16 = mul(x = x2_3_cast_fp16, y = const_17_promoted_to_fp16)[name = string("op_1900_cast_fp16")]; - int32 var_1902 = const()[name = string("op_1902"), val = int32(-1)]; - bool var_1903_interleave_0 = const()[name = string("op_1903_interleave_0"), val = bool(false)]; - tensor var_1903_cast_fp16 = concat(axis = var_1902, interleave = var_1903_interleave_0, values = (var_1900_cast_fp16, x1_3_cast_fp16))[name = string("op_1903_cast_fp16")]; - tensor var_1904_cast_fp16 = mul(x = var_1903_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1904_cast_fp16")]; - tensor key_states_1_cast_fp16 = add(x = var_1879_cast_fp16, y = var_1904_cast_fp16)[name = string("key_states_1_cast_fp16")]; - int32 var_1908 = const()[name = string("op_1908"), val = int32(1)]; - tensor var_1909 = add(x = current_pos, y = var_1908)[name = string("op_1909")]; - tensor read_state_0 = read_state(input = model_model_kv_cache_local)[name = string("read_state_0")]; - tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([0])]; - tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; - tensor expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor([0])]; - tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([1])]; - int32 concat_2_axis_0 = const()[name = string("concat_2_axis_0"), val = int32(0)]; - bool concat_2_interleave_0 = const()[name = string("concat_2_interleave_0"), val = bool(false)]; - tensor concat_2 = concat(axis = concat_2_axis_0, interleave = concat_2_interleave_0, values = (expand_dims_0, expand_dims_1, current_pos, expand_dims_3))[name = string("concat_2")]; - tensor concat_3_values1_0 = const()[name = string("concat_3_values1_0"), val = tensor([0])]; - tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; - int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)]; - bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; - tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_4, concat_3_values1_0, var_1909, concat_3_values3_0))[name = string("concat_3")]; - tensor model_model_kv_cache_local_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_local_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_local_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_1_stride_0, update = key_states_1_cast_fp16, x = read_state_0)[name = string("model_model_kv_cache_local_internal_tensor_assign_1_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_0_write_state")]; - tensor coreml_update_state_52 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_0")]; - tensor expand_dims_6 = const()[name = string("expand_dims_6"), val = tensor([22])]; - tensor expand_dims_7 = const()[name = string("expand_dims_7"), val = tensor([0])]; - tensor expand_dims_9 = const()[name = string("expand_dims_9"), val = tensor([0])]; - tensor expand_dims_10 = const()[name = string("expand_dims_10"), val = tensor([23])]; - int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; - bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; - tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (expand_dims_6, expand_dims_7, current_pos, expand_dims_9))[name = string("concat_6")]; - tensor concat_7_values1_0 = const()[name = string("concat_7_values1_0"), val = tensor([0])]; - tensor concat_7_values3_0 = const()[name = string("concat_7_values3_0"), val = tensor([0])]; - int32 concat_7_axis_0 = const()[name = string("concat_7_axis_0"), val = int32(0)]; - bool concat_7_interleave_0 = const()[name = string("concat_7_interleave_0"), val = bool(false)]; - tensor concat_7 = concat(axis = concat_7_axis_0, interleave = concat_7_interleave_0, values = (expand_dims_10, concat_7_values1_0, var_1909, concat_7_values3_0))[name = string("concat_7")]; - tensor model_model_kv_cache_local_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_6, begin_mask = model_model_kv_cache_local_internal_tensor_assign_2_begin_mask_0, end = concat_7, end_mask = model_model_kv_cache_local_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_2_stride_0, update = var_1791, x = coreml_update_state_52)[name = string("model_model_kv_cache_local_internal_tensor_assign_2_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_1_write_state")]; - tensor coreml_update_state_53 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_1")]; - tensor var_1959_begin_0 = const()[name = string("op_1959_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1959_end_0 = const()[name = string("op_1959_end_0"), val = tensor([1, 1, 512, 256])]; - tensor var_1959_end_mask_0 = const()[name = string("op_1959_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_1959_cast_fp16 = slice_by_index(begin = var_1959_begin_0, end = var_1959_end_0, end_mask = var_1959_end_mask_0, x = coreml_update_state_53)[name = string("op_1959_cast_fp16")]; - tensor var_1966_begin_0 = const()[name = string("op_1966_begin_0"), val = tensor([22, 0, 0, 0])]; - tensor var_1966_end_0 = const()[name = string("op_1966_end_0"), val = tensor([23, 1, 512, 256])]; - tensor var_1966_end_mask_0 = const()[name = string("op_1966_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_1966_cast_fp16 = slice_by_index(begin = var_1966_begin_0, end = var_1966_end_0, end_mask = var_1966_end_mask_0, x = coreml_update_state_53)[name = string("op_1966_cast_fp16")]; - tensor var_2003 = const()[name = string("op_2003"), val = tensor([1, 4, 1, 1])]; - tensor x_5_cast_fp16 = tile(reps = var_2003, x = var_1959_cast_fp16)[name = string("x_5_cast_fp16")]; - tensor var_2023 = const()[name = string("op_2023"), val = tensor([1, 4, 1, 1])]; - tensor x_11_cast_fp16 = tile(reps = var_2023, x = var_1966_cast_fp16)[name = string("x_11_cast_fp16")]; - bool var_2050_transpose_x_1 = const()[name = string("op_2050_transpose_x_1"), val = bool(false)]; - bool var_2050_transpose_y_1 = const()[name = string("op_2050_transpose_y_1"), val = bool(true)]; - tensor var_2050 = matmul(transpose_x = var_2050_transpose_x_1, transpose_y = var_2050_transpose_y_1, x = query_states_1_cast_fp16, y = x_5_cast_fp16)[name = string("op_2050")]; - fp16 var_2051_to_fp16 = const()[name = string("op_2051_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_1_cast_fp16 = mul(x = var_2050, y = var_2051_to_fp16)[name = string("attn_weights_1_cast_fp16")]; - tensor var_2083_begin_0 = const()[name = string("op_2083_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2083_end_0 = const()[name = string("op_2083_end_0"), val = tensor([1, 1, 1, 512])]; - tensor var_2083_end_mask_0 = const()[name = string("op_2083_end_mask_0"), val = tensor([true, true, true, false])]; - tensor var_2083 = slice_by_index(begin = var_2083_begin_0, end = var_2083_end_0, end_mask = var_2083_end_mask_0, x = causal_mask)[name = string("op_2083")]; - tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = var_2083)[name = string("attn_weights_3_cast_fp16")]; - int32 var_2086 = const()[name = string("op_2086"), val = int32(-1)]; - tensor attn_weights_5_cast_fp16 = softmax(axis = var_2086, x = attn_weights_3_cast_fp16)[name = string("attn_weights_5_cast_fp16")]; - bool attn_output_1_transpose_x_0 = const()[name = string("attn_output_1_transpose_x_0"), val = bool(false)]; - bool attn_output_1_transpose_y_0 = const()[name = string("attn_output_1_transpose_y_0"), val = bool(false)]; - tensor attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_0, transpose_y = attn_output_1_transpose_y_0, x = attn_weights_5_cast_fp16, y = x_11_cast_fp16)[name = string("attn_output_1_cast_fp16")]; - tensor var_2097_perm_0 = const()[name = string("op_2097_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_2101 = const()[name = string("op_2101"), val = tensor([1, 1, 1024])]; - tensor var_2097_cast_fp16 = transpose(perm = var_2097_perm_0, x = attn_output_1_cast_fp16)[name = string("transpose_154")]; - tensor attn_output_5_cast_fp16 = reshape(shape = var_2101, x = var_2097_cast_fp16)[name = string("attn_output_5_cast_fp16")]; - tensor var_2106 = const()[name = string("op_2106"), val = tensor([0, 2, 1])]; - string var_2122_pad_type_0 = const()[name = string("op_2122_pad_type_0"), val = string("valid")]; - int32 var_2122_groups_0 = const()[name = string("op_2122_groups_0"), val = int32(1)]; - tensor var_2122_strides_0 = const()[name = string("op_2122_strides_0"), val = tensor([1])]; - tensor var_2122_pad_0 = const()[name = string("op_2122_pad_0"), val = tensor([0, 0])]; - tensor var_2122_dilations_0 = const()[name = string("op_2122_dilations_0"), val = tensor([1])]; - tensor squeeze_0_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43545984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44430784))))[name = string("squeeze_0_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_2107_cast_fp16 = transpose(perm = var_2106, x = attn_output_5_cast_fp16)[name = string("transpose_153")]; - tensor var_2122_cast_fp16 = conv(dilations = var_2122_dilations_0, groups = var_2122_groups_0, pad = var_2122_pad_0, pad_type = var_2122_pad_type_0, strides = var_2122_strides_0, weight = squeeze_0_cast_fp16_to_fp32_to_fp16_palettized, x = var_2107_cast_fp16)[name = string("op_2122_cast_fp16")]; - tensor var_2126 = const()[name = string("op_2126"), val = tensor([0, 2, 1])]; - int32 var_2137 = const()[name = string("op_2137"), val = int32(-1)]; - fp16 const_26_promoted_to_fp16 = const()[name = string("const_26_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_9_cast_fp16 = transpose(perm = var_2126, x = var_2122_cast_fp16)[name = string("transpose_152")]; - tensor var_2139_cast_fp16 = mul(x = hidden_states_9_cast_fp16, y = const_26_promoted_to_fp16)[name = string("op_2139_cast_fp16")]; - bool input_11_interleave_0 = const()[name = string("input_11_interleave_0"), val = bool(false)]; - tensor input_11_cast_fp16 = concat(axis = var_2137, interleave = input_11_interleave_0, values = (hidden_states_9_cast_fp16, var_2139_cast_fp16))[name = string("input_11_cast_fp16")]; - tensor normed_13_axes_0 = const()[name = string("normed_13_axes_0"), val = tensor([-1])]; - fp16 var_2134_to_fp16 = const()[name = string("op_2134_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_13_cast_fp16 = layer_norm(axes = normed_13_axes_0, epsilon = var_2134_to_fp16, x = input_11_cast_fp16)[name = string("normed_13_cast_fp16")]; - tensor normed_15_begin_0 = const()[name = string("normed_15_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_15_end_0 = const()[name = string("normed_15_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_15_end_mask_0 = const()[name = string("normed_15_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_15_cast_fp16 = slice_by_index(begin = normed_15_begin_0, end = normed_15_end_0, end_mask = normed_15_end_mask_0, x = normed_13_cast_fp16)[name = string("normed_15_cast_fp16")]; - tensor var_2153_to_fp16 = const()[name = string("op_2153_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44449280)))]; - tensor attn_output_9_cast_fp16 = mul(x = normed_15_cast_fp16, y = var_2153_to_fp16)[name = string("attn_output_9_cast_fp16")]; - tensor hidden_states_11_cast_fp16 = add(x = hidden_states, y = attn_output_9_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; - int32 var_2166 = const()[name = string("op_2166"), val = int32(-1)]; - fp16 const_30_promoted_to_fp16 = const()[name = string("const_30_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_2168_cast_fp16 = mul(x = hidden_states_11_cast_fp16, y = const_30_promoted_to_fp16)[name = string("op_2168_cast_fp16")]; - bool input_13_interleave_0 = const()[name = string("input_13_interleave_0"), val = bool(false)]; - tensor input_13_cast_fp16 = concat(axis = var_2166, interleave = input_13_interleave_0, values = (hidden_states_11_cast_fp16, var_2168_cast_fp16))[name = string("input_13_cast_fp16")]; - tensor normed_17_axes_0 = const()[name = string("normed_17_axes_0"), val = tensor([-1])]; - fp16 var_2163_to_fp16 = const()[name = string("op_2163_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_17_cast_fp16 = layer_norm(axes = normed_17_axes_0, epsilon = var_2163_to_fp16, x = input_13_cast_fp16)[name = string("normed_17_cast_fp16")]; - tensor normed_19_begin_0 = const()[name = string("normed_19_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_19_end_0 = const()[name = string("normed_19_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_19_end_mask_0 = const()[name = string("normed_19_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_19_cast_fp16 = slice_by_index(begin = normed_19_begin_0, end = normed_19_end_0, end_mask = normed_19_end_mask_0, x = normed_17_cast_fp16)[name = string("normed_19_cast_fp16")]; - tensor var_2182_to_fp16 = const()[name = string("op_2182_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44451648)))]; - tensor x_13_cast_fp16 = mul(x = normed_19_cast_fp16, y = var_2182_to_fp16)[name = string("x_13_cast_fp16")]; - tensor var_2194 = const()[name = string("op_2194"), val = tensor([0, 2, 1])]; - tensor input_15_axes_0 = const()[name = string("input_15_axes_0"), val = tensor([2])]; - tensor var_2195_cast_fp16 = transpose(perm = var_2194, x = x_13_cast_fp16)[name = string("transpose_151")]; - tensor input_15_cast_fp16 = expand_dims(axes = input_15_axes_0, x = var_2195_cast_fp16)[name = string("input_15_cast_fp16")]; - string x_15_pad_type_0 = const()[name = string("x_15_pad_type_0"), val = string("valid")]; - tensor x_15_strides_0 = const()[name = string("x_15_strides_0"), val = tensor([1, 1])]; - tensor x_15_pad_0 = const()[name = string("x_15_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_15_dilations_0 = const()[name = string("x_15_dilations_0"), val = tensor([1, 1])]; - int32 x_15_groups_0 = const()[name = string("x_15_groups_0"), val = int32(1)]; - tensor model_model_layers_0_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44454016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50426048))))[name = string("model_model_layers_0_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_15_cast_fp16 = conv(dilations = x_15_dilations_0, groups = x_15_groups_0, pad = x_15_pad_0, pad_type = x_15_pad_type_0, strides = x_15_strides_0, weight = model_model_layers_0_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_15_cast_fp16)[name = string("x_15_cast_fp16")]; - string b_1_pad_type_0 = const()[name = string("b_1_pad_type_0"), val = string("valid")]; - tensor b_1_strides_0 = const()[name = string("b_1_strides_0"), val = tensor([1, 1])]; - tensor b_1_pad_0 = const()[name = string("b_1_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_1_dilations_0 = const()[name = string("b_1_dilations_0"), val = tensor([1, 1])]; - int32 b_1_groups_0 = const()[name = string("b_1_groups_0"), val = int32(1)]; - tensor model_model_layers_0_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50536704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56508736))))[name = string("model_model_layers_0_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_1_cast_fp16 = conv(dilations = b_1_dilations_0, groups = b_1_groups_0, pad = b_1_pad_0, pad_type = b_1_pad_type_0, strides = b_1_strides_0, weight = model_model_layers_0_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_15_cast_fp16)[name = string("b_1_cast_fp16")]; - string var_2220_mode_0 = const()[name = string("op_2220_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_2220_cast_fp16 = gelu(mode = var_2220_mode_0, x = x_15_cast_fp16)[name = string("op_2220_cast_fp16")]; - tensor input_17_cast_fp16 = mul(x = var_2220_cast_fp16, y = b_1_cast_fp16)[name = string("input_17_cast_fp16")]; - string e_1_pad_type_0 = const()[name = string("e_1_pad_type_0"), val = string("valid")]; - tensor e_1_strides_0 = const()[name = string("e_1_strides_0"), val = tensor([1, 1])]; - tensor e_1_pad_0 = const()[name = string("e_1_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_1_dilations_0 = const()[name = string("e_1_dilations_0"), val = tensor([1, 1])]; - int32 e_1_groups_0 = const()[name = string("e_1_groups_0"), val = int32(1)]; - tensor model_model_layers_0_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56619392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62591424))))[name = string("model_model_layers_0_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_1_cast_fp16 = conv(dilations = e_1_dilations_0, groups = e_1_groups_0, pad = e_1_pad_0, pad_type = e_1_pad_type_0, strides = e_1_strides_0, weight = model_model_layers_0_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_17_cast_fp16)[name = string("e_1_cast_fp16")]; - tensor var_2228_axes_0 = const()[name = string("op_2228_axes_0"), val = tensor([2])]; - tensor var_2228_cast_fp16 = squeeze(axes = var_2228_axes_0, x = e_1_cast_fp16)[name = string("op_2228_cast_fp16")]; - tensor var_2229 = const()[name = string("op_2229"), val = tensor([0, 2, 1])]; - int32 var_2240 = const()[name = string("op_2240"), val = int32(-1)]; - fp16 const_34_promoted_to_fp16 = const()[name = string("const_34_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_13_cast_fp16 = transpose(perm = var_2229, x = var_2228_cast_fp16)[name = string("transpose_150")]; - tensor var_2242_cast_fp16 = mul(x = hidden_states_13_cast_fp16, y = const_34_promoted_to_fp16)[name = string("op_2242_cast_fp16")]; - bool input_19_interleave_0 = const()[name = string("input_19_interleave_0"), val = bool(false)]; - tensor input_19_cast_fp16 = concat(axis = var_2240, interleave = input_19_interleave_0, values = (hidden_states_13_cast_fp16, var_2242_cast_fp16))[name = string("input_19_cast_fp16")]; - tensor normed_21_axes_0 = const()[name = string("normed_21_axes_0"), val = tensor([-1])]; - fp16 var_2237_to_fp16 = const()[name = string("op_2237_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_21_cast_fp16 = layer_norm(axes = normed_21_axes_0, epsilon = var_2237_to_fp16, x = input_19_cast_fp16)[name = string("normed_21_cast_fp16")]; - tensor normed_23_begin_0 = const()[name = string("normed_23_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_23_end_0 = const()[name = string("normed_23_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_23_end_mask_0 = const()[name = string("normed_23_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_23_cast_fp16 = slice_by_index(begin = normed_23_begin_0, end = normed_23_end_0, end_mask = normed_23_end_mask_0, x = normed_21_cast_fp16)[name = string("normed_23_cast_fp16")]; - tensor var_2256_to_fp16 = const()[name = string("op_2256_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62609920)))]; - tensor hidden_states_15_cast_fp16 = mul(x = normed_23_cast_fp16, y = var_2256_to_fp16)[name = string("hidden_states_15_cast_fp16")]; - tensor hidden_states_17_cast_fp16 = add(x = hidden_states_11_cast_fp16, y = hidden_states_15_cast_fp16)[name = string("hidden_states_17_cast_fp16")]; - int32 var_2307 = const()[name = string("op_2307"), val = int32(-1)]; - fp16 const_38_promoted_to_fp16 = const()[name = string("const_38_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_2309_cast_fp16 = mul(x = hidden_states_17_cast_fp16, y = const_38_promoted_to_fp16)[name = string("op_2309_cast_fp16")]; - bool input_21_interleave_0 = const()[name = string("input_21_interleave_0"), val = bool(false)]; - tensor input_21_cast_fp16 = concat(axis = var_2307, interleave = input_21_interleave_0, values = (hidden_states_17_cast_fp16, var_2309_cast_fp16))[name = string("input_21_cast_fp16")]; - tensor normed_25_axes_0 = const()[name = string("normed_25_axes_0"), val = tensor([-1])]; - fp16 var_2304_to_fp16 = const()[name = string("op_2304_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_25_cast_fp16 = layer_norm(axes = normed_25_axes_0, epsilon = var_2304_to_fp16, x = input_21_cast_fp16)[name = string("normed_25_cast_fp16")]; - tensor normed_27_begin_0 = const()[name = string("normed_27_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_27_end_0 = const()[name = string("normed_27_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_27_end_mask_0 = const()[name = string("normed_27_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_27_cast_fp16 = slice_by_index(begin = normed_27_begin_0, end = normed_27_end_0, end_mask = normed_27_end_mask_0, x = normed_25_cast_fp16)[name = string("normed_27_cast_fp16")]; - tensor var_2323_to_fp16 = const()[name = string("op_2323_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62612288)))]; - tensor hidden_states_19_cast_fp16 = mul(x = normed_27_cast_fp16, y = var_2323_to_fp16)[name = string("hidden_states_19_cast_fp16")]; - tensor var_2328 = const()[name = string("op_2328"), val = tensor([0, 2, 1])]; - tensor var_2331_axes_0 = const()[name = string("op_2331_axes_0"), val = tensor([2])]; - tensor var_2329_cast_fp16 = transpose(perm = var_2328, x = hidden_states_19_cast_fp16)[name = string("transpose_149")]; - tensor var_2331_cast_fp16 = expand_dims(axes = var_2331_axes_0, x = var_2329_cast_fp16)[name = string("op_2331_cast_fp16")]; - string var_2347_pad_type_0 = const()[name = string("op_2347_pad_type_0"), val = string("valid")]; - tensor var_2347_strides_0 = const()[name = string("op_2347_strides_0"), val = tensor([1, 1])]; - tensor var_2347_pad_0 = const()[name = string("op_2347_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2347_dilations_0 = const()[name = string("op_2347_dilations_0"), val = tensor([1, 1])]; - int32 var_2347_groups_0 = const()[name = string("op_2347_groups_0"), val = int32(1)]; - tensor var_2347 = conv(dilations = var_2347_dilations_0, groups = var_2347_groups_0, pad = var_2347_pad_0, pad_type = var_2347_pad_type_0, strides = var_2347_strides_0, weight = model_model_layers_1_self_attn_q_proj_weight_palettized, x = var_2331_cast_fp16)[name = string("op_2347")]; - tensor var_2352 = const()[name = string("op_2352"), val = tensor([1, 4, 1, 256])]; - tensor var_2353 = reshape(shape = var_2352, x = var_2347)[name = string("op_2353")]; - string var_2369_pad_type_0 = const()[name = string("op_2369_pad_type_0"), val = string("valid")]; - tensor var_2369_strides_0 = const()[name = string("op_2369_strides_0"), val = tensor([1, 1])]; - tensor var_2369_pad_0 = const()[name = string("op_2369_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2369_dilations_0 = const()[name = string("op_2369_dilations_0"), val = tensor([1, 1])]; - int32 var_2369_groups_0 = const()[name = string("op_2369_groups_0"), val = int32(1)]; - tensor var_2369 = conv(dilations = var_2369_dilations_0, groups = var_2369_groups_0, pad = var_2369_pad_0, pad_type = var_2369_pad_type_0, strides = var_2369_strides_0, weight = model_model_layers_1_self_attn_k_proj_weight_palettized, x = var_2331_cast_fp16)[name = string("op_2369")]; - tensor var_2374 = const()[name = string("op_2374"), val = tensor([1, 1, 1, 256])]; - tensor var_2375 = reshape(shape = var_2374, x = var_2369)[name = string("op_2375")]; - string var_2391_pad_type_0 = const()[name = string("op_2391_pad_type_0"), val = string("valid")]; - tensor var_2391_strides_0 = const()[name = string("op_2391_strides_0"), val = tensor([1, 1])]; - tensor var_2391_pad_0 = const()[name = string("op_2391_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2391_dilations_0 = const()[name = string("op_2391_dilations_0"), val = tensor([1, 1])]; - int32 var_2391_groups_0 = const()[name = string("op_2391_groups_0"), val = int32(1)]; - tensor var_2391 = conv(dilations = var_2391_dilations_0, groups = var_2391_groups_0, pad = var_2391_pad_0, pad_type = var_2391_pad_type_0, strides = var_2391_strides_0, weight = model_model_layers_1_self_attn_v_proj_weight_palettized, x = var_2331_cast_fp16)[name = string("op_2391")]; - tensor var_2396 = const()[name = string("op_2396"), val = tensor([1, 1, 1, 256])]; - tensor var_2397 = reshape(shape = var_2396, x = var_2391)[name = string("op_2397")]; - int32 var_2412 = const()[name = string("op_2412"), val = int32(-1)]; - fp16 const_42_promoted = const()[name = string("const_42_promoted"), val = fp16(-0x1p+0)]; - tensor var_2414 = mul(x = var_2353, y = const_42_promoted)[name = string("op_2414")]; - bool input_25_interleave_0 = const()[name = string("input_25_interleave_0"), val = bool(false)]; - tensor input_25 = concat(axis = var_2412, interleave = input_25_interleave_0, values = (var_2353, var_2414))[name = string("input_25")]; - tensor normed_29_axes_0 = const()[name = string("normed_29_axes_0"), val = tensor([-1])]; - fp16 var_2409_to_fp16 = const()[name = string("op_2409_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_29_cast_fp16 = layer_norm(axes = normed_29_axes_0, epsilon = var_2409_to_fp16, x = input_25)[name = string("normed_29_cast_fp16")]; - tensor normed_31_begin_0 = const()[name = string("normed_31_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_31_end_0 = const()[name = string("normed_31_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_31_end_mask_0 = const()[name = string("normed_31_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_31 = slice_by_index(begin = normed_31_begin_0, end = normed_31_end_0, end_mask = normed_31_end_mask_0, x = normed_29_cast_fp16)[name = string("normed_31")]; - tensor var_2428_to_fp16 = const()[name = string("op_2428_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62614656)))]; - tensor q_3_cast_fp16 = mul(x = normed_31, y = var_2428_to_fp16)[name = string("q_3_cast_fp16")]; - int32 var_2439 = const()[name = string("op_2439"), val = int32(-1)]; - fp16 const_46_promoted = const()[name = string("const_46_promoted"), val = fp16(-0x1p+0)]; - tensor var_2441 = mul(x = var_2375, y = const_46_promoted)[name = string("op_2441")]; - bool input_27_interleave_0 = const()[name = string("input_27_interleave_0"), val = bool(false)]; - tensor input_27 = concat(axis = var_2439, interleave = input_27_interleave_0, values = (var_2375, var_2441))[name = string("input_27")]; - tensor normed_33_axes_0 = const()[name = string("normed_33_axes_0"), val = tensor([-1])]; - fp16 var_2436_to_fp16 = const()[name = string("op_2436_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_33_cast_fp16 = layer_norm(axes = normed_33_axes_0, epsilon = var_2436_to_fp16, x = input_27)[name = string("normed_33_cast_fp16")]; - tensor normed_35_begin_0 = const()[name = string("normed_35_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_35_end_0 = const()[name = string("normed_35_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_35_end_mask_0 = const()[name = string("normed_35_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_35 = slice_by_index(begin = normed_35_begin_0, end = normed_35_end_0, end_mask = normed_35_end_mask_0, x = normed_33_cast_fp16)[name = string("normed_35")]; - tensor var_2455_to_fp16 = const()[name = string("op_2455_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62615232)))]; - tensor k_3_cast_fp16 = mul(x = normed_35, y = var_2455_to_fp16)[name = string("k_3_cast_fp16")]; - tensor var_2457_cast_fp16 = mul(x = q_3_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2457_cast_fp16")]; - tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_5_cast_fp16 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = q_3_cast_fp16)[name = string("x1_5_cast_fp16")]; - tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_5_cast_fp16 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = q_3_cast_fp16)[name = string("x2_5_cast_fp16")]; - fp16 const_52_promoted_to_fp16 = const()[name = string("const_52_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_2478_cast_fp16 = mul(x = x2_5_cast_fp16, y = const_52_promoted_to_fp16)[name = string("op_2478_cast_fp16")]; - int32 var_2480 = const()[name = string("op_2480"), val = int32(-1)]; - bool var_2481_interleave_0 = const()[name = string("op_2481_interleave_0"), val = bool(false)]; - tensor var_2481_cast_fp16 = concat(axis = var_2480, interleave = var_2481_interleave_0, values = (var_2478_cast_fp16, x1_5_cast_fp16))[name = string("op_2481_cast_fp16")]; - tensor var_2482_cast_fp16 = mul(x = var_2481_cast_fp16, y = sin_1_cast_fp16)[name = string("op_2482_cast_fp16")]; - tensor query_states_5_cast_fp16 = add(x = var_2457_cast_fp16, y = var_2482_cast_fp16)[name = string("query_states_5_cast_fp16")]; - tensor var_2485_cast_fp16 = mul(x = k_3_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2485_cast_fp16")]; - tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_7_cast_fp16 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = k_3_cast_fp16)[name = string("x1_7_cast_fp16")]; - tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_7_cast_fp16 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = k_3_cast_fp16)[name = string("x2_7_cast_fp16")]; - fp16 const_55_promoted_to_fp16 = const()[name = string("const_55_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_2506_cast_fp16 = mul(x = x2_7_cast_fp16, y = const_55_promoted_to_fp16)[name = string("op_2506_cast_fp16")]; - int32 var_2508 = const()[name = string("op_2508"), val = int32(-1)]; - bool var_2509_interleave_0 = const()[name = string("op_2509_interleave_0"), val = bool(false)]; - tensor var_2509_cast_fp16 = concat(axis = var_2508, interleave = var_2509_interleave_0, values = (var_2506_cast_fp16, x1_7_cast_fp16))[name = string("op_2509_cast_fp16")]; - tensor var_2510_cast_fp16 = mul(x = var_2509_cast_fp16, y = sin_1_cast_fp16)[name = string("op_2510_cast_fp16")]; - tensor key_states_5_cast_fp16 = add(x = var_2485_cast_fp16, y = var_2510_cast_fp16)[name = string("key_states_5_cast_fp16")]; - tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([1])]; - tensor expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor([0])]; - tensor expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor([0])]; - tensor expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor([2])]; - int32 concat_10_axis_0 = const()[name = string("concat_10_axis_0"), val = int32(0)]; - bool concat_10_interleave_0 = const()[name = string("concat_10_interleave_0"), val = bool(false)]; - tensor concat_10 = concat(axis = concat_10_axis_0, interleave = concat_10_interleave_0, values = (expand_dims_12, expand_dims_13, current_pos, expand_dims_15))[name = string("concat_10")]; - tensor concat_11_values1_0 = const()[name = string("concat_11_values1_0"), val = tensor([0])]; - tensor concat_11_values3_0 = const()[name = string("concat_11_values3_0"), val = tensor([0])]; - int32 concat_11_axis_0 = const()[name = string("concat_11_axis_0"), val = int32(0)]; - bool concat_11_interleave_0 = const()[name = string("concat_11_interleave_0"), val = bool(false)]; - tensor concat_11 = concat(axis = concat_11_axis_0, interleave = concat_11_interleave_0, values = (expand_dims_16, concat_11_values1_0, var_1909, concat_11_values3_0))[name = string("concat_11")]; - tensor model_model_kv_cache_local_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_10, begin_mask = model_model_kv_cache_local_internal_tensor_assign_3_begin_mask_0, end = concat_11, end_mask = model_model_kv_cache_local_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_3_stride_0, update = key_states_5_cast_fp16, x = coreml_update_state_53)[name = string("model_model_kv_cache_local_internal_tensor_assign_3_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_2_write_state")]; - tensor coreml_update_state_54 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_2")]; - tensor expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor([23])]; - tensor expand_dims_19 = const()[name = string("expand_dims_19"), val = tensor([0])]; - tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([0])]; - tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([24])]; - int32 concat_14_axis_0 = const()[name = string("concat_14_axis_0"), val = int32(0)]; - bool concat_14_interleave_0 = const()[name = string("concat_14_interleave_0"), val = bool(false)]; - tensor concat_14 = concat(axis = concat_14_axis_0, interleave = concat_14_interleave_0, values = (expand_dims_18, expand_dims_19, current_pos, expand_dims_21))[name = string("concat_14")]; - tensor concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = tensor([0])]; - tensor concat_15_values3_0 = const()[name = string("concat_15_values3_0"), val = tensor([0])]; - int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)]; - bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)]; - tensor concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (expand_dims_22, concat_15_values1_0, var_1909, concat_15_values3_0))[name = string("concat_15")]; - tensor model_model_kv_cache_local_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_14, begin_mask = model_model_kv_cache_local_internal_tensor_assign_4_begin_mask_0, end = concat_15, end_mask = model_model_kv_cache_local_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_4_stride_0, update = var_2397, x = coreml_update_state_54)[name = string("model_model_kv_cache_local_internal_tensor_assign_4_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_3_write_state")]; - tensor coreml_update_state_55 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_3")]; - tensor var_2565_begin_0 = const()[name = string("op_2565_begin_0"), val = tensor([1, 0, 0, 0])]; - tensor var_2565_end_0 = const()[name = string("op_2565_end_0"), val = tensor([2, 1, 512, 256])]; - tensor var_2565_end_mask_0 = const()[name = string("op_2565_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_2565_cast_fp16 = slice_by_index(begin = var_2565_begin_0, end = var_2565_end_0, end_mask = var_2565_end_mask_0, x = coreml_update_state_55)[name = string("op_2565_cast_fp16")]; - tensor var_2572_begin_0 = const()[name = string("op_2572_begin_0"), val = tensor([23, 0, 0, 0])]; - tensor var_2572_end_0 = const()[name = string("op_2572_end_0"), val = tensor([24, 1, 512, 256])]; - tensor var_2572_end_mask_0 = const()[name = string("op_2572_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_2572_cast_fp16 = slice_by_index(begin = var_2572_begin_0, end = var_2572_end_0, end_mask = var_2572_end_mask_0, x = coreml_update_state_55)[name = string("op_2572_cast_fp16")]; - tensor var_2609 = const()[name = string("op_2609"), val = tensor([1, 4, 1, 1])]; - tensor x_21_cast_fp16 = tile(reps = var_2609, x = var_2565_cast_fp16)[name = string("x_21_cast_fp16")]; - tensor var_2629 = const()[name = string("op_2629"), val = tensor([1, 4, 1, 1])]; - tensor x_27_cast_fp16 = tile(reps = var_2629, x = var_2572_cast_fp16)[name = string("x_27_cast_fp16")]; - bool var_2656_transpose_x_1 = const()[name = string("op_2656_transpose_x_1"), val = bool(false)]; - bool var_2656_transpose_y_1 = const()[name = string("op_2656_transpose_y_1"), val = bool(true)]; - tensor var_2656 = matmul(transpose_x = var_2656_transpose_x_1, transpose_y = var_2656_transpose_y_1, x = query_states_5_cast_fp16, y = x_21_cast_fp16)[name = string("op_2656")]; - fp16 var_2657_to_fp16 = const()[name = string("op_2657_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_7_cast_fp16 = mul(x = var_2656, y = var_2657_to_fp16)[name = string("attn_weights_7_cast_fp16")]; - tensor attn_weights_9_cast_fp16 = add(x = attn_weights_7_cast_fp16, y = var_2083)[name = string("attn_weights_9_cast_fp16")]; - int32 var_2692 = const()[name = string("op_2692"), val = int32(-1)]; - tensor attn_weights_11_cast_fp16 = softmax(axis = var_2692, x = attn_weights_9_cast_fp16)[name = string("attn_weights_11_cast_fp16")]; - bool attn_output_11_transpose_x_0 = const()[name = string("attn_output_11_transpose_x_0"), val = bool(false)]; - bool attn_output_11_transpose_y_0 = const()[name = string("attn_output_11_transpose_y_0"), val = bool(false)]; - tensor attn_output_11_cast_fp16 = matmul(transpose_x = attn_output_11_transpose_x_0, transpose_y = attn_output_11_transpose_y_0, x = attn_weights_11_cast_fp16, y = x_27_cast_fp16)[name = string("attn_output_11_cast_fp16")]; - tensor var_2703_perm_0 = const()[name = string("op_2703_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_2707 = const()[name = string("op_2707"), val = tensor([1, 1, 1024])]; - tensor var_2703_cast_fp16 = transpose(perm = var_2703_perm_0, x = attn_output_11_cast_fp16)[name = string("transpose_148")]; - tensor attn_output_15_cast_fp16 = reshape(shape = var_2707, x = var_2703_cast_fp16)[name = string("attn_output_15_cast_fp16")]; - tensor var_2712 = const()[name = string("op_2712"), val = tensor([0, 2, 1])]; - string var_2728_pad_type_0 = const()[name = string("op_2728_pad_type_0"), val = string("valid")]; - int32 var_2728_groups_0 = const()[name = string("op_2728_groups_0"), val = int32(1)]; - tensor var_2728_strides_0 = const()[name = string("op_2728_strides_0"), val = tensor([1])]; - tensor var_2728_pad_0 = const()[name = string("op_2728_pad_0"), val = tensor([0, 0])]; - tensor var_2728_dilations_0 = const()[name = string("op_2728_dilations_0"), val = tensor([1])]; - tensor squeeze_1_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62615808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63500608))))[name = string("squeeze_1_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_2713_cast_fp16 = transpose(perm = var_2712, x = attn_output_15_cast_fp16)[name = string("transpose_147")]; - tensor var_2728_cast_fp16 = conv(dilations = var_2728_dilations_0, groups = var_2728_groups_0, pad = var_2728_pad_0, pad_type = var_2728_pad_type_0, strides = var_2728_strides_0, weight = squeeze_1_cast_fp16_to_fp32_to_fp16_palettized, x = var_2713_cast_fp16)[name = string("op_2728_cast_fp16")]; - tensor var_2732 = const()[name = string("op_2732"), val = tensor([0, 2, 1])]; - int32 var_2743 = const()[name = string("op_2743"), val = int32(-1)]; - fp16 const_64_promoted_to_fp16 = const()[name = string("const_64_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_25_cast_fp16 = transpose(perm = var_2732, x = var_2728_cast_fp16)[name = string("transpose_146")]; - tensor var_2745_cast_fp16 = mul(x = hidden_states_25_cast_fp16, y = const_64_promoted_to_fp16)[name = string("op_2745_cast_fp16")]; - bool input_31_interleave_0 = const()[name = string("input_31_interleave_0"), val = bool(false)]; - tensor input_31_cast_fp16 = concat(axis = var_2743, interleave = input_31_interleave_0, values = (hidden_states_25_cast_fp16, var_2745_cast_fp16))[name = string("input_31_cast_fp16")]; - tensor normed_37_axes_0 = const()[name = string("normed_37_axes_0"), val = tensor([-1])]; - fp16 var_2740_to_fp16 = const()[name = string("op_2740_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_37_cast_fp16 = layer_norm(axes = normed_37_axes_0, epsilon = var_2740_to_fp16, x = input_31_cast_fp16)[name = string("normed_37_cast_fp16")]; - tensor normed_39_begin_0 = const()[name = string("normed_39_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_39_end_0 = const()[name = string("normed_39_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_39_end_mask_0 = const()[name = string("normed_39_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_39_cast_fp16 = slice_by_index(begin = normed_39_begin_0, end = normed_39_end_0, end_mask = normed_39_end_mask_0, x = normed_37_cast_fp16)[name = string("normed_39_cast_fp16")]; - tensor var_2759_to_fp16 = const()[name = string("op_2759_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63519104)))]; - tensor attn_output_19_cast_fp16 = mul(x = normed_39_cast_fp16, y = var_2759_to_fp16)[name = string("attn_output_19_cast_fp16")]; - tensor hidden_states_27_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = attn_output_19_cast_fp16)[name = string("hidden_states_27_cast_fp16")]; - int32 var_2772 = const()[name = string("op_2772"), val = int32(-1)]; - fp16 const_68_promoted_to_fp16 = const()[name = string("const_68_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_2774_cast_fp16 = mul(x = hidden_states_27_cast_fp16, y = const_68_promoted_to_fp16)[name = string("op_2774_cast_fp16")]; - bool input_33_interleave_0 = const()[name = string("input_33_interleave_0"), val = bool(false)]; - tensor input_33_cast_fp16 = concat(axis = var_2772, interleave = input_33_interleave_0, values = (hidden_states_27_cast_fp16, var_2774_cast_fp16))[name = string("input_33_cast_fp16")]; - tensor normed_41_axes_0 = const()[name = string("normed_41_axes_0"), val = tensor([-1])]; - fp16 var_2769_to_fp16 = const()[name = string("op_2769_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_41_cast_fp16 = layer_norm(axes = normed_41_axes_0, epsilon = var_2769_to_fp16, x = input_33_cast_fp16)[name = string("normed_41_cast_fp16")]; - tensor normed_43_begin_0 = const()[name = string("normed_43_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_43_end_0 = const()[name = string("normed_43_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_43_end_mask_0 = const()[name = string("normed_43_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_43_cast_fp16 = slice_by_index(begin = normed_43_begin_0, end = normed_43_end_0, end_mask = normed_43_end_mask_0, x = normed_41_cast_fp16)[name = string("normed_43_cast_fp16")]; - tensor var_2788_to_fp16 = const()[name = string("op_2788_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63521472)))]; - tensor x_29_cast_fp16 = mul(x = normed_43_cast_fp16, y = var_2788_to_fp16)[name = string("x_29_cast_fp16")]; - tensor var_2800 = const()[name = string("op_2800"), val = tensor([0, 2, 1])]; - tensor input_35_axes_0 = const()[name = string("input_35_axes_0"), val = tensor([2])]; - tensor var_2801_cast_fp16 = transpose(perm = var_2800, x = x_29_cast_fp16)[name = string("transpose_145")]; - tensor input_35_cast_fp16 = expand_dims(axes = input_35_axes_0, x = var_2801_cast_fp16)[name = string("input_35_cast_fp16")]; - string x_31_pad_type_0 = const()[name = string("x_31_pad_type_0"), val = string("valid")]; - tensor x_31_strides_0 = const()[name = string("x_31_strides_0"), val = tensor([1, 1])]; - tensor x_31_pad_0 = const()[name = string("x_31_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_31_dilations_0 = const()[name = string("x_31_dilations_0"), val = tensor([1, 1])]; - int32 x_31_groups_0 = const()[name = string("x_31_groups_0"), val = int32(1)]; - tensor model_model_layers_1_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63523840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69495872))))[name = string("model_model_layers_1_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_31_cast_fp16 = conv(dilations = x_31_dilations_0, groups = x_31_groups_0, pad = x_31_pad_0, pad_type = x_31_pad_type_0, strides = x_31_strides_0, weight = model_model_layers_1_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_35_cast_fp16)[name = string("x_31_cast_fp16")]; - string b_3_pad_type_0 = const()[name = string("b_3_pad_type_0"), val = string("valid")]; - tensor b_3_strides_0 = const()[name = string("b_3_strides_0"), val = tensor([1, 1])]; - tensor b_3_pad_0 = const()[name = string("b_3_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_3_dilations_0 = const()[name = string("b_3_dilations_0"), val = tensor([1, 1])]; - int32 b_3_groups_0 = const()[name = string("b_3_groups_0"), val = int32(1)]; - tensor model_model_layers_1_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69606528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75578560))))[name = string("model_model_layers_1_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_3_cast_fp16 = conv(dilations = b_3_dilations_0, groups = b_3_groups_0, pad = b_3_pad_0, pad_type = b_3_pad_type_0, strides = b_3_strides_0, weight = model_model_layers_1_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_35_cast_fp16)[name = string("b_3_cast_fp16")]; - string var_2826_mode_0 = const()[name = string("op_2826_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_2826_cast_fp16 = gelu(mode = var_2826_mode_0, x = x_31_cast_fp16)[name = string("op_2826_cast_fp16")]; - tensor input_37_cast_fp16 = mul(x = var_2826_cast_fp16, y = b_3_cast_fp16)[name = string("input_37_cast_fp16")]; - string e_3_pad_type_0 = const()[name = string("e_3_pad_type_0"), val = string("valid")]; - tensor e_3_strides_0 = const()[name = string("e_3_strides_0"), val = tensor([1, 1])]; - tensor e_3_pad_0 = const()[name = string("e_3_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_3_dilations_0 = const()[name = string("e_3_dilations_0"), val = tensor([1, 1])]; - int32 e_3_groups_0 = const()[name = string("e_3_groups_0"), val = int32(1)]; - tensor model_model_layers_1_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75689216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81661248))))[name = string("model_model_layers_1_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_3_cast_fp16 = conv(dilations = e_3_dilations_0, groups = e_3_groups_0, pad = e_3_pad_0, pad_type = e_3_pad_type_0, strides = e_3_strides_0, weight = model_model_layers_1_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_37_cast_fp16)[name = string("e_3_cast_fp16")]; - tensor var_2834_axes_0 = const()[name = string("op_2834_axes_0"), val = tensor([2])]; - tensor var_2834_cast_fp16 = squeeze(axes = var_2834_axes_0, x = e_3_cast_fp16)[name = string("op_2834_cast_fp16")]; - tensor var_2835 = const()[name = string("op_2835"), val = tensor([0, 2, 1])]; - int32 var_2846 = const()[name = string("op_2846"), val = int32(-1)]; - fp16 const_72_promoted_to_fp16 = const()[name = string("const_72_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_29_cast_fp16 = transpose(perm = var_2835, x = var_2834_cast_fp16)[name = string("transpose_144")]; - tensor var_2848_cast_fp16 = mul(x = hidden_states_29_cast_fp16, y = const_72_promoted_to_fp16)[name = string("op_2848_cast_fp16")]; - bool input_39_interleave_0 = const()[name = string("input_39_interleave_0"), val = bool(false)]; - tensor input_39_cast_fp16 = concat(axis = var_2846, interleave = input_39_interleave_0, values = (hidden_states_29_cast_fp16, var_2848_cast_fp16))[name = string("input_39_cast_fp16")]; - tensor normed_45_axes_0 = const()[name = string("normed_45_axes_0"), val = tensor([-1])]; - fp16 var_2843_to_fp16 = const()[name = string("op_2843_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_45_cast_fp16 = layer_norm(axes = normed_45_axes_0, epsilon = var_2843_to_fp16, x = input_39_cast_fp16)[name = string("normed_45_cast_fp16")]; - tensor normed_47_begin_0 = const()[name = string("normed_47_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_47_end_0 = const()[name = string("normed_47_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_47_end_mask_0 = const()[name = string("normed_47_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_47_cast_fp16 = slice_by_index(begin = normed_47_begin_0, end = normed_47_end_0, end_mask = normed_47_end_mask_0, x = normed_45_cast_fp16)[name = string("normed_47_cast_fp16")]; - tensor var_2862_to_fp16 = const()[name = string("op_2862_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81679744)))]; - tensor hidden_states_31_cast_fp16 = mul(x = normed_47_cast_fp16, y = var_2862_to_fp16)[name = string("hidden_states_31_cast_fp16")]; - tensor hidden_states_33_cast_fp16 = add(x = hidden_states_27_cast_fp16, y = hidden_states_31_cast_fp16)[name = string("hidden_states_33_cast_fp16")]; - int32 var_2913 = const()[name = string("op_2913"), val = int32(-1)]; - fp16 const_76_promoted_to_fp16 = const()[name = string("const_76_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_2915_cast_fp16 = mul(x = hidden_states_33_cast_fp16, y = const_76_promoted_to_fp16)[name = string("op_2915_cast_fp16")]; - bool input_41_interleave_0 = const()[name = string("input_41_interleave_0"), val = bool(false)]; - tensor input_41_cast_fp16 = concat(axis = var_2913, interleave = input_41_interleave_0, values = (hidden_states_33_cast_fp16, var_2915_cast_fp16))[name = string("input_41_cast_fp16")]; - tensor normed_49_axes_0 = const()[name = string("normed_49_axes_0"), val = tensor([-1])]; - fp16 var_2910_to_fp16 = const()[name = string("op_2910_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_49_cast_fp16 = layer_norm(axes = normed_49_axes_0, epsilon = var_2910_to_fp16, x = input_41_cast_fp16)[name = string("normed_49_cast_fp16")]; - tensor normed_51_begin_0 = const()[name = string("normed_51_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_51_end_0 = const()[name = string("normed_51_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_51_end_mask_0 = const()[name = string("normed_51_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_51_cast_fp16 = slice_by_index(begin = normed_51_begin_0, end = normed_51_end_0, end_mask = normed_51_end_mask_0, x = normed_49_cast_fp16)[name = string("normed_51_cast_fp16")]; - tensor var_2929_to_fp16 = const()[name = string("op_2929_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81682112)))]; - tensor hidden_states_35_cast_fp16 = mul(x = normed_51_cast_fp16, y = var_2929_to_fp16)[name = string("hidden_states_35_cast_fp16")]; - tensor var_2934 = const()[name = string("op_2934"), val = tensor([0, 2, 1])]; - tensor var_2937_axes_0 = const()[name = string("op_2937_axes_0"), val = tensor([2])]; - tensor var_2935_cast_fp16 = transpose(perm = var_2934, x = hidden_states_35_cast_fp16)[name = string("transpose_143")]; - tensor var_2937_cast_fp16 = expand_dims(axes = var_2937_axes_0, x = var_2935_cast_fp16)[name = string("op_2937_cast_fp16")]; - string var_2953_pad_type_0 = const()[name = string("op_2953_pad_type_0"), val = string("valid")]; - tensor var_2953_strides_0 = const()[name = string("op_2953_strides_0"), val = tensor([1, 1])]; - tensor var_2953_pad_0 = const()[name = string("op_2953_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2953_dilations_0 = const()[name = string("op_2953_dilations_0"), val = tensor([1, 1])]; - int32 var_2953_groups_0 = const()[name = string("op_2953_groups_0"), val = int32(1)]; - tensor var_2953 = conv(dilations = var_2953_dilations_0, groups = var_2953_groups_0, pad = var_2953_pad_0, pad_type = var_2953_pad_type_0, strides = var_2953_strides_0, weight = model_model_layers_2_self_attn_q_proj_weight_palettized, x = var_2937_cast_fp16)[name = string("op_2953")]; - tensor var_2958 = const()[name = string("op_2958"), val = tensor([1, 4, 1, 256])]; - tensor var_2959 = reshape(shape = var_2958, x = var_2953)[name = string("op_2959")]; - string var_2975_pad_type_0 = const()[name = string("op_2975_pad_type_0"), val = string("valid")]; - tensor var_2975_strides_0 = const()[name = string("op_2975_strides_0"), val = tensor([1, 1])]; - tensor var_2975_pad_0 = const()[name = string("op_2975_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2975_dilations_0 = const()[name = string("op_2975_dilations_0"), val = tensor([1, 1])]; - int32 var_2975_groups_0 = const()[name = string("op_2975_groups_0"), val = int32(1)]; - tensor var_2975 = conv(dilations = var_2975_dilations_0, groups = var_2975_groups_0, pad = var_2975_pad_0, pad_type = var_2975_pad_type_0, strides = var_2975_strides_0, weight = model_model_layers_2_self_attn_k_proj_weight_palettized, x = var_2937_cast_fp16)[name = string("op_2975")]; - tensor var_2980 = const()[name = string("op_2980"), val = tensor([1, 1, 1, 256])]; - tensor var_2981 = reshape(shape = var_2980, x = var_2975)[name = string("op_2981")]; - string var_2997_pad_type_0 = const()[name = string("op_2997_pad_type_0"), val = string("valid")]; - tensor var_2997_strides_0 = const()[name = string("op_2997_strides_0"), val = tensor([1, 1])]; - tensor var_2997_pad_0 = const()[name = string("op_2997_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2997_dilations_0 = const()[name = string("op_2997_dilations_0"), val = tensor([1, 1])]; - int32 var_2997_groups_0 = const()[name = string("op_2997_groups_0"), val = int32(1)]; - tensor var_2997 = conv(dilations = var_2997_dilations_0, groups = var_2997_groups_0, pad = var_2997_pad_0, pad_type = var_2997_pad_type_0, strides = var_2997_strides_0, weight = model_model_layers_2_self_attn_v_proj_weight_palettized, x = var_2937_cast_fp16)[name = string("op_2997")]; - tensor var_3002 = const()[name = string("op_3002"), val = tensor([1, 1, 1, 256])]; - tensor var_3003 = reshape(shape = var_3002, x = var_2997)[name = string("op_3003")]; - int32 var_3018 = const()[name = string("op_3018"), val = int32(-1)]; - fp16 const_80_promoted = const()[name = string("const_80_promoted"), val = fp16(-0x1p+0)]; - tensor var_3020 = mul(x = var_2959, y = const_80_promoted)[name = string("op_3020")]; - bool input_45_interleave_0 = const()[name = string("input_45_interleave_0"), val = bool(false)]; - tensor input_45 = concat(axis = var_3018, interleave = input_45_interleave_0, values = (var_2959, var_3020))[name = string("input_45")]; - tensor normed_53_axes_0 = const()[name = string("normed_53_axes_0"), val = tensor([-1])]; - fp16 var_3015_to_fp16 = const()[name = string("op_3015_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_53_cast_fp16 = layer_norm(axes = normed_53_axes_0, epsilon = var_3015_to_fp16, x = input_45)[name = string("normed_53_cast_fp16")]; - tensor normed_55_begin_0 = const()[name = string("normed_55_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_55_end_0 = const()[name = string("normed_55_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_55_end_mask_0 = const()[name = string("normed_55_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_55 = slice_by_index(begin = normed_55_begin_0, end = normed_55_end_0, end_mask = normed_55_end_mask_0, x = normed_53_cast_fp16)[name = string("normed_55")]; - tensor var_3034_to_fp16 = const()[name = string("op_3034_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81684480)))]; - tensor q_5_cast_fp16 = mul(x = normed_55, y = var_3034_to_fp16)[name = string("q_5_cast_fp16")]; - int32 var_3045 = const()[name = string("op_3045"), val = int32(-1)]; - fp16 const_84_promoted = const()[name = string("const_84_promoted"), val = fp16(-0x1p+0)]; - tensor var_3047 = mul(x = var_2981, y = const_84_promoted)[name = string("op_3047")]; - bool input_47_interleave_0 = const()[name = string("input_47_interleave_0"), val = bool(false)]; - tensor input_47 = concat(axis = var_3045, interleave = input_47_interleave_0, values = (var_2981, var_3047))[name = string("input_47")]; - tensor normed_57_axes_0 = const()[name = string("normed_57_axes_0"), val = tensor([-1])]; - fp16 var_3042_to_fp16 = const()[name = string("op_3042_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_57_cast_fp16 = layer_norm(axes = normed_57_axes_0, epsilon = var_3042_to_fp16, x = input_47)[name = string("normed_57_cast_fp16")]; - tensor normed_59_begin_0 = const()[name = string("normed_59_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_59_end_0 = const()[name = string("normed_59_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_59_end_mask_0 = const()[name = string("normed_59_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_59 = slice_by_index(begin = normed_59_begin_0, end = normed_59_end_0, end_mask = normed_59_end_mask_0, x = normed_57_cast_fp16)[name = string("normed_59")]; - tensor var_3061_to_fp16 = const()[name = string("op_3061_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81685056)))]; - tensor k_5_cast_fp16 = mul(x = normed_59, y = var_3061_to_fp16)[name = string("k_5_cast_fp16")]; - tensor var_3063_cast_fp16 = mul(x = q_5_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3063_cast_fp16")]; - tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_9_cast_fp16 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = q_5_cast_fp16)[name = string("x1_9_cast_fp16")]; - tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_9_cast_fp16 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = q_5_cast_fp16)[name = string("x2_9_cast_fp16")]; - fp16 const_90_promoted_to_fp16 = const()[name = string("const_90_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_3084_cast_fp16 = mul(x = x2_9_cast_fp16, y = const_90_promoted_to_fp16)[name = string("op_3084_cast_fp16")]; - int32 var_3086 = const()[name = string("op_3086"), val = int32(-1)]; - bool var_3087_interleave_0 = const()[name = string("op_3087_interleave_0"), val = bool(false)]; - tensor var_3087_cast_fp16 = concat(axis = var_3086, interleave = var_3087_interleave_0, values = (var_3084_cast_fp16, x1_9_cast_fp16))[name = string("op_3087_cast_fp16")]; - tensor var_3088_cast_fp16 = mul(x = var_3087_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3088_cast_fp16")]; - tensor query_states_9_cast_fp16 = add(x = var_3063_cast_fp16, y = var_3088_cast_fp16)[name = string("query_states_9_cast_fp16")]; - tensor var_3091_cast_fp16 = mul(x = k_5_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3091_cast_fp16")]; - tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_11_cast_fp16 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = k_5_cast_fp16)[name = string("x1_11_cast_fp16")]; - tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_11_cast_fp16 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = k_5_cast_fp16)[name = string("x2_11_cast_fp16")]; - fp16 const_93_promoted_to_fp16 = const()[name = string("const_93_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_3112_cast_fp16 = mul(x = x2_11_cast_fp16, y = const_93_promoted_to_fp16)[name = string("op_3112_cast_fp16")]; - int32 var_3114 = const()[name = string("op_3114"), val = int32(-1)]; - bool var_3115_interleave_0 = const()[name = string("op_3115_interleave_0"), val = bool(false)]; - tensor var_3115_cast_fp16 = concat(axis = var_3114, interleave = var_3115_interleave_0, values = (var_3112_cast_fp16, x1_11_cast_fp16))[name = string("op_3115_cast_fp16")]; - tensor var_3116_cast_fp16 = mul(x = var_3115_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3116_cast_fp16")]; - tensor key_states_9_cast_fp16 = add(x = var_3091_cast_fp16, y = var_3116_cast_fp16)[name = string("key_states_9_cast_fp16")]; - tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([2])]; - tensor expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor([0])]; - tensor expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor([0])]; - tensor expand_dims_28 = const()[name = string("expand_dims_28"), val = tensor([3])]; - int32 concat_18_axis_0 = const()[name = string("concat_18_axis_0"), val = int32(0)]; - bool concat_18_interleave_0 = const()[name = string("concat_18_interleave_0"), val = bool(false)]; - tensor concat_18 = concat(axis = concat_18_axis_0, interleave = concat_18_interleave_0, values = (expand_dims_24, expand_dims_25, current_pos, expand_dims_27))[name = string("concat_18")]; - tensor concat_19_values1_0 = const()[name = string("concat_19_values1_0"), val = tensor([0])]; - tensor concat_19_values3_0 = const()[name = string("concat_19_values3_0"), val = tensor([0])]; - int32 concat_19_axis_0 = const()[name = string("concat_19_axis_0"), val = int32(0)]; - bool concat_19_interleave_0 = const()[name = string("concat_19_interleave_0"), val = bool(false)]; - tensor concat_19 = concat(axis = concat_19_axis_0, interleave = concat_19_interleave_0, values = (expand_dims_28, concat_19_values1_0, var_1909, concat_19_values3_0))[name = string("concat_19")]; - tensor model_model_kv_cache_local_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_18, begin_mask = model_model_kv_cache_local_internal_tensor_assign_5_begin_mask_0, end = concat_19, end_mask = model_model_kv_cache_local_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_5_stride_0, update = key_states_9_cast_fp16, x = coreml_update_state_55)[name = string("model_model_kv_cache_local_internal_tensor_assign_5_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_4_write_state")]; - tensor coreml_update_state_56 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_4")]; - tensor expand_dims_30 = const()[name = string("expand_dims_30"), val = tensor([24])]; - tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([0])]; - tensor expand_dims_33 = const()[name = string("expand_dims_33"), val = tensor([0])]; - tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([25])]; - int32 concat_22_axis_0 = const()[name = string("concat_22_axis_0"), val = int32(0)]; - bool concat_22_interleave_0 = const()[name = string("concat_22_interleave_0"), val = bool(false)]; - tensor concat_22 = concat(axis = concat_22_axis_0, interleave = concat_22_interleave_0, values = (expand_dims_30, expand_dims_31, current_pos, expand_dims_33))[name = string("concat_22")]; - tensor concat_23_values1_0 = const()[name = string("concat_23_values1_0"), val = tensor([0])]; - tensor concat_23_values3_0 = const()[name = string("concat_23_values3_0"), val = tensor([0])]; - int32 concat_23_axis_0 = const()[name = string("concat_23_axis_0"), val = int32(0)]; - bool concat_23_interleave_0 = const()[name = string("concat_23_interleave_0"), val = bool(false)]; - tensor concat_23 = concat(axis = concat_23_axis_0, interleave = concat_23_interleave_0, values = (expand_dims_34, concat_23_values1_0, var_1909, concat_23_values3_0))[name = string("concat_23")]; - tensor model_model_kv_cache_local_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_22, begin_mask = model_model_kv_cache_local_internal_tensor_assign_6_begin_mask_0, end = concat_23, end_mask = model_model_kv_cache_local_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_6_stride_0, update = var_3003, x = coreml_update_state_56)[name = string("model_model_kv_cache_local_internal_tensor_assign_6_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_5_write_state")]; - tensor coreml_update_state_57 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_5")]; - tensor var_3171_begin_0 = const()[name = string("op_3171_begin_0"), val = tensor([2, 0, 0, 0])]; - tensor var_3171_end_0 = const()[name = string("op_3171_end_0"), val = tensor([3, 1, 512, 256])]; - tensor var_3171_end_mask_0 = const()[name = string("op_3171_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_3171_cast_fp16 = slice_by_index(begin = var_3171_begin_0, end = var_3171_end_0, end_mask = var_3171_end_mask_0, x = coreml_update_state_57)[name = string("op_3171_cast_fp16")]; - tensor var_3178_begin_0 = const()[name = string("op_3178_begin_0"), val = tensor([24, 0, 0, 0])]; - tensor var_3178_end_0 = const()[name = string("op_3178_end_0"), val = tensor([25, 1, 512, 256])]; - tensor var_3178_end_mask_0 = const()[name = string("op_3178_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_3178_cast_fp16 = slice_by_index(begin = var_3178_begin_0, end = var_3178_end_0, end_mask = var_3178_end_mask_0, x = coreml_update_state_57)[name = string("op_3178_cast_fp16")]; - tensor var_3215 = const()[name = string("op_3215"), val = tensor([1, 4, 1, 1])]; - tensor x_37_cast_fp16 = tile(reps = var_3215, x = var_3171_cast_fp16)[name = string("x_37_cast_fp16")]; - tensor var_3235 = const()[name = string("op_3235"), val = tensor([1, 4, 1, 1])]; - tensor x_43_cast_fp16 = tile(reps = var_3235, x = var_3178_cast_fp16)[name = string("x_43_cast_fp16")]; - bool var_3262_transpose_x_1 = const()[name = string("op_3262_transpose_x_1"), val = bool(false)]; - bool var_3262_transpose_y_1 = const()[name = string("op_3262_transpose_y_1"), val = bool(true)]; - tensor var_3262 = matmul(transpose_x = var_3262_transpose_x_1, transpose_y = var_3262_transpose_y_1, x = query_states_9_cast_fp16, y = x_37_cast_fp16)[name = string("op_3262")]; - fp16 var_3263_to_fp16 = const()[name = string("op_3263_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_13_cast_fp16 = mul(x = var_3262, y = var_3263_to_fp16)[name = string("attn_weights_13_cast_fp16")]; - tensor attn_weights_15_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = var_2083)[name = string("attn_weights_15_cast_fp16")]; - int32 var_3298 = const()[name = string("op_3298"), val = int32(-1)]; - tensor attn_weights_17_cast_fp16 = softmax(axis = var_3298, x = attn_weights_15_cast_fp16)[name = string("attn_weights_17_cast_fp16")]; - bool attn_output_21_transpose_x_0 = const()[name = string("attn_output_21_transpose_x_0"), val = bool(false)]; - bool attn_output_21_transpose_y_0 = const()[name = string("attn_output_21_transpose_y_0"), val = bool(false)]; - tensor attn_output_21_cast_fp16 = matmul(transpose_x = attn_output_21_transpose_x_0, transpose_y = attn_output_21_transpose_y_0, x = attn_weights_17_cast_fp16, y = x_43_cast_fp16)[name = string("attn_output_21_cast_fp16")]; - tensor var_3309_perm_0 = const()[name = string("op_3309_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_3313 = const()[name = string("op_3313"), val = tensor([1, 1, 1024])]; - tensor var_3309_cast_fp16 = transpose(perm = var_3309_perm_0, x = attn_output_21_cast_fp16)[name = string("transpose_142")]; - tensor attn_output_25_cast_fp16 = reshape(shape = var_3313, x = var_3309_cast_fp16)[name = string("attn_output_25_cast_fp16")]; - tensor var_3318 = const()[name = string("op_3318"), val = tensor([0, 2, 1])]; - string var_3334_pad_type_0 = const()[name = string("op_3334_pad_type_0"), val = string("valid")]; - int32 var_3334_groups_0 = const()[name = string("op_3334_groups_0"), val = int32(1)]; - tensor var_3334_strides_0 = const()[name = string("op_3334_strides_0"), val = tensor([1])]; - tensor var_3334_pad_0 = const()[name = string("op_3334_pad_0"), val = tensor([0, 0])]; - tensor var_3334_dilations_0 = const()[name = string("op_3334_dilations_0"), val = tensor([1])]; - tensor squeeze_2_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81685632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82570432))))[name = string("squeeze_2_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_3319_cast_fp16 = transpose(perm = var_3318, x = attn_output_25_cast_fp16)[name = string("transpose_141")]; - tensor var_3334_cast_fp16 = conv(dilations = var_3334_dilations_0, groups = var_3334_groups_0, pad = var_3334_pad_0, pad_type = var_3334_pad_type_0, strides = var_3334_strides_0, weight = squeeze_2_cast_fp16_to_fp32_to_fp16_palettized, x = var_3319_cast_fp16)[name = string("op_3334_cast_fp16")]; - tensor var_3338 = const()[name = string("op_3338"), val = tensor([0, 2, 1])]; - int32 var_3349 = const()[name = string("op_3349"), val = int32(-1)]; - fp16 const_102_promoted_to_fp16 = const()[name = string("const_102_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_41_cast_fp16 = transpose(perm = var_3338, x = var_3334_cast_fp16)[name = string("transpose_140")]; - tensor var_3351_cast_fp16 = mul(x = hidden_states_41_cast_fp16, y = const_102_promoted_to_fp16)[name = string("op_3351_cast_fp16")]; - bool input_51_interleave_0 = const()[name = string("input_51_interleave_0"), val = bool(false)]; - tensor input_51_cast_fp16 = concat(axis = var_3349, interleave = input_51_interleave_0, values = (hidden_states_41_cast_fp16, var_3351_cast_fp16))[name = string("input_51_cast_fp16")]; - tensor normed_61_axes_0 = const()[name = string("normed_61_axes_0"), val = tensor([-1])]; - fp16 var_3346_to_fp16 = const()[name = string("op_3346_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_61_cast_fp16 = layer_norm(axes = normed_61_axes_0, epsilon = var_3346_to_fp16, x = input_51_cast_fp16)[name = string("normed_61_cast_fp16")]; - tensor normed_63_begin_0 = const()[name = string("normed_63_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_63_end_0 = const()[name = string("normed_63_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_63_end_mask_0 = const()[name = string("normed_63_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_63_cast_fp16 = slice_by_index(begin = normed_63_begin_0, end = normed_63_end_0, end_mask = normed_63_end_mask_0, x = normed_61_cast_fp16)[name = string("normed_63_cast_fp16")]; - tensor var_3365_to_fp16 = const()[name = string("op_3365_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82588928)))]; - tensor attn_output_29_cast_fp16 = mul(x = normed_63_cast_fp16, y = var_3365_to_fp16)[name = string("attn_output_29_cast_fp16")]; - tensor hidden_states_43_cast_fp16 = add(x = hidden_states_33_cast_fp16, y = attn_output_29_cast_fp16)[name = string("hidden_states_43_cast_fp16")]; - int32 var_3378 = const()[name = string("op_3378"), val = int32(-1)]; - fp16 const_106_promoted_to_fp16 = const()[name = string("const_106_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_3380_cast_fp16 = mul(x = hidden_states_43_cast_fp16, y = const_106_promoted_to_fp16)[name = string("op_3380_cast_fp16")]; - bool input_53_interleave_0 = const()[name = string("input_53_interleave_0"), val = bool(false)]; - tensor input_53_cast_fp16 = concat(axis = var_3378, interleave = input_53_interleave_0, values = (hidden_states_43_cast_fp16, var_3380_cast_fp16))[name = string("input_53_cast_fp16")]; - tensor normed_65_axes_0 = const()[name = string("normed_65_axes_0"), val = tensor([-1])]; - fp16 var_3375_to_fp16 = const()[name = string("op_3375_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_65_cast_fp16 = layer_norm(axes = normed_65_axes_0, epsilon = var_3375_to_fp16, x = input_53_cast_fp16)[name = string("normed_65_cast_fp16")]; - tensor normed_67_begin_0 = const()[name = string("normed_67_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_67_end_0 = const()[name = string("normed_67_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_67_end_mask_0 = const()[name = string("normed_67_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_67_cast_fp16 = slice_by_index(begin = normed_67_begin_0, end = normed_67_end_0, end_mask = normed_67_end_mask_0, x = normed_65_cast_fp16)[name = string("normed_67_cast_fp16")]; - tensor var_3394_to_fp16 = const()[name = string("op_3394_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82591296)))]; - tensor x_45_cast_fp16 = mul(x = normed_67_cast_fp16, y = var_3394_to_fp16)[name = string("x_45_cast_fp16")]; - tensor var_3406 = const()[name = string("op_3406"), val = tensor([0, 2, 1])]; - tensor input_55_axes_0 = const()[name = string("input_55_axes_0"), val = tensor([2])]; - tensor var_3407_cast_fp16 = transpose(perm = var_3406, x = x_45_cast_fp16)[name = string("transpose_139")]; - tensor input_55_cast_fp16 = expand_dims(axes = input_55_axes_0, x = var_3407_cast_fp16)[name = string("input_55_cast_fp16")]; - string x_47_pad_type_0 = const()[name = string("x_47_pad_type_0"), val = string("valid")]; - tensor x_47_strides_0 = const()[name = string("x_47_strides_0"), val = tensor([1, 1])]; - tensor x_47_pad_0 = const()[name = string("x_47_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_47_dilations_0 = const()[name = string("x_47_dilations_0"), val = tensor([1, 1])]; - int32 x_47_groups_0 = const()[name = string("x_47_groups_0"), val = int32(1)]; - tensor model_model_layers_2_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82593664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88565696))))[name = string("model_model_layers_2_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_47_cast_fp16 = conv(dilations = x_47_dilations_0, groups = x_47_groups_0, pad = x_47_pad_0, pad_type = x_47_pad_type_0, strides = x_47_strides_0, weight = model_model_layers_2_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_55_cast_fp16)[name = string("x_47_cast_fp16")]; - string b_5_pad_type_0 = const()[name = string("b_5_pad_type_0"), val = string("valid")]; - tensor b_5_strides_0 = const()[name = string("b_5_strides_0"), val = tensor([1, 1])]; - tensor b_5_pad_0 = const()[name = string("b_5_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_5_dilations_0 = const()[name = string("b_5_dilations_0"), val = tensor([1, 1])]; - int32 b_5_groups_0 = const()[name = string("b_5_groups_0"), val = int32(1)]; - tensor model_model_layers_2_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88676352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94648384))))[name = string("model_model_layers_2_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_5_cast_fp16 = conv(dilations = b_5_dilations_0, groups = b_5_groups_0, pad = b_5_pad_0, pad_type = b_5_pad_type_0, strides = b_5_strides_0, weight = model_model_layers_2_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_55_cast_fp16)[name = string("b_5_cast_fp16")]; - string var_3432_mode_0 = const()[name = string("op_3432_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_3432_cast_fp16 = gelu(mode = var_3432_mode_0, x = x_47_cast_fp16)[name = string("op_3432_cast_fp16")]; - tensor input_57_cast_fp16 = mul(x = var_3432_cast_fp16, y = b_5_cast_fp16)[name = string("input_57_cast_fp16")]; - string e_5_pad_type_0 = const()[name = string("e_5_pad_type_0"), val = string("valid")]; - tensor e_5_strides_0 = const()[name = string("e_5_strides_0"), val = tensor([1, 1])]; - tensor e_5_pad_0 = const()[name = string("e_5_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_5_dilations_0 = const()[name = string("e_5_dilations_0"), val = tensor([1, 1])]; - int32 e_5_groups_0 = const()[name = string("e_5_groups_0"), val = int32(1)]; - tensor model_model_layers_2_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94759040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100731072))))[name = string("model_model_layers_2_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_5_cast_fp16 = conv(dilations = e_5_dilations_0, groups = e_5_groups_0, pad = e_5_pad_0, pad_type = e_5_pad_type_0, strides = e_5_strides_0, weight = model_model_layers_2_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_57_cast_fp16)[name = string("e_5_cast_fp16")]; - tensor var_3440_axes_0 = const()[name = string("op_3440_axes_0"), val = tensor([2])]; - tensor var_3440_cast_fp16 = squeeze(axes = var_3440_axes_0, x = e_5_cast_fp16)[name = string("op_3440_cast_fp16")]; - tensor var_3441 = const()[name = string("op_3441"), val = tensor([0, 2, 1])]; - int32 var_3452 = const()[name = string("op_3452"), val = int32(-1)]; - fp16 const_110_promoted_to_fp16 = const()[name = string("const_110_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_45_cast_fp16 = transpose(perm = var_3441, x = var_3440_cast_fp16)[name = string("transpose_138")]; - tensor var_3454_cast_fp16 = mul(x = hidden_states_45_cast_fp16, y = const_110_promoted_to_fp16)[name = string("op_3454_cast_fp16")]; - bool input_59_interleave_0 = const()[name = string("input_59_interleave_0"), val = bool(false)]; - tensor input_59_cast_fp16 = concat(axis = var_3452, interleave = input_59_interleave_0, values = (hidden_states_45_cast_fp16, var_3454_cast_fp16))[name = string("input_59_cast_fp16")]; - tensor normed_69_axes_0 = const()[name = string("normed_69_axes_0"), val = tensor([-1])]; - fp16 var_3449_to_fp16 = const()[name = string("op_3449_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_69_cast_fp16 = layer_norm(axes = normed_69_axes_0, epsilon = var_3449_to_fp16, x = input_59_cast_fp16)[name = string("normed_69_cast_fp16")]; - tensor normed_71_begin_0 = const()[name = string("normed_71_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_71_end_0 = const()[name = string("normed_71_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_71_end_mask_0 = const()[name = string("normed_71_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_71_cast_fp16 = slice_by_index(begin = normed_71_begin_0, end = normed_71_end_0, end_mask = normed_71_end_mask_0, x = normed_69_cast_fp16)[name = string("normed_71_cast_fp16")]; - tensor var_3468_to_fp16 = const()[name = string("op_3468_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100749568)))]; - tensor hidden_states_47_cast_fp16 = mul(x = normed_71_cast_fp16, y = var_3468_to_fp16)[name = string("hidden_states_47_cast_fp16")]; - tensor hidden_states_49_cast_fp16 = add(x = hidden_states_43_cast_fp16, y = hidden_states_47_cast_fp16)[name = string("hidden_states_49_cast_fp16")]; - int32 var_3519 = const()[name = string("op_3519"), val = int32(-1)]; - fp16 const_114_promoted_to_fp16 = const()[name = string("const_114_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_3521_cast_fp16 = mul(x = hidden_states_49_cast_fp16, y = const_114_promoted_to_fp16)[name = string("op_3521_cast_fp16")]; - bool input_61_interleave_0 = const()[name = string("input_61_interleave_0"), val = bool(false)]; - tensor input_61_cast_fp16 = concat(axis = var_3519, interleave = input_61_interleave_0, values = (hidden_states_49_cast_fp16, var_3521_cast_fp16))[name = string("input_61_cast_fp16")]; - tensor normed_73_axes_0 = const()[name = string("normed_73_axes_0"), val = tensor([-1])]; - fp16 var_3516_to_fp16 = const()[name = string("op_3516_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_73_cast_fp16 = layer_norm(axes = normed_73_axes_0, epsilon = var_3516_to_fp16, x = input_61_cast_fp16)[name = string("normed_73_cast_fp16")]; - tensor normed_75_begin_0 = const()[name = string("normed_75_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_75_end_0 = const()[name = string("normed_75_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_75_end_mask_0 = const()[name = string("normed_75_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_75_cast_fp16 = slice_by_index(begin = normed_75_begin_0, end = normed_75_end_0, end_mask = normed_75_end_mask_0, x = normed_73_cast_fp16)[name = string("normed_75_cast_fp16")]; - tensor var_3535_to_fp16 = const()[name = string("op_3535_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100751936)))]; - tensor hidden_states_51_cast_fp16 = mul(x = normed_75_cast_fp16, y = var_3535_to_fp16)[name = string("hidden_states_51_cast_fp16")]; - tensor var_3540 = const()[name = string("op_3540"), val = tensor([0, 2, 1])]; - tensor var_3543_axes_0 = const()[name = string("op_3543_axes_0"), val = tensor([2])]; - tensor var_3541_cast_fp16 = transpose(perm = var_3540, x = hidden_states_51_cast_fp16)[name = string("transpose_137")]; - tensor var_3543_cast_fp16 = expand_dims(axes = var_3543_axes_0, x = var_3541_cast_fp16)[name = string("op_3543_cast_fp16")]; - string var_3559_pad_type_0 = const()[name = string("op_3559_pad_type_0"), val = string("valid")]; - tensor var_3559_strides_0 = const()[name = string("op_3559_strides_0"), val = tensor([1, 1])]; - tensor var_3559_pad_0 = const()[name = string("op_3559_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_3559_dilations_0 = const()[name = string("op_3559_dilations_0"), val = tensor([1, 1])]; - int32 var_3559_groups_0 = const()[name = string("op_3559_groups_0"), val = int32(1)]; - tensor var_3559 = conv(dilations = var_3559_dilations_0, groups = var_3559_groups_0, pad = var_3559_pad_0, pad_type = var_3559_pad_type_0, strides = var_3559_strides_0, weight = model_model_layers_3_self_attn_q_proj_weight_palettized, x = var_3543_cast_fp16)[name = string("op_3559")]; - tensor var_3564 = const()[name = string("op_3564"), val = tensor([1, 4, 1, 256])]; - tensor var_3565 = reshape(shape = var_3564, x = var_3559)[name = string("op_3565")]; - string var_3581_pad_type_0 = const()[name = string("op_3581_pad_type_0"), val = string("valid")]; - tensor var_3581_strides_0 = const()[name = string("op_3581_strides_0"), val = tensor([1, 1])]; - tensor var_3581_pad_0 = const()[name = string("op_3581_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_3581_dilations_0 = const()[name = string("op_3581_dilations_0"), val = tensor([1, 1])]; - int32 var_3581_groups_0 = const()[name = string("op_3581_groups_0"), val = int32(1)]; - tensor var_3581 = conv(dilations = var_3581_dilations_0, groups = var_3581_groups_0, pad = var_3581_pad_0, pad_type = var_3581_pad_type_0, strides = var_3581_strides_0, weight = model_model_layers_3_self_attn_k_proj_weight_palettized, x = var_3543_cast_fp16)[name = string("op_3581")]; - tensor var_3586 = const()[name = string("op_3586"), val = tensor([1, 1, 1, 256])]; - tensor var_3587 = reshape(shape = var_3586, x = var_3581)[name = string("op_3587")]; - string var_3603_pad_type_0 = const()[name = string("op_3603_pad_type_0"), val = string("valid")]; - tensor var_3603_strides_0 = const()[name = string("op_3603_strides_0"), val = tensor([1, 1])]; - tensor var_3603_pad_0 = const()[name = string("op_3603_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_3603_dilations_0 = const()[name = string("op_3603_dilations_0"), val = tensor([1, 1])]; - int32 var_3603_groups_0 = const()[name = string("op_3603_groups_0"), val = int32(1)]; - tensor var_3603 = conv(dilations = var_3603_dilations_0, groups = var_3603_groups_0, pad = var_3603_pad_0, pad_type = var_3603_pad_type_0, strides = var_3603_strides_0, weight = model_model_layers_3_self_attn_v_proj_weight_palettized, x = var_3543_cast_fp16)[name = string("op_3603")]; - tensor var_3608 = const()[name = string("op_3608"), val = tensor([1, 1, 1, 256])]; - tensor var_3609 = reshape(shape = var_3608, x = var_3603)[name = string("op_3609")]; - int32 var_3624 = const()[name = string("op_3624"), val = int32(-1)]; - fp16 const_118_promoted = const()[name = string("const_118_promoted"), val = fp16(-0x1p+0)]; - tensor var_3626 = mul(x = var_3565, y = const_118_promoted)[name = string("op_3626")]; - bool input_65_interleave_0 = const()[name = string("input_65_interleave_0"), val = bool(false)]; - tensor input_65 = concat(axis = var_3624, interleave = input_65_interleave_0, values = (var_3565, var_3626))[name = string("input_65")]; - tensor normed_77_axes_0 = const()[name = string("normed_77_axes_0"), val = tensor([-1])]; - fp16 var_3621_to_fp16 = const()[name = string("op_3621_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_77_cast_fp16 = layer_norm(axes = normed_77_axes_0, epsilon = var_3621_to_fp16, x = input_65)[name = string("normed_77_cast_fp16")]; - tensor normed_79_begin_0 = const()[name = string("normed_79_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_79_end_0 = const()[name = string("normed_79_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_79_end_mask_0 = const()[name = string("normed_79_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_79 = slice_by_index(begin = normed_79_begin_0, end = normed_79_end_0, end_mask = normed_79_end_mask_0, x = normed_77_cast_fp16)[name = string("normed_79")]; - tensor var_3640_to_fp16 = const()[name = string("op_3640_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100754304)))]; - tensor q_7_cast_fp16 = mul(x = normed_79, y = var_3640_to_fp16)[name = string("q_7_cast_fp16")]; - int32 var_3651 = const()[name = string("op_3651"), val = int32(-1)]; - fp16 const_122_promoted = const()[name = string("const_122_promoted"), val = fp16(-0x1p+0)]; - tensor var_3653 = mul(x = var_3587, y = const_122_promoted)[name = string("op_3653")]; - bool input_67_interleave_0 = const()[name = string("input_67_interleave_0"), val = bool(false)]; - tensor input_67 = concat(axis = var_3651, interleave = input_67_interleave_0, values = (var_3587, var_3653))[name = string("input_67")]; - tensor normed_81_axes_0 = const()[name = string("normed_81_axes_0"), val = tensor([-1])]; - fp16 var_3648_to_fp16 = const()[name = string("op_3648_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_81_cast_fp16 = layer_norm(axes = normed_81_axes_0, epsilon = var_3648_to_fp16, x = input_67)[name = string("normed_81_cast_fp16")]; - tensor normed_83_begin_0 = const()[name = string("normed_83_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_83_end_0 = const()[name = string("normed_83_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_83_end_mask_0 = const()[name = string("normed_83_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_83 = slice_by_index(begin = normed_83_begin_0, end = normed_83_end_0, end_mask = normed_83_end_mask_0, x = normed_81_cast_fp16)[name = string("normed_83")]; - tensor var_3667_to_fp16 = const()[name = string("op_3667_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100754880)))]; - tensor k_7_cast_fp16 = mul(x = normed_83, y = var_3667_to_fp16)[name = string("k_7_cast_fp16")]; - tensor var_3669_cast_fp16 = mul(x = q_7_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3669_cast_fp16")]; - tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_13_cast_fp16 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = q_7_cast_fp16)[name = string("x1_13_cast_fp16")]; - tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_13_cast_fp16 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = q_7_cast_fp16)[name = string("x2_13_cast_fp16")]; - fp16 const_128_promoted_to_fp16 = const()[name = string("const_128_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_3690_cast_fp16 = mul(x = x2_13_cast_fp16, y = const_128_promoted_to_fp16)[name = string("op_3690_cast_fp16")]; - int32 var_3692 = const()[name = string("op_3692"), val = int32(-1)]; - bool var_3693_interleave_0 = const()[name = string("op_3693_interleave_0"), val = bool(false)]; - tensor var_3693_cast_fp16 = concat(axis = var_3692, interleave = var_3693_interleave_0, values = (var_3690_cast_fp16, x1_13_cast_fp16))[name = string("op_3693_cast_fp16")]; - tensor var_3694_cast_fp16 = mul(x = var_3693_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3694_cast_fp16")]; - tensor query_states_13_cast_fp16 = add(x = var_3669_cast_fp16, y = var_3694_cast_fp16)[name = string("query_states_13_cast_fp16")]; - tensor var_3697_cast_fp16 = mul(x = k_7_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3697_cast_fp16")]; - tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_15_cast_fp16 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = k_7_cast_fp16)[name = string("x1_15_cast_fp16")]; - tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_15_cast_fp16 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = k_7_cast_fp16)[name = string("x2_15_cast_fp16")]; - fp16 const_131_promoted_to_fp16 = const()[name = string("const_131_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_3718_cast_fp16 = mul(x = x2_15_cast_fp16, y = const_131_promoted_to_fp16)[name = string("op_3718_cast_fp16")]; - int32 var_3720 = const()[name = string("op_3720"), val = int32(-1)]; - bool var_3721_interleave_0 = const()[name = string("op_3721_interleave_0"), val = bool(false)]; - tensor var_3721_cast_fp16 = concat(axis = var_3720, interleave = var_3721_interleave_0, values = (var_3718_cast_fp16, x1_15_cast_fp16))[name = string("op_3721_cast_fp16")]; - tensor var_3722_cast_fp16 = mul(x = var_3721_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3722_cast_fp16")]; - tensor key_states_13_cast_fp16 = add(x = var_3697_cast_fp16, y = var_3722_cast_fp16)[name = string("key_states_13_cast_fp16")]; - tensor expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor([3])]; - tensor expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor([0])]; - tensor expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor([0])]; - tensor expand_dims_40 = const()[name = string("expand_dims_40"), val = tensor([4])]; - int32 concat_26_axis_0 = const()[name = string("concat_26_axis_0"), val = int32(0)]; - bool concat_26_interleave_0 = const()[name = string("concat_26_interleave_0"), val = bool(false)]; - tensor concat_26 = concat(axis = concat_26_axis_0, interleave = concat_26_interleave_0, values = (expand_dims_36, expand_dims_37, current_pos, expand_dims_39))[name = string("concat_26")]; - tensor concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor([0])]; - tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; - int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; - bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; - tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (expand_dims_40, concat_27_values1_0, var_1909, concat_27_values3_0))[name = string("concat_27")]; - tensor model_model_kv_cache_local_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_26, begin_mask = model_model_kv_cache_local_internal_tensor_assign_7_begin_mask_0, end = concat_27, end_mask = model_model_kv_cache_local_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_7_stride_0, update = key_states_13_cast_fp16, x = coreml_update_state_57)[name = string("model_model_kv_cache_local_internal_tensor_assign_7_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_6_write_state")]; - tensor coreml_update_state_58 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_6")]; - tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([25])]; - tensor expand_dims_43 = const()[name = string("expand_dims_43"), val = tensor([0])]; - tensor expand_dims_45 = const()[name = string("expand_dims_45"), val = tensor([0])]; - tensor expand_dims_46 = const()[name = string("expand_dims_46"), val = tensor([26])]; - int32 concat_30_axis_0 = const()[name = string("concat_30_axis_0"), val = int32(0)]; - bool concat_30_interleave_0 = const()[name = string("concat_30_interleave_0"), val = bool(false)]; - tensor concat_30 = concat(axis = concat_30_axis_0, interleave = concat_30_interleave_0, values = (expand_dims_42, expand_dims_43, current_pos, expand_dims_45))[name = string("concat_30")]; - tensor concat_31_values1_0 = const()[name = string("concat_31_values1_0"), val = tensor([0])]; - tensor concat_31_values3_0 = const()[name = string("concat_31_values3_0"), val = tensor([0])]; - int32 concat_31_axis_0 = const()[name = string("concat_31_axis_0"), val = int32(0)]; - bool concat_31_interleave_0 = const()[name = string("concat_31_interleave_0"), val = bool(false)]; - tensor concat_31 = concat(axis = concat_31_axis_0, interleave = concat_31_interleave_0, values = (expand_dims_46, concat_31_values1_0, var_1909, concat_31_values3_0))[name = string("concat_31")]; - tensor model_model_kv_cache_local_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_30, begin_mask = model_model_kv_cache_local_internal_tensor_assign_8_begin_mask_0, end = concat_31, end_mask = model_model_kv_cache_local_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_8_stride_0, update = var_3609, x = coreml_update_state_58)[name = string("model_model_kv_cache_local_internal_tensor_assign_8_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_7_write_state")]; - tensor coreml_update_state_59 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_7")]; - tensor var_3777_begin_0 = const()[name = string("op_3777_begin_0"), val = tensor([3, 0, 0, 0])]; - tensor var_3777_end_0 = const()[name = string("op_3777_end_0"), val = tensor([4, 1, 512, 256])]; - tensor var_3777_end_mask_0 = const()[name = string("op_3777_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_3777_cast_fp16 = slice_by_index(begin = var_3777_begin_0, end = var_3777_end_0, end_mask = var_3777_end_mask_0, x = coreml_update_state_59)[name = string("op_3777_cast_fp16")]; - tensor var_3784_begin_0 = const()[name = string("op_3784_begin_0"), val = tensor([25, 0, 0, 0])]; - tensor var_3784_end_0 = const()[name = string("op_3784_end_0"), val = tensor([26, 1, 512, 256])]; - tensor var_3784_end_mask_0 = const()[name = string("op_3784_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_3784_cast_fp16 = slice_by_index(begin = var_3784_begin_0, end = var_3784_end_0, end_mask = var_3784_end_mask_0, x = coreml_update_state_59)[name = string("op_3784_cast_fp16")]; - tensor var_3821 = const()[name = string("op_3821"), val = tensor([1, 4, 1, 1])]; - tensor x_53_cast_fp16 = tile(reps = var_3821, x = var_3777_cast_fp16)[name = string("x_53_cast_fp16")]; - tensor var_3841 = const()[name = string("op_3841"), val = tensor([1, 4, 1, 1])]; - tensor x_59_cast_fp16 = tile(reps = var_3841, x = var_3784_cast_fp16)[name = string("x_59_cast_fp16")]; - bool var_3868_transpose_x_1 = const()[name = string("op_3868_transpose_x_1"), val = bool(false)]; - bool var_3868_transpose_y_1 = const()[name = string("op_3868_transpose_y_1"), val = bool(true)]; - tensor var_3868 = matmul(transpose_x = var_3868_transpose_x_1, transpose_y = var_3868_transpose_y_1, x = query_states_13_cast_fp16, y = x_53_cast_fp16)[name = string("op_3868")]; - fp16 var_3869_to_fp16 = const()[name = string("op_3869_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_19_cast_fp16 = mul(x = var_3868, y = var_3869_to_fp16)[name = string("attn_weights_19_cast_fp16")]; - tensor attn_weights_21_cast_fp16 = add(x = attn_weights_19_cast_fp16, y = var_2083)[name = string("attn_weights_21_cast_fp16")]; - int32 var_3904 = const()[name = string("op_3904"), val = int32(-1)]; - tensor attn_weights_23_cast_fp16 = softmax(axis = var_3904, x = attn_weights_21_cast_fp16)[name = string("attn_weights_23_cast_fp16")]; - bool attn_output_31_transpose_x_0 = const()[name = string("attn_output_31_transpose_x_0"), val = bool(false)]; - bool attn_output_31_transpose_y_0 = const()[name = string("attn_output_31_transpose_y_0"), val = bool(false)]; - tensor attn_output_31_cast_fp16 = matmul(transpose_x = attn_output_31_transpose_x_0, transpose_y = attn_output_31_transpose_y_0, x = attn_weights_23_cast_fp16, y = x_59_cast_fp16)[name = string("attn_output_31_cast_fp16")]; - tensor var_3915_perm_0 = const()[name = string("op_3915_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_3919 = const()[name = string("op_3919"), val = tensor([1, 1, 1024])]; - tensor var_3915_cast_fp16 = transpose(perm = var_3915_perm_0, x = attn_output_31_cast_fp16)[name = string("transpose_136")]; - tensor attn_output_35_cast_fp16 = reshape(shape = var_3919, x = var_3915_cast_fp16)[name = string("attn_output_35_cast_fp16")]; - tensor var_3924 = const()[name = string("op_3924"), val = tensor([0, 2, 1])]; - string var_3940_pad_type_0 = const()[name = string("op_3940_pad_type_0"), val = string("valid")]; - int32 var_3940_groups_0 = const()[name = string("op_3940_groups_0"), val = int32(1)]; - tensor var_3940_strides_0 = const()[name = string("op_3940_strides_0"), val = tensor([1])]; - tensor var_3940_pad_0 = const()[name = string("op_3940_pad_0"), val = tensor([0, 0])]; - tensor var_3940_dilations_0 = const()[name = string("op_3940_dilations_0"), val = tensor([1])]; - tensor squeeze_3_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100755456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101640256))))[name = string("squeeze_3_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_3925_cast_fp16 = transpose(perm = var_3924, x = attn_output_35_cast_fp16)[name = string("transpose_135")]; - tensor var_3940_cast_fp16 = conv(dilations = var_3940_dilations_0, groups = var_3940_groups_0, pad = var_3940_pad_0, pad_type = var_3940_pad_type_0, strides = var_3940_strides_0, weight = squeeze_3_cast_fp16_to_fp32_to_fp16_palettized, x = var_3925_cast_fp16)[name = string("op_3940_cast_fp16")]; - tensor var_3944 = const()[name = string("op_3944"), val = tensor([0, 2, 1])]; - int32 var_3955 = const()[name = string("op_3955"), val = int32(-1)]; - fp16 const_140_promoted_to_fp16 = const()[name = string("const_140_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_57_cast_fp16 = transpose(perm = var_3944, x = var_3940_cast_fp16)[name = string("transpose_134")]; - tensor var_3957_cast_fp16 = mul(x = hidden_states_57_cast_fp16, y = const_140_promoted_to_fp16)[name = string("op_3957_cast_fp16")]; - bool input_71_interleave_0 = const()[name = string("input_71_interleave_0"), val = bool(false)]; - tensor input_71_cast_fp16 = concat(axis = var_3955, interleave = input_71_interleave_0, values = (hidden_states_57_cast_fp16, var_3957_cast_fp16))[name = string("input_71_cast_fp16")]; - tensor normed_85_axes_0 = const()[name = string("normed_85_axes_0"), val = tensor([-1])]; - fp16 var_3952_to_fp16 = const()[name = string("op_3952_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_85_cast_fp16 = layer_norm(axes = normed_85_axes_0, epsilon = var_3952_to_fp16, x = input_71_cast_fp16)[name = string("normed_85_cast_fp16")]; - tensor normed_87_begin_0 = const()[name = string("normed_87_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_87_end_0 = const()[name = string("normed_87_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_87_end_mask_0 = const()[name = string("normed_87_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_87_cast_fp16 = slice_by_index(begin = normed_87_begin_0, end = normed_87_end_0, end_mask = normed_87_end_mask_0, x = normed_85_cast_fp16)[name = string("normed_87_cast_fp16")]; - tensor var_3971_to_fp16 = const()[name = string("op_3971_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101658752)))]; - tensor attn_output_39_cast_fp16 = mul(x = normed_87_cast_fp16, y = var_3971_to_fp16)[name = string("attn_output_39_cast_fp16")]; - tensor hidden_states_59_cast_fp16 = add(x = hidden_states_49_cast_fp16, y = attn_output_39_cast_fp16)[name = string("hidden_states_59_cast_fp16")]; - int32 var_3984 = const()[name = string("op_3984"), val = int32(-1)]; - fp16 const_144_promoted_to_fp16 = const()[name = string("const_144_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_3986_cast_fp16 = mul(x = hidden_states_59_cast_fp16, y = const_144_promoted_to_fp16)[name = string("op_3986_cast_fp16")]; - bool input_73_interleave_0 = const()[name = string("input_73_interleave_0"), val = bool(false)]; - tensor input_73_cast_fp16 = concat(axis = var_3984, interleave = input_73_interleave_0, values = (hidden_states_59_cast_fp16, var_3986_cast_fp16))[name = string("input_73_cast_fp16")]; - tensor normed_89_axes_0 = const()[name = string("normed_89_axes_0"), val = tensor([-1])]; - fp16 var_3981_to_fp16 = const()[name = string("op_3981_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_89_cast_fp16 = layer_norm(axes = normed_89_axes_0, epsilon = var_3981_to_fp16, x = input_73_cast_fp16)[name = string("normed_89_cast_fp16")]; - tensor normed_91_begin_0 = const()[name = string("normed_91_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_91_end_0 = const()[name = string("normed_91_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_91_end_mask_0 = const()[name = string("normed_91_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_91_cast_fp16 = slice_by_index(begin = normed_91_begin_0, end = normed_91_end_0, end_mask = normed_91_end_mask_0, x = normed_89_cast_fp16)[name = string("normed_91_cast_fp16")]; - tensor var_4000_to_fp16 = const()[name = string("op_4000_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101661120)))]; - tensor x_61_cast_fp16 = mul(x = normed_91_cast_fp16, y = var_4000_to_fp16)[name = string("x_61_cast_fp16")]; - tensor var_4012 = const()[name = string("op_4012"), val = tensor([0, 2, 1])]; - tensor input_75_axes_0 = const()[name = string("input_75_axes_0"), val = tensor([2])]; - tensor var_4013_cast_fp16 = transpose(perm = var_4012, x = x_61_cast_fp16)[name = string("transpose_133")]; - tensor input_75_cast_fp16 = expand_dims(axes = input_75_axes_0, x = var_4013_cast_fp16)[name = string("input_75_cast_fp16")]; - string x_63_pad_type_0 = const()[name = string("x_63_pad_type_0"), val = string("valid")]; - tensor x_63_strides_0 = const()[name = string("x_63_strides_0"), val = tensor([1, 1])]; - tensor x_63_pad_0 = const()[name = string("x_63_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_63_dilations_0 = const()[name = string("x_63_dilations_0"), val = tensor([1, 1])]; - int32 x_63_groups_0 = const()[name = string("x_63_groups_0"), val = int32(1)]; - tensor model_model_layers_3_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101663488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107635520))))[name = string("model_model_layers_3_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_63_cast_fp16 = conv(dilations = x_63_dilations_0, groups = x_63_groups_0, pad = x_63_pad_0, pad_type = x_63_pad_type_0, strides = x_63_strides_0, weight = model_model_layers_3_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_75_cast_fp16)[name = string("x_63_cast_fp16")]; - string b_7_pad_type_0 = const()[name = string("b_7_pad_type_0"), val = string("valid")]; - tensor b_7_strides_0 = const()[name = string("b_7_strides_0"), val = tensor([1, 1])]; - tensor b_7_pad_0 = const()[name = string("b_7_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_7_dilations_0 = const()[name = string("b_7_dilations_0"), val = tensor([1, 1])]; - int32 b_7_groups_0 = const()[name = string("b_7_groups_0"), val = int32(1)]; - tensor model_model_layers_3_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107746176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113718208))))[name = string("model_model_layers_3_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_7_cast_fp16 = conv(dilations = b_7_dilations_0, groups = b_7_groups_0, pad = b_7_pad_0, pad_type = b_7_pad_type_0, strides = b_7_strides_0, weight = model_model_layers_3_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_75_cast_fp16)[name = string("b_7_cast_fp16")]; - string var_4038_mode_0 = const()[name = string("op_4038_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_4038_cast_fp16 = gelu(mode = var_4038_mode_0, x = x_63_cast_fp16)[name = string("op_4038_cast_fp16")]; - tensor input_77_cast_fp16 = mul(x = var_4038_cast_fp16, y = b_7_cast_fp16)[name = string("input_77_cast_fp16")]; - string e_7_pad_type_0 = const()[name = string("e_7_pad_type_0"), val = string("valid")]; - tensor e_7_strides_0 = const()[name = string("e_7_strides_0"), val = tensor([1, 1])]; - tensor e_7_pad_0 = const()[name = string("e_7_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_7_dilations_0 = const()[name = string("e_7_dilations_0"), val = tensor([1, 1])]; - int32 e_7_groups_0 = const()[name = string("e_7_groups_0"), val = int32(1)]; - tensor model_model_layers_3_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113828864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119800896))))[name = string("model_model_layers_3_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_7_cast_fp16 = conv(dilations = e_7_dilations_0, groups = e_7_groups_0, pad = e_7_pad_0, pad_type = e_7_pad_type_0, strides = e_7_strides_0, weight = model_model_layers_3_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_77_cast_fp16)[name = string("e_7_cast_fp16")]; - tensor var_4046_axes_0 = const()[name = string("op_4046_axes_0"), val = tensor([2])]; - tensor var_4046_cast_fp16 = squeeze(axes = var_4046_axes_0, x = e_7_cast_fp16)[name = string("op_4046_cast_fp16")]; - tensor var_4047 = const()[name = string("op_4047"), val = tensor([0, 2, 1])]; - int32 var_4058 = const()[name = string("op_4058"), val = int32(-1)]; - fp16 const_148_promoted_to_fp16 = const()[name = string("const_148_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_61_cast_fp16 = transpose(perm = var_4047, x = var_4046_cast_fp16)[name = string("transpose_132")]; - tensor var_4060_cast_fp16 = mul(x = hidden_states_61_cast_fp16, y = const_148_promoted_to_fp16)[name = string("op_4060_cast_fp16")]; - bool input_79_interleave_0 = const()[name = string("input_79_interleave_0"), val = bool(false)]; - tensor input_79_cast_fp16 = concat(axis = var_4058, interleave = input_79_interleave_0, values = (hidden_states_61_cast_fp16, var_4060_cast_fp16))[name = string("input_79_cast_fp16")]; - tensor normed_93_axes_0 = const()[name = string("normed_93_axes_0"), val = tensor([-1])]; - fp16 var_4055_to_fp16 = const()[name = string("op_4055_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_93_cast_fp16 = layer_norm(axes = normed_93_axes_0, epsilon = var_4055_to_fp16, x = input_79_cast_fp16)[name = string("normed_93_cast_fp16")]; - tensor normed_95_begin_0 = const()[name = string("normed_95_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_95_end_0 = const()[name = string("normed_95_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_95_end_mask_0 = const()[name = string("normed_95_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_95_cast_fp16 = slice_by_index(begin = normed_95_begin_0, end = normed_95_end_0, end_mask = normed_95_end_mask_0, x = normed_93_cast_fp16)[name = string("normed_95_cast_fp16")]; - tensor var_4074_to_fp16 = const()[name = string("op_4074_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119819392)))]; - tensor hidden_states_63_cast_fp16 = mul(x = normed_95_cast_fp16, y = var_4074_to_fp16)[name = string("hidden_states_63_cast_fp16")]; - tensor hidden_states_65_cast_fp16 = add(x = hidden_states_59_cast_fp16, y = hidden_states_63_cast_fp16)[name = string("hidden_states_65_cast_fp16")]; - int32 var_4125 = const()[name = string("op_4125"), val = int32(-1)]; - fp16 const_152_promoted_to_fp16 = const()[name = string("const_152_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_4127_cast_fp16 = mul(x = hidden_states_65_cast_fp16, y = const_152_promoted_to_fp16)[name = string("op_4127_cast_fp16")]; - bool input_81_interleave_0 = const()[name = string("input_81_interleave_0"), val = bool(false)]; - tensor input_81_cast_fp16 = concat(axis = var_4125, interleave = input_81_interleave_0, values = (hidden_states_65_cast_fp16, var_4127_cast_fp16))[name = string("input_81_cast_fp16")]; - tensor normed_97_axes_0 = const()[name = string("normed_97_axes_0"), val = tensor([-1])]; - fp16 var_4122_to_fp16 = const()[name = string("op_4122_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_97_cast_fp16 = layer_norm(axes = normed_97_axes_0, epsilon = var_4122_to_fp16, x = input_81_cast_fp16)[name = string("normed_97_cast_fp16")]; - tensor normed_99_begin_0 = const()[name = string("normed_99_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_99_end_0 = const()[name = string("normed_99_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_99_end_mask_0 = const()[name = string("normed_99_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_99_cast_fp16 = slice_by_index(begin = normed_99_begin_0, end = normed_99_end_0, end_mask = normed_99_end_mask_0, x = normed_97_cast_fp16)[name = string("normed_99_cast_fp16")]; - tensor var_4141_to_fp16 = const()[name = string("op_4141_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119821760)))]; - tensor hidden_states_67_cast_fp16 = mul(x = normed_99_cast_fp16, y = var_4141_to_fp16)[name = string("hidden_states_67_cast_fp16")]; - tensor var_4146 = const()[name = string("op_4146"), val = tensor([0, 2, 1])]; - tensor var_4149_axes_0 = const()[name = string("op_4149_axes_0"), val = tensor([2])]; - tensor var_4147_cast_fp16 = transpose(perm = var_4146, x = hidden_states_67_cast_fp16)[name = string("transpose_131")]; - tensor var_4149_cast_fp16 = expand_dims(axes = var_4149_axes_0, x = var_4147_cast_fp16)[name = string("op_4149_cast_fp16")]; - string var_4165_pad_type_0 = const()[name = string("op_4165_pad_type_0"), val = string("valid")]; - tensor var_4165_strides_0 = const()[name = string("op_4165_strides_0"), val = tensor([1, 1])]; - tensor var_4165_pad_0 = const()[name = string("op_4165_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_4165_dilations_0 = const()[name = string("op_4165_dilations_0"), val = tensor([1, 1])]; - int32 var_4165_groups_0 = const()[name = string("op_4165_groups_0"), val = int32(1)]; - tensor var_4165 = conv(dilations = var_4165_dilations_0, groups = var_4165_groups_0, pad = var_4165_pad_0, pad_type = var_4165_pad_type_0, strides = var_4165_strides_0, weight = model_model_layers_4_self_attn_q_proj_weight_palettized, x = var_4149_cast_fp16)[name = string("op_4165")]; - tensor var_4170 = const()[name = string("op_4170"), val = tensor([1, 4, 1, 256])]; - tensor var_4171 = reshape(shape = var_4170, x = var_4165)[name = string("op_4171")]; - string var_4187_pad_type_0 = const()[name = string("op_4187_pad_type_0"), val = string("valid")]; - tensor var_4187_strides_0 = const()[name = string("op_4187_strides_0"), val = tensor([1, 1])]; - tensor var_4187_pad_0 = const()[name = string("op_4187_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_4187_dilations_0 = const()[name = string("op_4187_dilations_0"), val = tensor([1, 1])]; - int32 var_4187_groups_0 = const()[name = string("op_4187_groups_0"), val = int32(1)]; - tensor var_4187 = conv(dilations = var_4187_dilations_0, groups = var_4187_groups_0, pad = var_4187_pad_0, pad_type = var_4187_pad_type_0, strides = var_4187_strides_0, weight = model_model_layers_4_self_attn_k_proj_weight_palettized, x = var_4149_cast_fp16)[name = string("op_4187")]; - tensor var_4192 = const()[name = string("op_4192"), val = tensor([1, 1, 1, 256])]; - tensor var_4193 = reshape(shape = var_4192, x = var_4187)[name = string("op_4193")]; - string var_4209_pad_type_0 = const()[name = string("op_4209_pad_type_0"), val = string("valid")]; - tensor var_4209_strides_0 = const()[name = string("op_4209_strides_0"), val = tensor([1, 1])]; - tensor var_4209_pad_0 = const()[name = string("op_4209_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_4209_dilations_0 = const()[name = string("op_4209_dilations_0"), val = tensor([1, 1])]; - int32 var_4209_groups_0 = const()[name = string("op_4209_groups_0"), val = int32(1)]; - tensor var_4209 = conv(dilations = var_4209_dilations_0, groups = var_4209_groups_0, pad = var_4209_pad_0, pad_type = var_4209_pad_type_0, strides = var_4209_strides_0, weight = model_model_layers_4_self_attn_v_proj_weight_palettized, x = var_4149_cast_fp16)[name = string("op_4209")]; - tensor var_4214 = const()[name = string("op_4214"), val = tensor([1, 1, 1, 256])]; - tensor var_4215 = reshape(shape = var_4214, x = var_4209)[name = string("op_4215")]; - int32 var_4230 = const()[name = string("op_4230"), val = int32(-1)]; - fp16 const_156_promoted = const()[name = string("const_156_promoted"), val = fp16(-0x1p+0)]; - tensor var_4232 = mul(x = var_4171, y = const_156_promoted)[name = string("op_4232")]; - bool input_85_interleave_0 = const()[name = string("input_85_interleave_0"), val = bool(false)]; - tensor input_85 = concat(axis = var_4230, interleave = input_85_interleave_0, values = (var_4171, var_4232))[name = string("input_85")]; - tensor normed_101_axes_0 = const()[name = string("normed_101_axes_0"), val = tensor([-1])]; - fp16 var_4227_to_fp16 = const()[name = string("op_4227_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_101_cast_fp16 = layer_norm(axes = normed_101_axes_0, epsilon = var_4227_to_fp16, x = input_85)[name = string("normed_101_cast_fp16")]; - tensor normed_103_begin_0 = const()[name = string("normed_103_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_103_end_0 = const()[name = string("normed_103_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_103_end_mask_0 = const()[name = string("normed_103_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_103 = slice_by_index(begin = normed_103_begin_0, end = normed_103_end_0, end_mask = normed_103_end_mask_0, x = normed_101_cast_fp16)[name = string("normed_103")]; - tensor var_4246_to_fp16 = const()[name = string("op_4246_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119824128)))]; - tensor q_9_cast_fp16 = mul(x = normed_103, y = var_4246_to_fp16)[name = string("q_9_cast_fp16")]; - int32 var_4257 = const()[name = string("op_4257"), val = int32(-1)]; - fp16 const_160_promoted = const()[name = string("const_160_promoted"), val = fp16(-0x1p+0)]; - tensor var_4259 = mul(x = var_4193, y = const_160_promoted)[name = string("op_4259")]; - bool input_87_interleave_0 = const()[name = string("input_87_interleave_0"), val = bool(false)]; - tensor input_87 = concat(axis = var_4257, interleave = input_87_interleave_0, values = (var_4193, var_4259))[name = string("input_87")]; - tensor normed_105_axes_0 = const()[name = string("normed_105_axes_0"), val = tensor([-1])]; - fp16 var_4254_to_fp16 = const()[name = string("op_4254_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_105_cast_fp16 = layer_norm(axes = normed_105_axes_0, epsilon = var_4254_to_fp16, x = input_87)[name = string("normed_105_cast_fp16")]; - tensor normed_107_begin_0 = const()[name = string("normed_107_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_107_end_0 = const()[name = string("normed_107_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_107_end_mask_0 = const()[name = string("normed_107_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_107 = slice_by_index(begin = normed_107_begin_0, end = normed_107_end_0, end_mask = normed_107_end_mask_0, x = normed_105_cast_fp16)[name = string("normed_107")]; - tensor var_4273_to_fp16 = const()[name = string("op_4273_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119824704)))]; - tensor k_9_cast_fp16 = mul(x = normed_107, y = var_4273_to_fp16)[name = string("k_9_cast_fp16")]; - tensor var_4275_cast_fp16 = mul(x = q_9_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4275_cast_fp16")]; - tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_17_cast_fp16 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = q_9_cast_fp16)[name = string("x1_17_cast_fp16")]; - tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_17_cast_fp16 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = q_9_cast_fp16)[name = string("x2_17_cast_fp16")]; - fp16 const_166_promoted_to_fp16 = const()[name = string("const_166_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_4296_cast_fp16 = mul(x = x2_17_cast_fp16, y = const_166_promoted_to_fp16)[name = string("op_4296_cast_fp16")]; - int32 var_4298 = const()[name = string("op_4298"), val = int32(-1)]; - bool var_4299_interleave_0 = const()[name = string("op_4299_interleave_0"), val = bool(false)]; - tensor var_4299_cast_fp16 = concat(axis = var_4298, interleave = var_4299_interleave_0, values = (var_4296_cast_fp16, x1_17_cast_fp16))[name = string("op_4299_cast_fp16")]; - tensor var_4300_cast_fp16 = mul(x = var_4299_cast_fp16, y = sin_1_cast_fp16)[name = string("op_4300_cast_fp16")]; - tensor query_states_17_cast_fp16 = add(x = var_4275_cast_fp16, y = var_4300_cast_fp16)[name = string("query_states_17_cast_fp16")]; - tensor var_4303_cast_fp16 = mul(x = k_9_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4303_cast_fp16")]; - tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_19_cast_fp16 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = k_9_cast_fp16)[name = string("x1_19_cast_fp16")]; - tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_19_cast_fp16 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = k_9_cast_fp16)[name = string("x2_19_cast_fp16")]; - fp16 const_169_promoted_to_fp16 = const()[name = string("const_169_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_4324_cast_fp16 = mul(x = x2_19_cast_fp16, y = const_169_promoted_to_fp16)[name = string("op_4324_cast_fp16")]; - int32 var_4326 = const()[name = string("op_4326"), val = int32(-1)]; - bool var_4327_interleave_0 = const()[name = string("op_4327_interleave_0"), val = bool(false)]; - tensor var_4327_cast_fp16 = concat(axis = var_4326, interleave = var_4327_interleave_0, values = (var_4324_cast_fp16, x1_19_cast_fp16))[name = string("op_4327_cast_fp16")]; - tensor var_4328_cast_fp16 = mul(x = var_4327_cast_fp16, y = sin_1_cast_fp16)[name = string("op_4328_cast_fp16")]; - tensor key_states_17_cast_fp16 = add(x = var_4303_cast_fp16, y = var_4328_cast_fp16)[name = string("key_states_17_cast_fp16")]; - tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([4])]; - tensor expand_dims_49 = const()[name = string("expand_dims_49"), val = tensor([0])]; - tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([0])]; - tensor expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor([5])]; - int32 concat_34_axis_0 = const()[name = string("concat_34_axis_0"), val = int32(0)]; - bool concat_34_interleave_0 = const()[name = string("concat_34_interleave_0"), val = bool(false)]; - tensor concat_34 = concat(axis = concat_34_axis_0, interleave = concat_34_interleave_0, values = (expand_dims_48, expand_dims_49, current_pos, expand_dims_51))[name = string("concat_34")]; - tensor concat_35_values1_0 = const()[name = string("concat_35_values1_0"), val = tensor([0])]; - tensor concat_35_values3_0 = const()[name = string("concat_35_values3_0"), val = tensor([0])]; - int32 concat_35_axis_0 = const()[name = string("concat_35_axis_0"), val = int32(0)]; - bool concat_35_interleave_0 = const()[name = string("concat_35_interleave_0"), val = bool(false)]; - tensor concat_35 = concat(axis = concat_35_axis_0, interleave = concat_35_interleave_0, values = (expand_dims_52, concat_35_values1_0, var_1909, concat_35_values3_0))[name = string("concat_35")]; - tensor model_model_kv_cache_local_internal_tensor_assign_9_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_9_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_9_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_34, begin_mask = model_model_kv_cache_local_internal_tensor_assign_9_begin_mask_0, end = concat_35, end_mask = model_model_kv_cache_local_internal_tensor_assign_9_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_9_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_9_stride_0, update = key_states_17_cast_fp16, x = coreml_update_state_59)[name = string("model_model_kv_cache_local_internal_tensor_assign_9_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_9_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_8_write_state")]; - tensor coreml_update_state_60 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_8")]; - tensor expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor([26])]; - tensor expand_dims_55 = const()[name = string("expand_dims_55"), val = tensor([0])]; - tensor expand_dims_57 = const()[name = string("expand_dims_57"), val = tensor([0])]; - tensor expand_dims_58 = const()[name = string("expand_dims_58"), val = tensor([27])]; - int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)]; - bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)]; - tensor concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (expand_dims_54, expand_dims_55, current_pos, expand_dims_57))[name = string("concat_38")]; - tensor concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor([0])]; - tensor concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor([0])]; - int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)]; - bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)]; - tensor concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (expand_dims_58, concat_39_values1_0, var_1909, concat_39_values3_0))[name = string("concat_39")]; - tensor model_model_kv_cache_local_internal_tensor_assign_10_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_10_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_10_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_38, begin_mask = model_model_kv_cache_local_internal_tensor_assign_10_begin_mask_0, end = concat_39, end_mask = model_model_kv_cache_local_internal_tensor_assign_10_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_10_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_10_stride_0, update = var_4215, x = coreml_update_state_60)[name = string("model_model_kv_cache_local_internal_tensor_assign_10_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_10_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_9_write_state")]; - tensor coreml_update_state_61 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_9")]; - tensor var_4383_begin_0 = const()[name = string("op_4383_begin_0"), val = tensor([4, 0, 0, 0])]; - tensor var_4383_end_0 = const()[name = string("op_4383_end_0"), val = tensor([5, 1, 512, 256])]; - tensor var_4383_end_mask_0 = const()[name = string("op_4383_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_4383_cast_fp16 = slice_by_index(begin = var_4383_begin_0, end = var_4383_end_0, end_mask = var_4383_end_mask_0, x = coreml_update_state_61)[name = string("op_4383_cast_fp16")]; - tensor var_4390_begin_0 = const()[name = string("op_4390_begin_0"), val = tensor([26, 0, 0, 0])]; - tensor var_4390_end_0 = const()[name = string("op_4390_end_0"), val = tensor([27, 1, 512, 256])]; - tensor var_4390_end_mask_0 = const()[name = string("op_4390_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_4390_cast_fp16 = slice_by_index(begin = var_4390_begin_0, end = var_4390_end_0, end_mask = var_4390_end_mask_0, x = coreml_update_state_61)[name = string("op_4390_cast_fp16")]; - tensor var_4427 = const()[name = string("op_4427"), val = tensor([1, 4, 1, 1])]; - tensor x_69_cast_fp16 = tile(reps = var_4427, x = var_4383_cast_fp16)[name = string("x_69_cast_fp16")]; - tensor var_4447 = const()[name = string("op_4447"), val = tensor([1, 4, 1, 1])]; - tensor x_75_cast_fp16 = tile(reps = var_4447, x = var_4390_cast_fp16)[name = string("x_75_cast_fp16")]; - bool var_4474_transpose_x_1 = const()[name = string("op_4474_transpose_x_1"), val = bool(false)]; - bool var_4474_transpose_y_1 = const()[name = string("op_4474_transpose_y_1"), val = bool(true)]; - tensor var_4474 = matmul(transpose_x = var_4474_transpose_x_1, transpose_y = var_4474_transpose_y_1, x = query_states_17_cast_fp16, y = x_69_cast_fp16)[name = string("op_4474")]; - fp16 var_4475_to_fp16 = const()[name = string("op_4475_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_25_cast_fp16 = mul(x = var_4474, y = var_4475_to_fp16)[name = string("attn_weights_25_cast_fp16")]; - tensor attn_weights_27_cast_fp16 = add(x = attn_weights_25_cast_fp16, y = var_2083)[name = string("attn_weights_27_cast_fp16")]; - int32 var_4510 = const()[name = string("op_4510"), val = int32(-1)]; - tensor attn_weights_29_cast_fp16 = softmax(axis = var_4510, x = attn_weights_27_cast_fp16)[name = string("attn_weights_29_cast_fp16")]; - bool attn_output_41_transpose_x_0 = const()[name = string("attn_output_41_transpose_x_0"), val = bool(false)]; - bool attn_output_41_transpose_y_0 = const()[name = string("attn_output_41_transpose_y_0"), val = bool(false)]; - tensor attn_output_41_cast_fp16 = matmul(transpose_x = attn_output_41_transpose_x_0, transpose_y = attn_output_41_transpose_y_0, x = attn_weights_29_cast_fp16, y = x_75_cast_fp16)[name = string("attn_output_41_cast_fp16")]; - tensor var_4521_perm_0 = const()[name = string("op_4521_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_4525 = const()[name = string("op_4525"), val = tensor([1, 1, 1024])]; - tensor var_4521_cast_fp16 = transpose(perm = var_4521_perm_0, x = attn_output_41_cast_fp16)[name = string("transpose_130")]; - tensor attn_output_45_cast_fp16 = reshape(shape = var_4525, x = var_4521_cast_fp16)[name = string("attn_output_45_cast_fp16")]; - tensor var_4530 = const()[name = string("op_4530"), val = tensor([0, 2, 1])]; - string var_4546_pad_type_0 = const()[name = string("op_4546_pad_type_0"), val = string("valid")]; - int32 var_4546_groups_0 = const()[name = string("op_4546_groups_0"), val = int32(1)]; - tensor var_4546_strides_0 = const()[name = string("op_4546_strides_0"), val = tensor([1])]; - tensor var_4546_pad_0 = const()[name = string("op_4546_pad_0"), val = tensor([0, 0])]; - tensor var_4546_dilations_0 = const()[name = string("op_4546_dilations_0"), val = tensor([1])]; - tensor squeeze_4_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119825280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120710080))))[name = string("squeeze_4_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_4531_cast_fp16 = transpose(perm = var_4530, x = attn_output_45_cast_fp16)[name = string("transpose_129")]; - tensor var_4546_cast_fp16 = conv(dilations = var_4546_dilations_0, groups = var_4546_groups_0, pad = var_4546_pad_0, pad_type = var_4546_pad_type_0, strides = var_4546_strides_0, weight = squeeze_4_cast_fp16_to_fp32_to_fp16_palettized, x = var_4531_cast_fp16)[name = string("op_4546_cast_fp16")]; - tensor var_4550 = const()[name = string("op_4550"), val = tensor([0, 2, 1])]; - int32 var_4561 = const()[name = string("op_4561"), val = int32(-1)]; - fp16 const_178_promoted_to_fp16 = const()[name = string("const_178_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_73_cast_fp16 = transpose(perm = var_4550, x = var_4546_cast_fp16)[name = string("transpose_128")]; - tensor var_4563_cast_fp16 = mul(x = hidden_states_73_cast_fp16, y = const_178_promoted_to_fp16)[name = string("op_4563_cast_fp16")]; - bool input_91_interleave_0 = const()[name = string("input_91_interleave_0"), val = bool(false)]; - tensor input_91_cast_fp16 = concat(axis = var_4561, interleave = input_91_interleave_0, values = (hidden_states_73_cast_fp16, var_4563_cast_fp16))[name = string("input_91_cast_fp16")]; - tensor normed_109_axes_0 = const()[name = string("normed_109_axes_0"), val = tensor([-1])]; - fp16 var_4558_to_fp16 = const()[name = string("op_4558_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_109_cast_fp16 = layer_norm(axes = normed_109_axes_0, epsilon = var_4558_to_fp16, x = input_91_cast_fp16)[name = string("normed_109_cast_fp16")]; - tensor normed_111_begin_0 = const()[name = string("normed_111_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_111_end_0 = const()[name = string("normed_111_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_111_end_mask_0 = const()[name = string("normed_111_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_111_cast_fp16 = slice_by_index(begin = normed_111_begin_0, end = normed_111_end_0, end_mask = normed_111_end_mask_0, x = normed_109_cast_fp16)[name = string("normed_111_cast_fp16")]; - tensor var_4577_to_fp16 = const()[name = string("op_4577_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120728576)))]; - tensor attn_output_49_cast_fp16 = mul(x = normed_111_cast_fp16, y = var_4577_to_fp16)[name = string("attn_output_49_cast_fp16")]; - tensor hidden_states_75_cast_fp16 = add(x = hidden_states_65_cast_fp16, y = attn_output_49_cast_fp16)[name = string("hidden_states_75_cast_fp16")]; - int32 var_4590 = const()[name = string("op_4590"), val = int32(-1)]; - fp16 const_182_promoted_to_fp16 = const()[name = string("const_182_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_4592_cast_fp16 = mul(x = hidden_states_75_cast_fp16, y = const_182_promoted_to_fp16)[name = string("op_4592_cast_fp16")]; - bool input_93_interleave_0 = const()[name = string("input_93_interleave_0"), val = bool(false)]; - tensor input_93_cast_fp16 = concat(axis = var_4590, interleave = input_93_interleave_0, values = (hidden_states_75_cast_fp16, var_4592_cast_fp16))[name = string("input_93_cast_fp16")]; - tensor normed_113_axes_0 = const()[name = string("normed_113_axes_0"), val = tensor([-1])]; - fp16 var_4587_to_fp16 = const()[name = string("op_4587_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_113_cast_fp16 = layer_norm(axes = normed_113_axes_0, epsilon = var_4587_to_fp16, x = input_93_cast_fp16)[name = string("normed_113_cast_fp16")]; - tensor normed_115_begin_0 = const()[name = string("normed_115_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_115_end_0 = const()[name = string("normed_115_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_115_end_mask_0 = const()[name = string("normed_115_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_115_cast_fp16 = slice_by_index(begin = normed_115_begin_0, end = normed_115_end_0, end_mask = normed_115_end_mask_0, x = normed_113_cast_fp16)[name = string("normed_115_cast_fp16")]; - tensor var_4606_to_fp16 = const()[name = string("op_4606_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120730944)))]; - tensor x_77_cast_fp16 = mul(x = normed_115_cast_fp16, y = var_4606_to_fp16)[name = string("x_77_cast_fp16")]; - tensor var_4618 = const()[name = string("op_4618"), val = tensor([0, 2, 1])]; - tensor input_95_axes_0 = const()[name = string("input_95_axes_0"), val = tensor([2])]; - tensor var_4619_cast_fp16 = transpose(perm = var_4618, x = x_77_cast_fp16)[name = string("transpose_127")]; - tensor input_95_cast_fp16 = expand_dims(axes = input_95_axes_0, x = var_4619_cast_fp16)[name = string("input_95_cast_fp16")]; - string x_79_pad_type_0 = const()[name = string("x_79_pad_type_0"), val = string("valid")]; - tensor x_79_strides_0 = const()[name = string("x_79_strides_0"), val = tensor([1, 1])]; - tensor x_79_pad_0 = const()[name = string("x_79_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_79_dilations_0 = const()[name = string("x_79_dilations_0"), val = tensor([1, 1])]; - int32 x_79_groups_0 = const()[name = string("x_79_groups_0"), val = int32(1)]; - tensor model_model_layers_4_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120733312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126705344))))[name = string("model_model_layers_4_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_79_cast_fp16 = conv(dilations = x_79_dilations_0, groups = x_79_groups_0, pad = x_79_pad_0, pad_type = x_79_pad_type_0, strides = x_79_strides_0, weight = model_model_layers_4_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_95_cast_fp16)[name = string("x_79_cast_fp16")]; - string b_9_pad_type_0 = const()[name = string("b_9_pad_type_0"), val = string("valid")]; - tensor b_9_strides_0 = const()[name = string("b_9_strides_0"), val = tensor([1, 1])]; - tensor b_9_pad_0 = const()[name = string("b_9_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_9_dilations_0 = const()[name = string("b_9_dilations_0"), val = tensor([1, 1])]; - int32 b_9_groups_0 = const()[name = string("b_9_groups_0"), val = int32(1)]; - tensor model_model_layers_4_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126816000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132788032))))[name = string("model_model_layers_4_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_9_cast_fp16 = conv(dilations = b_9_dilations_0, groups = b_9_groups_0, pad = b_9_pad_0, pad_type = b_9_pad_type_0, strides = b_9_strides_0, weight = model_model_layers_4_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_95_cast_fp16)[name = string("b_9_cast_fp16")]; - string var_4644_mode_0 = const()[name = string("op_4644_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_4644_cast_fp16 = gelu(mode = var_4644_mode_0, x = x_79_cast_fp16)[name = string("op_4644_cast_fp16")]; - tensor input_97_cast_fp16 = mul(x = var_4644_cast_fp16, y = b_9_cast_fp16)[name = string("input_97_cast_fp16")]; - string e_9_pad_type_0 = const()[name = string("e_9_pad_type_0"), val = string("valid")]; - tensor e_9_strides_0 = const()[name = string("e_9_strides_0"), val = tensor([1, 1])]; - tensor e_9_pad_0 = const()[name = string("e_9_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_9_dilations_0 = const()[name = string("e_9_dilations_0"), val = tensor([1, 1])]; - int32 e_9_groups_0 = const()[name = string("e_9_groups_0"), val = int32(1)]; - tensor model_model_layers_4_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132898688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138870720))))[name = string("model_model_layers_4_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_9_cast_fp16 = conv(dilations = e_9_dilations_0, groups = e_9_groups_0, pad = e_9_pad_0, pad_type = e_9_pad_type_0, strides = e_9_strides_0, weight = model_model_layers_4_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_97_cast_fp16)[name = string("e_9_cast_fp16")]; - tensor var_4652_axes_0 = const()[name = string("op_4652_axes_0"), val = tensor([2])]; - tensor var_4652_cast_fp16 = squeeze(axes = var_4652_axes_0, x = e_9_cast_fp16)[name = string("op_4652_cast_fp16")]; - tensor var_4653 = const()[name = string("op_4653"), val = tensor([0, 2, 1])]; - int32 var_4664 = const()[name = string("op_4664"), val = int32(-1)]; - fp16 const_186_promoted_to_fp16 = const()[name = string("const_186_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_77_cast_fp16 = transpose(perm = var_4653, x = var_4652_cast_fp16)[name = string("transpose_126")]; - tensor var_4666_cast_fp16 = mul(x = hidden_states_77_cast_fp16, y = const_186_promoted_to_fp16)[name = string("op_4666_cast_fp16")]; - bool input_99_interleave_0 = const()[name = string("input_99_interleave_0"), val = bool(false)]; - tensor input_99_cast_fp16 = concat(axis = var_4664, interleave = input_99_interleave_0, values = (hidden_states_77_cast_fp16, var_4666_cast_fp16))[name = string("input_99_cast_fp16")]; - tensor normed_117_axes_0 = const()[name = string("normed_117_axes_0"), val = tensor([-1])]; - fp16 var_4661_to_fp16 = const()[name = string("op_4661_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_117_cast_fp16 = layer_norm(axes = normed_117_axes_0, epsilon = var_4661_to_fp16, x = input_99_cast_fp16)[name = string("normed_117_cast_fp16")]; - tensor normed_119_begin_0 = const()[name = string("normed_119_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_119_end_0 = const()[name = string("normed_119_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_119_end_mask_0 = const()[name = string("normed_119_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_119_cast_fp16 = slice_by_index(begin = normed_119_begin_0, end = normed_119_end_0, end_mask = normed_119_end_mask_0, x = normed_117_cast_fp16)[name = string("normed_119_cast_fp16")]; - tensor var_4680_to_fp16 = const()[name = string("op_4680_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138889216)))]; - tensor hidden_states_79_cast_fp16 = mul(x = normed_119_cast_fp16, y = var_4680_to_fp16)[name = string("hidden_states_79_cast_fp16")]; - tensor hidden_states_81_cast_fp16 = add(x = hidden_states_75_cast_fp16, y = hidden_states_79_cast_fp16)[name = string("hidden_states_81_cast_fp16")]; - int32 var_4692_axis_0 = const()[name = string("op_4692_axis_0"), val = int32(1)]; - int32 var_4692_batch_dims_0 = const()[name = string("op_4692_batch_dims_0"), val = int32(0)]; - bool var_4692_validate_indices_0 = const()[name = string("op_4692_validate_indices_0"), val = bool(false)]; - tensor var_4684_to_fp16 = const()[name = string("op_4684_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138891584)))]; - tensor var_4692_cast_fp16_cast_uint16 = gather(axis = var_4692_axis_0, batch_dims = var_4692_batch_dims_0, indices = current_pos_to_uint16, validate_indices = var_4692_validate_indices_0, x = var_4684_to_fp16)[name = string("op_4692_cast_fp16_cast_uint16")]; - tensor var_4697 = const()[name = string("op_4697"), val = tensor([1, 1, 1, -1])]; - tensor sin_21_cast_fp16 = reshape(shape = var_4697, x = var_4692_cast_fp16_cast_uint16)[name = string("sin_21_cast_fp16")]; - int32 var_4707_axis_0 = const()[name = string("op_4707_axis_0"), val = int32(1)]; - int32 var_4707_batch_dims_0 = const()[name = string("op_4707_batch_dims_0"), val = int32(0)]; - bool var_4707_validate_indices_0 = const()[name = string("op_4707_validate_indices_0"), val = bool(false)]; - tensor var_4699_to_fp16 = const()[name = string("op_4699_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143085952)))]; - tensor var_4707_cast_fp16_cast_uint16 = gather(axis = var_4707_axis_0, batch_dims = var_4707_batch_dims_0, indices = current_pos_to_uint16, validate_indices = var_4707_validate_indices_0, x = var_4699_to_fp16)[name = string("op_4707_cast_fp16_cast_uint16")]; - tensor var_4712 = const()[name = string("op_4712"), val = tensor([1, 1, 1, -1])]; - tensor cos_21_cast_fp16 = reshape(shape = var_4712, x = var_4707_cast_fp16_cast_uint16)[name = string("cos_21_cast_fp16")]; - int32 var_4733 = const()[name = string("op_4733"), val = int32(-1)]; - fp16 const_190_promoted_to_fp16 = const()[name = string("const_190_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_4735_cast_fp16 = mul(x = hidden_states_81_cast_fp16, y = const_190_promoted_to_fp16)[name = string("op_4735_cast_fp16")]; - bool input_101_interleave_0 = const()[name = string("input_101_interleave_0"), val = bool(false)]; - tensor input_101_cast_fp16 = concat(axis = var_4733, interleave = input_101_interleave_0, values = (hidden_states_81_cast_fp16, var_4735_cast_fp16))[name = string("input_101_cast_fp16")]; - tensor normed_121_axes_0 = const()[name = string("normed_121_axes_0"), val = tensor([-1])]; - fp16 var_4730_to_fp16 = const()[name = string("op_4730_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_121_cast_fp16 = layer_norm(axes = normed_121_axes_0, epsilon = var_4730_to_fp16, x = input_101_cast_fp16)[name = string("normed_121_cast_fp16")]; - tensor normed_123_begin_0 = const()[name = string("normed_123_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_123_end_0 = const()[name = string("normed_123_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_123_end_mask_0 = const()[name = string("normed_123_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_123_cast_fp16 = slice_by_index(begin = normed_123_begin_0, end = normed_123_end_0, end_mask = normed_123_end_mask_0, x = normed_121_cast_fp16)[name = string("normed_123_cast_fp16")]; - tensor var_4749_to_fp16 = const()[name = string("op_4749_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147280320)))]; - tensor hidden_states_83_cast_fp16 = mul(x = normed_123_cast_fp16, y = var_4749_to_fp16)[name = string("hidden_states_83_cast_fp16")]; - tensor var_4754 = const()[name = string("op_4754"), val = tensor([0, 2, 1])]; - tensor var_4757_axes_0 = const()[name = string("op_4757_axes_0"), val = tensor([2])]; - tensor var_4755_cast_fp16 = transpose(perm = var_4754, x = hidden_states_83_cast_fp16)[name = string("transpose_125")]; - tensor var_4757_cast_fp16 = expand_dims(axes = var_4757_axes_0, x = var_4755_cast_fp16)[name = string("op_4757_cast_fp16")]; - string var_4773_pad_type_0 = const()[name = string("op_4773_pad_type_0"), val = string("valid")]; - tensor var_4773_strides_0 = const()[name = string("op_4773_strides_0"), val = tensor([1, 1])]; - tensor var_4773_pad_0 = const()[name = string("op_4773_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_4773_dilations_0 = const()[name = string("op_4773_dilations_0"), val = tensor([1, 1])]; - int32 var_4773_groups_0 = const()[name = string("op_4773_groups_0"), val = int32(1)]; - tensor var_4773 = conv(dilations = var_4773_dilations_0, groups = var_4773_groups_0, pad = var_4773_pad_0, pad_type = var_4773_pad_type_0, strides = var_4773_strides_0, weight = model_model_layers_5_self_attn_q_proj_weight_palettized, x = var_4757_cast_fp16)[name = string("op_4773")]; - tensor var_4778 = const()[name = string("op_4778"), val = tensor([1, 4, 1, 256])]; - tensor var_4779 = reshape(shape = var_4778, x = var_4773)[name = string("op_4779")]; - string var_4795_pad_type_0 = const()[name = string("op_4795_pad_type_0"), val = string("valid")]; - tensor var_4795_strides_0 = const()[name = string("op_4795_strides_0"), val = tensor([1, 1])]; - tensor var_4795_pad_0 = const()[name = string("op_4795_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_4795_dilations_0 = const()[name = string("op_4795_dilations_0"), val = tensor([1, 1])]; - int32 var_4795_groups_0 = const()[name = string("op_4795_groups_0"), val = int32(1)]; - tensor var_4795 = conv(dilations = var_4795_dilations_0, groups = var_4795_groups_0, pad = var_4795_pad_0, pad_type = var_4795_pad_type_0, strides = var_4795_strides_0, weight = model_model_layers_5_self_attn_k_proj_weight_palettized, x = var_4757_cast_fp16)[name = string("op_4795")]; - tensor var_4800 = const()[name = string("op_4800"), val = tensor([1, 1, 1, 256])]; - tensor var_4801 = reshape(shape = var_4800, x = var_4795)[name = string("op_4801")]; - string var_4817_pad_type_0 = const()[name = string("op_4817_pad_type_0"), val = string("valid")]; - tensor var_4817_strides_0 = const()[name = string("op_4817_strides_0"), val = tensor([1, 1])]; - tensor var_4817_pad_0 = const()[name = string("op_4817_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_4817_dilations_0 = const()[name = string("op_4817_dilations_0"), val = tensor([1, 1])]; - int32 var_4817_groups_0 = const()[name = string("op_4817_groups_0"), val = int32(1)]; - tensor var_4817 = conv(dilations = var_4817_dilations_0, groups = var_4817_groups_0, pad = var_4817_pad_0, pad_type = var_4817_pad_type_0, strides = var_4817_strides_0, weight = model_model_layers_5_self_attn_v_proj_weight_palettized, x = var_4757_cast_fp16)[name = string("op_4817")]; - tensor var_4822 = const()[name = string("op_4822"), val = tensor([1, 1, 1, 256])]; - tensor var_4823 = reshape(shape = var_4822, x = var_4817)[name = string("op_4823")]; - int32 var_4838 = const()[name = string("op_4838"), val = int32(-1)]; - fp16 const_194_promoted = const()[name = string("const_194_promoted"), val = fp16(-0x1p+0)]; - tensor var_4840 = mul(x = var_4779, y = const_194_promoted)[name = string("op_4840")]; - bool input_105_interleave_0 = const()[name = string("input_105_interleave_0"), val = bool(false)]; - tensor input_105 = concat(axis = var_4838, interleave = input_105_interleave_0, values = (var_4779, var_4840))[name = string("input_105")]; - tensor normed_125_axes_0 = const()[name = string("normed_125_axes_0"), val = tensor([-1])]; - fp16 var_4835_to_fp16 = const()[name = string("op_4835_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_125_cast_fp16 = layer_norm(axes = normed_125_axes_0, epsilon = var_4835_to_fp16, x = input_105)[name = string("normed_125_cast_fp16")]; - tensor normed_127_begin_0 = const()[name = string("normed_127_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_127_end_0 = const()[name = string("normed_127_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_127_end_mask_0 = const()[name = string("normed_127_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_127 = slice_by_index(begin = normed_127_begin_0, end = normed_127_end_0, end_mask = normed_127_end_mask_0, x = normed_125_cast_fp16)[name = string("normed_127")]; - tensor var_4854_to_fp16 = const()[name = string("op_4854_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147282688)))]; - tensor q_11_cast_fp16 = mul(x = normed_127, y = var_4854_to_fp16)[name = string("q_11_cast_fp16")]; - int32 var_4865 = const()[name = string("op_4865"), val = int32(-1)]; - fp16 const_198_promoted = const()[name = string("const_198_promoted"), val = fp16(-0x1p+0)]; - tensor var_4867 = mul(x = var_4801, y = const_198_promoted)[name = string("op_4867")]; - bool input_107_interleave_0 = const()[name = string("input_107_interleave_0"), val = bool(false)]; - tensor input_107 = concat(axis = var_4865, interleave = input_107_interleave_0, values = (var_4801, var_4867))[name = string("input_107")]; - tensor normed_129_axes_0 = const()[name = string("normed_129_axes_0"), val = tensor([-1])]; - fp16 var_4862_to_fp16 = const()[name = string("op_4862_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_129_cast_fp16 = layer_norm(axes = normed_129_axes_0, epsilon = var_4862_to_fp16, x = input_107)[name = string("normed_129_cast_fp16")]; - tensor normed_131_begin_0 = const()[name = string("normed_131_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_131_end_0 = const()[name = string("normed_131_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_131_end_mask_0 = const()[name = string("normed_131_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_131 = slice_by_index(begin = normed_131_begin_0, end = normed_131_end_0, end_mask = normed_131_end_mask_0, x = normed_129_cast_fp16)[name = string("normed_131")]; - tensor var_4881_to_fp16 = const()[name = string("op_4881_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147283264)))]; - tensor k_11_cast_fp16 = mul(x = normed_131, y = var_4881_to_fp16)[name = string("k_11_cast_fp16")]; - tensor var_4883_cast_fp16 = mul(x = q_11_cast_fp16, y = cos_21_cast_fp16)[name = string("op_4883_cast_fp16")]; - tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_21_cast_fp16 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = q_11_cast_fp16)[name = string("x1_21_cast_fp16")]; - tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_21_cast_fp16 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = q_11_cast_fp16)[name = string("x2_21_cast_fp16")]; - fp16 const_204_promoted_to_fp16 = const()[name = string("const_204_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_4904_cast_fp16 = mul(x = x2_21_cast_fp16, y = const_204_promoted_to_fp16)[name = string("op_4904_cast_fp16")]; - int32 var_4906 = const()[name = string("op_4906"), val = int32(-1)]; - bool var_4907_interleave_0 = const()[name = string("op_4907_interleave_0"), val = bool(false)]; - tensor var_4907_cast_fp16 = concat(axis = var_4906, interleave = var_4907_interleave_0, values = (var_4904_cast_fp16, x1_21_cast_fp16))[name = string("op_4907_cast_fp16")]; - tensor var_4908_cast_fp16 = mul(x = var_4907_cast_fp16, y = sin_21_cast_fp16)[name = string("op_4908_cast_fp16")]; - tensor query_states_21_cast_fp16 = add(x = var_4883_cast_fp16, y = var_4908_cast_fp16)[name = string("query_states_21_cast_fp16")]; - tensor var_4911_cast_fp16 = mul(x = k_11_cast_fp16, y = cos_21_cast_fp16)[name = string("op_4911_cast_fp16")]; - tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_23_cast_fp16 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = k_11_cast_fp16)[name = string("x1_23_cast_fp16")]; - tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_23_cast_fp16 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = k_11_cast_fp16)[name = string("x2_23_cast_fp16")]; - fp16 const_207_promoted_to_fp16 = const()[name = string("const_207_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_4932_cast_fp16 = mul(x = x2_23_cast_fp16, y = const_207_promoted_to_fp16)[name = string("op_4932_cast_fp16")]; - int32 var_4934 = const()[name = string("op_4934"), val = int32(-1)]; - bool var_4935_interleave_0 = const()[name = string("op_4935_interleave_0"), val = bool(false)]; - tensor var_4935_cast_fp16 = concat(axis = var_4934, interleave = var_4935_interleave_0, values = (var_4932_cast_fp16, x1_23_cast_fp16))[name = string("op_4935_cast_fp16")]; - tensor var_4936_cast_fp16 = mul(x = var_4935_cast_fp16, y = sin_21_cast_fp16)[name = string("op_4936_cast_fp16")]; - tensor key_states_21_cast_fp16 = add(x = var_4911_cast_fp16, y = var_4936_cast_fp16)[name = string("key_states_21_cast_fp16")]; - tensor read_state_1 = read_state(input = model_model_kv_cache_global)[name = string("read_state_1")]; - tensor model_model_kv_cache_global_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_global_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_global_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_global_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_global_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_1_stride_0, update = key_states_21_cast_fp16, x = read_state_1)[name = string("model_model_kv_cache_global_internal_tensor_assign_1_cast_fp16")]; - write_state(data = model_model_kv_cache_global_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_10_write_state")]; - tensor coreml_update_state_62 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_10")]; - tensor model_model_kv_cache_global_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_global_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_global_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_34, begin_mask = model_model_kv_cache_global_internal_tensor_assign_2_begin_mask_0, end = concat_35, end_mask = model_model_kv_cache_global_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_2_stride_0, update = var_4823, x = coreml_update_state_62)[name = string("model_model_kv_cache_global_internal_tensor_assign_2_cast_fp16")]; - write_state(data = model_model_kv_cache_global_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_11_write_state")]; - tensor coreml_update_state_63 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_11")]; - tensor var_4991_begin_0 = const()[name = string("op_4991_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_4991_end_0 = const()[name = string("op_4991_end_0"), val = tensor([1, 1, 4096, 256])]; - tensor var_4991_end_mask_0 = const()[name = string("op_4991_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_4991_cast_fp16 = slice_by_index(begin = var_4991_begin_0, end = var_4991_end_0, end_mask = var_4991_end_mask_0, x = coreml_update_state_63)[name = string("op_4991_cast_fp16")]; - tensor var_4998_begin_0 = const()[name = string("op_4998_begin_0"), val = tensor([4, 0, 0, 0])]; - tensor var_4998_end_0 = const()[name = string("op_4998_end_0"), val = tensor([5, 1, 4096, 256])]; - tensor var_4998_end_mask_0 = const()[name = string("op_4998_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_4998_cast_fp16 = slice_by_index(begin = var_4998_begin_0, end = var_4998_end_0, end_mask = var_4998_end_mask_0, x = coreml_update_state_63)[name = string("op_4998_cast_fp16")]; - tensor var_5035 = const()[name = string("op_5035"), val = tensor([1, 4, 1, 1])]; - tensor x_85_cast_fp16 = tile(reps = var_5035, x = var_4991_cast_fp16)[name = string("x_85_cast_fp16")]; - tensor var_5055 = const()[name = string("op_5055"), val = tensor([1, 4, 1, 1])]; - tensor x_91_cast_fp16 = tile(reps = var_5055, x = var_4998_cast_fp16)[name = string("x_91_cast_fp16")]; - bool var_5082_transpose_x_1 = const()[name = string("op_5082_transpose_x_1"), val = bool(false)]; - bool var_5082_transpose_y_1 = const()[name = string("op_5082_transpose_y_1"), val = bool(true)]; - tensor var_5082 = matmul(transpose_x = var_5082_transpose_x_1, transpose_y = var_5082_transpose_y_1, x = query_states_21_cast_fp16, y = x_85_cast_fp16)[name = string("op_5082")]; - fp16 var_5083_to_fp16 = const()[name = string("op_5083_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_31_cast_fp16 = mul(x = var_5082, y = var_5083_to_fp16)[name = string("attn_weights_31_cast_fp16")]; - tensor attn_weights_33_cast_fp16 = add(x = attn_weights_31_cast_fp16, y = causal_mask)[name = string("attn_weights_33_cast_fp16")]; - int32 var_5118 = const()[name = string("op_5118"), val = int32(-1)]; - tensor attn_weights_35_cast_fp16 = softmax(axis = var_5118, x = attn_weights_33_cast_fp16)[name = string("attn_weights_35_cast_fp16")]; - bool attn_output_51_transpose_x_0 = const()[name = string("attn_output_51_transpose_x_0"), val = bool(false)]; - bool attn_output_51_transpose_y_0 = const()[name = string("attn_output_51_transpose_y_0"), val = bool(false)]; - tensor attn_output_51_cast_fp16 = matmul(transpose_x = attn_output_51_transpose_x_0, transpose_y = attn_output_51_transpose_y_0, x = attn_weights_35_cast_fp16, y = x_91_cast_fp16)[name = string("attn_output_51_cast_fp16")]; - tensor var_5129_perm_0 = const()[name = string("op_5129_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_5133 = const()[name = string("op_5133"), val = tensor([1, 1, 1024])]; - tensor var_5129_cast_fp16 = transpose(perm = var_5129_perm_0, x = attn_output_51_cast_fp16)[name = string("transpose_124")]; - tensor attn_output_55_cast_fp16 = reshape(shape = var_5133, x = var_5129_cast_fp16)[name = string("attn_output_55_cast_fp16")]; - tensor var_5138 = const()[name = string("op_5138"), val = tensor([0, 2, 1])]; - string var_5154_pad_type_0 = const()[name = string("op_5154_pad_type_0"), val = string("valid")]; - int32 var_5154_groups_0 = const()[name = string("op_5154_groups_0"), val = int32(1)]; - tensor var_5154_strides_0 = const()[name = string("op_5154_strides_0"), val = tensor([1])]; - tensor var_5154_pad_0 = const()[name = string("op_5154_pad_0"), val = tensor([0, 0])]; - tensor var_5154_dilations_0 = const()[name = string("op_5154_dilations_0"), val = tensor([1])]; - tensor squeeze_5_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147283840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148168640))))[name = string("squeeze_5_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_5139_cast_fp16 = transpose(perm = var_5138, x = attn_output_55_cast_fp16)[name = string("transpose_123")]; - tensor var_5154_cast_fp16 = conv(dilations = var_5154_dilations_0, groups = var_5154_groups_0, pad = var_5154_pad_0, pad_type = var_5154_pad_type_0, strides = var_5154_strides_0, weight = squeeze_5_cast_fp16_to_fp32_to_fp16_palettized, x = var_5139_cast_fp16)[name = string("op_5154_cast_fp16")]; - tensor var_5158 = const()[name = string("op_5158"), val = tensor([0, 2, 1])]; - int32 var_5169 = const()[name = string("op_5169"), val = int32(-1)]; - fp16 const_216_promoted_to_fp16 = const()[name = string("const_216_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_89_cast_fp16 = transpose(perm = var_5158, x = var_5154_cast_fp16)[name = string("transpose_122")]; - tensor var_5171_cast_fp16 = mul(x = hidden_states_89_cast_fp16, y = const_216_promoted_to_fp16)[name = string("op_5171_cast_fp16")]; - bool input_111_interleave_0 = const()[name = string("input_111_interleave_0"), val = bool(false)]; - tensor input_111_cast_fp16 = concat(axis = var_5169, interleave = input_111_interleave_0, values = (hidden_states_89_cast_fp16, var_5171_cast_fp16))[name = string("input_111_cast_fp16")]; - tensor normed_133_axes_0 = const()[name = string("normed_133_axes_0"), val = tensor([-1])]; - fp16 var_5166_to_fp16 = const()[name = string("op_5166_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_133_cast_fp16 = layer_norm(axes = normed_133_axes_0, epsilon = var_5166_to_fp16, x = input_111_cast_fp16)[name = string("normed_133_cast_fp16")]; - tensor normed_135_begin_0 = const()[name = string("normed_135_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_135_end_0 = const()[name = string("normed_135_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_135_end_mask_0 = const()[name = string("normed_135_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_135_cast_fp16 = slice_by_index(begin = normed_135_begin_0, end = normed_135_end_0, end_mask = normed_135_end_mask_0, x = normed_133_cast_fp16)[name = string("normed_135_cast_fp16")]; - tensor var_5185_to_fp16 = const()[name = string("op_5185_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148187136)))]; - tensor attn_output_59_cast_fp16 = mul(x = normed_135_cast_fp16, y = var_5185_to_fp16)[name = string("attn_output_59_cast_fp16")]; - tensor hidden_states_91_cast_fp16 = add(x = hidden_states_81_cast_fp16, y = attn_output_59_cast_fp16)[name = string("hidden_states_91_cast_fp16")]; - int32 var_5198 = const()[name = string("op_5198"), val = int32(-1)]; - fp16 const_220_promoted_to_fp16 = const()[name = string("const_220_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_5200_cast_fp16 = mul(x = hidden_states_91_cast_fp16, y = const_220_promoted_to_fp16)[name = string("op_5200_cast_fp16")]; - bool input_113_interleave_0 = const()[name = string("input_113_interleave_0"), val = bool(false)]; - tensor input_113_cast_fp16 = concat(axis = var_5198, interleave = input_113_interleave_0, values = (hidden_states_91_cast_fp16, var_5200_cast_fp16))[name = string("input_113_cast_fp16")]; - tensor normed_137_axes_0 = const()[name = string("normed_137_axes_0"), val = tensor([-1])]; - fp16 var_5195_to_fp16 = const()[name = string("op_5195_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_137_cast_fp16 = layer_norm(axes = normed_137_axes_0, epsilon = var_5195_to_fp16, x = input_113_cast_fp16)[name = string("normed_137_cast_fp16")]; - tensor normed_139_begin_0 = const()[name = string("normed_139_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_139_end_0 = const()[name = string("normed_139_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_139_end_mask_0 = const()[name = string("normed_139_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_139_cast_fp16 = slice_by_index(begin = normed_139_begin_0, end = normed_139_end_0, end_mask = normed_139_end_mask_0, x = normed_137_cast_fp16)[name = string("normed_139_cast_fp16")]; - tensor var_5214_to_fp16 = const()[name = string("op_5214_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148189504)))]; - tensor x_93_cast_fp16 = mul(x = normed_139_cast_fp16, y = var_5214_to_fp16)[name = string("x_93_cast_fp16")]; - tensor var_5226 = const()[name = string("op_5226"), val = tensor([0, 2, 1])]; - tensor input_115_axes_0 = const()[name = string("input_115_axes_0"), val = tensor([2])]; - tensor var_5227_cast_fp16 = transpose(perm = var_5226, x = x_93_cast_fp16)[name = string("transpose_121")]; - tensor input_115_cast_fp16 = expand_dims(axes = input_115_axes_0, x = var_5227_cast_fp16)[name = string("input_115_cast_fp16")]; - string x_95_pad_type_0 = const()[name = string("x_95_pad_type_0"), val = string("valid")]; - tensor x_95_strides_0 = const()[name = string("x_95_strides_0"), val = tensor([1, 1])]; - tensor x_95_pad_0 = const()[name = string("x_95_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_95_dilations_0 = const()[name = string("x_95_dilations_0"), val = tensor([1, 1])]; - int32 x_95_groups_0 = const()[name = string("x_95_groups_0"), val = int32(1)]; - tensor model_model_layers_5_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148191872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154163904))))[name = string("model_model_layers_5_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_95_cast_fp16 = conv(dilations = x_95_dilations_0, groups = x_95_groups_0, pad = x_95_pad_0, pad_type = x_95_pad_type_0, strides = x_95_strides_0, weight = model_model_layers_5_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_115_cast_fp16)[name = string("x_95_cast_fp16")]; - string b_11_pad_type_0 = const()[name = string("b_11_pad_type_0"), val = string("valid")]; - tensor b_11_strides_0 = const()[name = string("b_11_strides_0"), val = tensor([1, 1])]; - tensor b_11_pad_0 = const()[name = string("b_11_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_11_dilations_0 = const()[name = string("b_11_dilations_0"), val = tensor([1, 1])]; - int32 b_11_groups_0 = const()[name = string("b_11_groups_0"), val = int32(1)]; - tensor model_model_layers_5_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154274560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160246592))))[name = string("model_model_layers_5_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_11_cast_fp16 = conv(dilations = b_11_dilations_0, groups = b_11_groups_0, pad = b_11_pad_0, pad_type = b_11_pad_type_0, strides = b_11_strides_0, weight = model_model_layers_5_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_115_cast_fp16)[name = string("b_11_cast_fp16")]; - string var_5252_mode_0 = const()[name = string("op_5252_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_5252_cast_fp16 = gelu(mode = var_5252_mode_0, x = x_95_cast_fp16)[name = string("op_5252_cast_fp16")]; - tensor input_117_cast_fp16 = mul(x = var_5252_cast_fp16, y = b_11_cast_fp16)[name = string("input_117_cast_fp16")]; - string e_11_pad_type_0 = const()[name = string("e_11_pad_type_0"), val = string("valid")]; - tensor e_11_strides_0 = const()[name = string("e_11_strides_0"), val = tensor([1, 1])]; - tensor e_11_pad_0 = const()[name = string("e_11_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_11_dilations_0 = const()[name = string("e_11_dilations_0"), val = tensor([1, 1])]; - int32 e_11_groups_0 = const()[name = string("e_11_groups_0"), val = int32(1)]; - tensor model_model_layers_5_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160357248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166329280))))[name = string("model_model_layers_5_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_11_cast_fp16 = conv(dilations = e_11_dilations_0, groups = e_11_groups_0, pad = e_11_pad_0, pad_type = e_11_pad_type_0, strides = e_11_strides_0, weight = model_model_layers_5_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_117_cast_fp16)[name = string("e_11_cast_fp16")]; - tensor var_5260_axes_0 = const()[name = string("op_5260_axes_0"), val = tensor([2])]; - tensor var_5260_cast_fp16 = squeeze(axes = var_5260_axes_0, x = e_11_cast_fp16)[name = string("op_5260_cast_fp16")]; - tensor var_5261 = const()[name = string("op_5261"), val = tensor([0, 2, 1])]; - int32 var_5272 = const()[name = string("op_5272"), val = int32(-1)]; - fp16 const_224_promoted_to_fp16 = const()[name = string("const_224_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_93_cast_fp16 = transpose(perm = var_5261, x = var_5260_cast_fp16)[name = string("transpose_120")]; - tensor var_5274_cast_fp16 = mul(x = hidden_states_93_cast_fp16, y = const_224_promoted_to_fp16)[name = string("op_5274_cast_fp16")]; - bool input_119_interleave_0 = const()[name = string("input_119_interleave_0"), val = bool(false)]; - tensor input_119_cast_fp16 = concat(axis = var_5272, interleave = input_119_interleave_0, values = (hidden_states_93_cast_fp16, var_5274_cast_fp16))[name = string("input_119_cast_fp16")]; - tensor normed_141_axes_0 = const()[name = string("normed_141_axes_0"), val = tensor([-1])]; - fp16 var_5269_to_fp16 = const()[name = string("op_5269_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_141_cast_fp16 = layer_norm(axes = normed_141_axes_0, epsilon = var_5269_to_fp16, x = input_119_cast_fp16)[name = string("normed_141_cast_fp16")]; - tensor normed_143_begin_0 = const()[name = string("normed_143_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_143_end_0 = const()[name = string("normed_143_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_143_end_mask_0 = const()[name = string("normed_143_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_143_cast_fp16 = slice_by_index(begin = normed_143_begin_0, end = normed_143_end_0, end_mask = normed_143_end_mask_0, x = normed_141_cast_fp16)[name = string("normed_143_cast_fp16")]; - tensor var_5288_to_fp16 = const()[name = string("op_5288_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166347776)))]; - tensor hidden_states_95_cast_fp16 = mul(x = normed_143_cast_fp16, y = var_5288_to_fp16)[name = string("hidden_states_95_cast_fp16")]; - tensor hidden_states_97_cast_fp16 = add(x = hidden_states_91_cast_fp16, y = hidden_states_95_cast_fp16)[name = string("hidden_states_97_cast_fp16")]; - int32 var_5339 = const()[name = string("op_5339"), val = int32(-1)]; - fp16 const_228_promoted_to_fp16 = const()[name = string("const_228_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_5341_cast_fp16 = mul(x = hidden_states_97_cast_fp16, y = const_228_promoted_to_fp16)[name = string("op_5341_cast_fp16")]; - bool input_121_interleave_0 = const()[name = string("input_121_interleave_0"), val = bool(false)]; - tensor input_121_cast_fp16 = concat(axis = var_5339, interleave = input_121_interleave_0, values = (hidden_states_97_cast_fp16, var_5341_cast_fp16))[name = string("input_121_cast_fp16")]; - tensor normed_145_axes_0 = const()[name = string("normed_145_axes_0"), val = tensor([-1])]; - fp16 var_5336_to_fp16 = const()[name = string("op_5336_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_145_cast_fp16 = layer_norm(axes = normed_145_axes_0, epsilon = var_5336_to_fp16, x = input_121_cast_fp16)[name = string("normed_145_cast_fp16")]; - tensor normed_147_begin_0 = const()[name = string("normed_147_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_147_end_0 = const()[name = string("normed_147_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_147_end_mask_0 = const()[name = string("normed_147_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_147_cast_fp16 = slice_by_index(begin = normed_147_begin_0, end = normed_147_end_0, end_mask = normed_147_end_mask_0, x = normed_145_cast_fp16)[name = string("normed_147_cast_fp16")]; - tensor var_5355_to_fp16 = const()[name = string("op_5355_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166350144)))]; - tensor hidden_states_99_cast_fp16 = mul(x = normed_147_cast_fp16, y = var_5355_to_fp16)[name = string("hidden_states_99_cast_fp16")]; - tensor var_5360 = const()[name = string("op_5360"), val = tensor([0, 2, 1])]; - tensor var_5363_axes_0 = const()[name = string("op_5363_axes_0"), val = tensor([2])]; - tensor var_5361_cast_fp16 = transpose(perm = var_5360, x = hidden_states_99_cast_fp16)[name = string("transpose_119")]; - tensor var_5363_cast_fp16 = expand_dims(axes = var_5363_axes_0, x = var_5361_cast_fp16)[name = string("op_5363_cast_fp16")]; - string var_5379_pad_type_0 = const()[name = string("op_5379_pad_type_0"), val = string("valid")]; - tensor var_5379_strides_0 = const()[name = string("op_5379_strides_0"), val = tensor([1, 1])]; - tensor var_5379_pad_0 = const()[name = string("op_5379_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_5379_dilations_0 = const()[name = string("op_5379_dilations_0"), val = tensor([1, 1])]; - int32 var_5379_groups_0 = const()[name = string("op_5379_groups_0"), val = int32(1)]; - tensor var_5379 = conv(dilations = var_5379_dilations_0, groups = var_5379_groups_0, pad = var_5379_pad_0, pad_type = var_5379_pad_type_0, strides = var_5379_strides_0, weight = model_model_layers_6_self_attn_q_proj_weight_palettized, x = var_5363_cast_fp16)[name = string("op_5379")]; - tensor var_5384 = const()[name = string("op_5384"), val = tensor([1, 4, 1, 256])]; - tensor var_5385 = reshape(shape = var_5384, x = var_5379)[name = string("op_5385")]; - string var_5401_pad_type_0 = const()[name = string("op_5401_pad_type_0"), val = string("valid")]; - tensor var_5401_strides_0 = const()[name = string("op_5401_strides_0"), val = tensor([1, 1])]; - tensor var_5401_pad_0 = const()[name = string("op_5401_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_5401_dilations_0 = const()[name = string("op_5401_dilations_0"), val = tensor([1, 1])]; - int32 var_5401_groups_0 = const()[name = string("op_5401_groups_0"), val = int32(1)]; - tensor var_5401 = conv(dilations = var_5401_dilations_0, groups = var_5401_groups_0, pad = var_5401_pad_0, pad_type = var_5401_pad_type_0, strides = var_5401_strides_0, weight = model_model_layers_6_self_attn_k_proj_weight_palettized, x = var_5363_cast_fp16)[name = string("op_5401")]; - tensor var_5406 = const()[name = string("op_5406"), val = tensor([1, 1, 1, 256])]; - tensor var_5407 = reshape(shape = var_5406, x = var_5401)[name = string("op_5407")]; - string var_5423_pad_type_0 = const()[name = string("op_5423_pad_type_0"), val = string("valid")]; - tensor var_5423_strides_0 = const()[name = string("op_5423_strides_0"), val = tensor([1, 1])]; - tensor var_5423_pad_0 = const()[name = string("op_5423_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_5423_dilations_0 = const()[name = string("op_5423_dilations_0"), val = tensor([1, 1])]; - int32 var_5423_groups_0 = const()[name = string("op_5423_groups_0"), val = int32(1)]; - tensor var_5423 = conv(dilations = var_5423_dilations_0, groups = var_5423_groups_0, pad = var_5423_pad_0, pad_type = var_5423_pad_type_0, strides = var_5423_strides_0, weight = model_model_layers_6_self_attn_v_proj_weight_palettized, x = var_5363_cast_fp16)[name = string("op_5423")]; - tensor var_5428 = const()[name = string("op_5428"), val = tensor([1, 1, 1, 256])]; - tensor var_5429 = reshape(shape = var_5428, x = var_5423)[name = string("op_5429")]; - int32 var_5444 = const()[name = string("op_5444"), val = int32(-1)]; - fp16 const_232_promoted = const()[name = string("const_232_promoted"), val = fp16(-0x1p+0)]; - tensor var_5446 = mul(x = var_5385, y = const_232_promoted)[name = string("op_5446")]; - bool input_125_interleave_0 = const()[name = string("input_125_interleave_0"), val = bool(false)]; - tensor input_125 = concat(axis = var_5444, interleave = input_125_interleave_0, values = (var_5385, var_5446))[name = string("input_125")]; - tensor normed_149_axes_0 = const()[name = string("normed_149_axes_0"), val = tensor([-1])]; - fp16 var_5441_to_fp16 = const()[name = string("op_5441_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_149_cast_fp16 = layer_norm(axes = normed_149_axes_0, epsilon = var_5441_to_fp16, x = input_125)[name = string("normed_149_cast_fp16")]; - tensor normed_151_begin_0 = const()[name = string("normed_151_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_151_end_0 = const()[name = string("normed_151_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_151_end_mask_0 = const()[name = string("normed_151_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_151 = slice_by_index(begin = normed_151_begin_0, end = normed_151_end_0, end_mask = normed_151_end_mask_0, x = normed_149_cast_fp16)[name = string("normed_151")]; - tensor var_5460_to_fp16 = const()[name = string("op_5460_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166352512)))]; - tensor q_13_cast_fp16 = mul(x = normed_151, y = var_5460_to_fp16)[name = string("q_13_cast_fp16")]; - int32 var_5471 = const()[name = string("op_5471"), val = int32(-1)]; - fp16 const_236_promoted = const()[name = string("const_236_promoted"), val = fp16(-0x1p+0)]; - tensor var_5473 = mul(x = var_5407, y = const_236_promoted)[name = string("op_5473")]; - bool input_127_interleave_0 = const()[name = string("input_127_interleave_0"), val = bool(false)]; - tensor input_127 = concat(axis = var_5471, interleave = input_127_interleave_0, values = (var_5407, var_5473))[name = string("input_127")]; - tensor normed_153_axes_0 = const()[name = string("normed_153_axes_0"), val = tensor([-1])]; - fp16 var_5468_to_fp16 = const()[name = string("op_5468_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_153_cast_fp16 = layer_norm(axes = normed_153_axes_0, epsilon = var_5468_to_fp16, x = input_127)[name = string("normed_153_cast_fp16")]; - tensor normed_155_begin_0 = const()[name = string("normed_155_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_155_end_0 = const()[name = string("normed_155_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_155_end_mask_0 = const()[name = string("normed_155_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_155 = slice_by_index(begin = normed_155_begin_0, end = normed_155_end_0, end_mask = normed_155_end_mask_0, x = normed_153_cast_fp16)[name = string("normed_155")]; - tensor var_5487_to_fp16 = const()[name = string("op_5487_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166353088)))]; - tensor k_13_cast_fp16 = mul(x = normed_155, y = var_5487_to_fp16)[name = string("k_13_cast_fp16")]; - tensor var_5489_cast_fp16 = mul(x = q_13_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5489_cast_fp16")]; - tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_25_cast_fp16 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = q_13_cast_fp16)[name = string("x1_25_cast_fp16")]; - tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_25_cast_fp16 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = q_13_cast_fp16)[name = string("x2_25_cast_fp16")]; - fp16 const_242_promoted_to_fp16 = const()[name = string("const_242_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_5510_cast_fp16 = mul(x = x2_25_cast_fp16, y = const_242_promoted_to_fp16)[name = string("op_5510_cast_fp16")]; - int32 var_5512 = const()[name = string("op_5512"), val = int32(-1)]; - bool var_5513_interleave_0 = const()[name = string("op_5513_interleave_0"), val = bool(false)]; - tensor var_5513_cast_fp16 = concat(axis = var_5512, interleave = var_5513_interleave_0, values = (var_5510_cast_fp16, x1_25_cast_fp16))[name = string("op_5513_cast_fp16")]; - tensor var_5514_cast_fp16 = mul(x = var_5513_cast_fp16, y = sin_1_cast_fp16)[name = string("op_5514_cast_fp16")]; - tensor query_states_25_cast_fp16 = add(x = var_5489_cast_fp16, y = var_5514_cast_fp16)[name = string("query_states_25_cast_fp16")]; - tensor var_5517_cast_fp16 = mul(x = k_13_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5517_cast_fp16")]; - tensor x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_27_cast_fp16 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = k_13_cast_fp16)[name = string("x1_27_cast_fp16")]; - tensor x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_27_cast_fp16 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = k_13_cast_fp16)[name = string("x2_27_cast_fp16")]; - fp16 const_245_promoted_to_fp16 = const()[name = string("const_245_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_5538_cast_fp16 = mul(x = x2_27_cast_fp16, y = const_245_promoted_to_fp16)[name = string("op_5538_cast_fp16")]; - int32 var_5540 = const()[name = string("op_5540"), val = int32(-1)]; - bool var_5541_interleave_0 = const()[name = string("op_5541_interleave_0"), val = bool(false)]; - tensor var_5541_cast_fp16 = concat(axis = var_5540, interleave = var_5541_interleave_0, values = (var_5538_cast_fp16, x1_27_cast_fp16))[name = string("op_5541_cast_fp16")]; - tensor var_5542_cast_fp16 = mul(x = var_5541_cast_fp16, y = sin_1_cast_fp16)[name = string("op_5542_cast_fp16")]; - tensor key_states_25_cast_fp16 = add(x = var_5517_cast_fp16, y = var_5542_cast_fp16)[name = string("key_states_25_cast_fp16")]; - tensor expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor([5])]; - tensor expand_dims_73 = const()[name = string("expand_dims_73"), val = tensor([0])]; - tensor expand_dims_75 = const()[name = string("expand_dims_75"), val = tensor([0])]; - tensor expand_dims_76 = const()[name = string("expand_dims_76"), val = tensor([6])]; - int32 concat_50_axis_0 = const()[name = string("concat_50_axis_0"), val = int32(0)]; - bool concat_50_interleave_0 = const()[name = string("concat_50_interleave_0"), val = bool(false)]; - tensor concat_50 = concat(axis = concat_50_axis_0, interleave = concat_50_interleave_0, values = (expand_dims_72, expand_dims_73, current_pos, expand_dims_75))[name = string("concat_50")]; - tensor concat_51_values1_0 = const()[name = string("concat_51_values1_0"), val = tensor([0])]; - tensor concat_51_values3_0 = const()[name = string("concat_51_values3_0"), val = tensor([0])]; - int32 concat_51_axis_0 = const()[name = string("concat_51_axis_0"), val = int32(0)]; - bool concat_51_interleave_0 = const()[name = string("concat_51_interleave_0"), val = bool(false)]; - tensor concat_51 = concat(axis = concat_51_axis_0, interleave = concat_51_interleave_0, values = (expand_dims_76, concat_51_values1_0, var_1909, concat_51_values3_0))[name = string("concat_51")]; - tensor model_model_kv_cache_local_internal_tensor_assign_11_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_11_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_11_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_50, begin_mask = model_model_kv_cache_local_internal_tensor_assign_11_begin_mask_0, end = concat_51, end_mask = model_model_kv_cache_local_internal_tensor_assign_11_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_11_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_11_stride_0, update = key_states_25_cast_fp16, x = coreml_update_state_61)[name = string("model_model_kv_cache_local_internal_tensor_assign_11_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_11_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_12_write_state")]; - tensor coreml_update_state_64 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_12")]; - tensor expand_dims_78 = const()[name = string("expand_dims_78"), val = tensor([27])]; - tensor expand_dims_79 = const()[name = string("expand_dims_79"), val = tensor([0])]; - tensor expand_dims_81 = const()[name = string("expand_dims_81"), val = tensor([0])]; - tensor expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor([28])]; - int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)]; - bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)]; - tensor concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (expand_dims_78, expand_dims_79, current_pos, expand_dims_81))[name = string("concat_54")]; - tensor concat_55_values1_0 = const()[name = string("concat_55_values1_0"), val = tensor([0])]; - tensor concat_55_values3_0 = const()[name = string("concat_55_values3_0"), val = tensor([0])]; - int32 concat_55_axis_0 = const()[name = string("concat_55_axis_0"), val = int32(0)]; - bool concat_55_interleave_0 = const()[name = string("concat_55_interleave_0"), val = bool(false)]; - tensor concat_55 = concat(axis = concat_55_axis_0, interleave = concat_55_interleave_0, values = (expand_dims_82, concat_55_values1_0, var_1909, concat_55_values3_0))[name = string("concat_55")]; - tensor model_model_kv_cache_local_internal_tensor_assign_12_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_12_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_12_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_54, begin_mask = model_model_kv_cache_local_internal_tensor_assign_12_begin_mask_0, end = concat_55, end_mask = model_model_kv_cache_local_internal_tensor_assign_12_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_12_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_12_stride_0, update = var_5429, x = coreml_update_state_64)[name = string("model_model_kv_cache_local_internal_tensor_assign_12_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_12_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_13_write_state")]; - tensor coreml_update_state_65 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_13")]; - tensor var_5597_begin_0 = const()[name = string("op_5597_begin_0"), val = tensor([5, 0, 0, 0])]; - tensor var_5597_end_0 = const()[name = string("op_5597_end_0"), val = tensor([6, 1, 512, 256])]; - tensor var_5597_end_mask_0 = const()[name = string("op_5597_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_5597_cast_fp16 = slice_by_index(begin = var_5597_begin_0, end = var_5597_end_0, end_mask = var_5597_end_mask_0, x = coreml_update_state_65)[name = string("op_5597_cast_fp16")]; - tensor var_5604_begin_0 = const()[name = string("op_5604_begin_0"), val = tensor([27, 0, 0, 0])]; - tensor var_5604_end_0 = const()[name = string("op_5604_end_0"), val = tensor([28, 1, 512, 256])]; - tensor var_5604_end_mask_0 = const()[name = string("op_5604_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_5604_cast_fp16 = slice_by_index(begin = var_5604_begin_0, end = var_5604_end_0, end_mask = var_5604_end_mask_0, x = coreml_update_state_65)[name = string("op_5604_cast_fp16")]; - tensor var_5641 = const()[name = string("op_5641"), val = tensor([1, 4, 1, 1])]; - tensor x_101_cast_fp16 = tile(reps = var_5641, x = var_5597_cast_fp16)[name = string("x_101_cast_fp16")]; - tensor var_5661 = const()[name = string("op_5661"), val = tensor([1, 4, 1, 1])]; - tensor x_107_cast_fp16 = tile(reps = var_5661, x = var_5604_cast_fp16)[name = string("x_107_cast_fp16")]; - bool var_5688_transpose_x_1 = const()[name = string("op_5688_transpose_x_1"), val = bool(false)]; - bool var_5688_transpose_y_1 = const()[name = string("op_5688_transpose_y_1"), val = bool(true)]; - tensor var_5688 = matmul(transpose_x = var_5688_transpose_x_1, transpose_y = var_5688_transpose_y_1, x = query_states_25_cast_fp16, y = x_101_cast_fp16)[name = string("op_5688")]; - fp16 var_5689_to_fp16 = const()[name = string("op_5689_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_37_cast_fp16 = mul(x = var_5688, y = var_5689_to_fp16)[name = string("attn_weights_37_cast_fp16")]; - tensor attn_weights_39_cast_fp16 = add(x = attn_weights_37_cast_fp16, y = var_2083)[name = string("attn_weights_39_cast_fp16")]; - int32 var_5724 = const()[name = string("op_5724"), val = int32(-1)]; - tensor attn_weights_41_cast_fp16 = softmax(axis = var_5724, x = attn_weights_39_cast_fp16)[name = string("attn_weights_41_cast_fp16")]; - bool attn_output_61_transpose_x_0 = const()[name = string("attn_output_61_transpose_x_0"), val = bool(false)]; - bool attn_output_61_transpose_y_0 = const()[name = string("attn_output_61_transpose_y_0"), val = bool(false)]; - tensor attn_output_61_cast_fp16 = matmul(transpose_x = attn_output_61_transpose_x_0, transpose_y = attn_output_61_transpose_y_0, x = attn_weights_41_cast_fp16, y = x_107_cast_fp16)[name = string("attn_output_61_cast_fp16")]; - tensor var_5735_perm_0 = const()[name = string("op_5735_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_5739 = const()[name = string("op_5739"), val = tensor([1, 1, 1024])]; - tensor var_5735_cast_fp16 = transpose(perm = var_5735_perm_0, x = attn_output_61_cast_fp16)[name = string("transpose_118")]; - tensor attn_output_65_cast_fp16 = reshape(shape = var_5739, x = var_5735_cast_fp16)[name = string("attn_output_65_cast_fp16")]; - tensor var_5744 = const()[name = string("op_5744"), val = tensor([0, 2, 1])]; - string var_5760_pad_type_0 = const()[name = string("op_5760_pad_type_0"), val = string("valid")]; - int32 var_5760_groups_0 = const()[name = string("op_5760_groups_0"), val = int32(1)]; - tensor var_5760_strides_0 = const()[name = string("op_5760_strides_0"), val = tensor([1])]; - tensor var_5760_pad_0 = const()[name = string("op_5760_pad_0"), val = tensor([0, 0])]; - tensor var_5760_dilations_0 = const()[name = string("op_5760_dilations_0"), val = tensor([1])]; - tensor squeeze_6_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166353664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167238464))))[name = string("squeeze_6_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_5745_cast_fp16 = transpose(perm = var_5744, x = attn_output_65_cast_fp16)[name = string("transpose_117")]; - tensor var_5760_cast_fp16 = conv(dilations = var_5760_dilations_0, groups = var_5760_groups_0, pad = var_5760_pad_0, pad_type = var_5760_pad_type_0, strides = var_5760_strides_0, weight = squeeze_6_cast_fp16_to_fp32_to_fp16_palettized, x = var_5745_cast_fp16)[name = string("op_5760_cast_fp16")]; - tensor var_5764 = const()[name = string("op_5764"), val = tensor([0, 2, 1])]; - int32 var_5775 = const()[name = string("op_5775"), val = int32(-1)]; - fp16 const_254_promoted_to_fp16 = const()[name = string("const_254_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_105_cast_fp16 = transpose(perm = var_5764, x = var_5760_cast_fp16)[name = string("transpose_116")]; - tensor var_5777_cast_fp16 = mul(x = hidden_states_105_cast_fp16, y = const_254_promoted_to_fp16)[name = string("op_5777_cast_fp16")]; - bool input_131_interleave_0 = const()[name = string("input_131_interleave_0"), val = bool(false)]; - tensor input_131_cast_fp16 = concat(axis = var_5775, interleave = input_131_interleave_0, values = (hidden_states_105_cast_fp16, var_5777_cast_fp16))[name = string("input_131_cast_fp16")]; - tensor normed_157_axes_0 = const()[name = string("normed_157_axes_0"), val = tensor([-1])]; - fp16 var_5772_to_fp16 = const()[name = string("op_5772_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_157_cast_fp16 = layer_norm(axes = normed_157_axes_0, epsilon = var_5772_to_fp16, x = input_131_cast_fp16)[name = string("normed_157_cast_fp16")]; - tensor normed_159_begin_0 = const()[name = string("normed_159_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_159_end_0 = const()[name = string("normed_159_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_159_end_mask_0 = const()[name = string("normed_159_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_159_cast_fp16 = slice_by_index(begin = normed_159_begin_0, end = normed_159_end_0, end_mask = normed_159_end_mask_0, x = normed_157_cast_fp16)[name = string("normed_159_cast_fp16")]; - tensor var_5791_to_fp16 = const()[name = string("op_5791_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167256960)))]; - tensor attn_output_69_cast_fp16 = mul(x = normed_159_cast_fp16, y = var_5791_to_fp16)[name = string("attn_output_69_cast_fp16")]; - tensor hidden_states_107_cast_fp16 = add(x = hidden_states_97_cast_fp16, y = attn_output_69_cast_fp16)[name = string("hidden_states_107_cast_fp16")]; - int32 var_5804 = const()[name = string("op_5804"), val = int32(-1)]; - fp16 const_258_promoted_to_fp16 = const()[name = string("const_258_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_5806_cast_fp16 = mul(x = hidden_states_107_cast_fp16, y = const_258_promoted_to_fp16)[name = string("op_5806_cast_fp16")]; - bool input_133_interleave_0 = const()[name = string("input_133_interleave_0"), val = bool(false)]; - tensor input_133_cast_fp16 = concat(axis = var_5804, interleave = input_133_interleave_0, values = (hidden_states_107_cast_fp16, var_5806_cast_fp16))[name = string("input_133_cast_fp16")]; - tensor normed_161_axes_0 = const()[name = string("normed_161_axes_0"), val = tensor([-1])]; - fp16 var_5801_to_fp16 = const()[name = string("op_5801_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_161_cast_fp16 = layer_norm(axes = normed_161_axes_0, epsilon = var_5801_to_fp16, x = input_133_cast_fp16)[name = string("normed_161_cast_fp16")]; - tensor normed_163_begin_0 = const()[name = string("normed_163_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_163_end_0 = const()[name = string("normed_163_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_163_end_mask_0 = const()[name = string("normed_163_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_163_cast_fp16 = slice_by_index(begin = normed_163_begin_0, end = normed_163_end_0, end_mask = normed_163_end_mask_0, x = normed_161_cast_fp16)[name = string("normed_163_cast_fp16")]; - tensor var_5820_to_fp16 = const()[name = string("op_5820_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167259328)))]; - tensor x_109_cast_fp16 = mul(x = normed_163_cast_fp16, y = var_5820_to_fp16)[name = string("x_109_cast_fp16")]; - tensor var_5832 = const()[name = string("op_5832"), val = tensor([0, 2, 1])]; - tensor input_135_axes_0 = const()[name = string("input_135_axes_0"), val = tensor([2])]; - tensor var_5833_cast_fp16 = transpose(perm = var_5832, x = x_109_cast_fp16)[name = string("transpose_115")]; - tensor input_135_cast_fp16 = expand_dims(axes = input_135_axes_0, x = var_5833_cast_fp16)[name = string("input_135_cast_fp16")]; - string x_111_pad_type_0 = const()[name = string("x_111_pad_type_0"), val = string("valid")]; - tensor x_111_strides_0 = const()[name = string("x_111_strides_0"), val = tensor([1, 1])]; - tensor x_111_pad_0 = const()[name = string("x_111_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_111_dilations_0 = const()[name = string("x_111_dilations_0"), val = tensor([1, 1])]; - int32 x_111_groups_0 = const()[name = string("x_111_groups_0"), val = int32(1)]; - tensor model_model_layers_6_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167261696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173233728))))[name = string("model_model_layers_6_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_111_cast_fp16 = conv(dilations = x_111_dilations_0, groups = x_111_groups_0, pad = x_111_pad_0, pad_type = x_111_pad_type_0, strides = x_111_strides_0, weight = model_model_layers_6_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_135_cast_fp16)[name = string("x_111_cast_fp16")]; - string b_13_pad_type_0 = const()[name = string("b_13_pad_type_0"), val = string("valid")]; - tensor b_13_strides_0 = const()[name = string("b_13_strides_0"), val = tensor([1, 1])]; - tensor b_13_pad_0 = const()[name = string("b_13_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_13_dilations_0 = const()[name = string("b_13_dilations_0"), val = tensor([1, 1])]; - int32 b_13_groups_0 = const()[name = string("b_13_groups_0"), val = int32(1)]; - tensor model_model_layers_6_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173344384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179316416))))[name = string("model_model_layers_6_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_13_cast_fp16 = conv(dilations = b_13_dilations_0, groups = b_13_groups_0, pad = b_13_pad_0, pad_type = b_13_pad_type_0, strides = b_13_strides_0, weight = model_model_layers_6_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_135_cast_fp16)[name = string("b_13_cast_fp16")]; - string var_5858_mode_0 = const()[name = string("op_5858_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_5858_cast_fp16 = gelu(mode = var_5858_mode_0, x = x_111_cast_fp16)[name = string("op_5858_cast_fp16")]; - tensor input_137_cast_fp16 = mul(x = var_5858_cast_fp16, y = b_13_cast_fp16)[name = string("input_137_cast_fp16")]; - string e_13_pad_type_0 = const()[name = string("e_13_pad_type_0"), val = string("valid")]; - tensor e_13_strides_0 = const()[name = string("e_13_strides_0"), val = tensor([1, 1])]; - tensor e_13_pad_0 = const()[name = string("e_13_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_13_dilations_0 = const()[name = string("e_13_dilations_0"), val = tensor([1, 1])]; - int32 e_13_groups_0 = const()[name = string("e_13_groups_0"), val = int32(1)]; - tensor model_model_layers_6_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179427072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185399104))))[name = string("model_model_layers_6_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_13_cast_fp16 = conv(dilations = e_13_dilations_0, groups = e_13_groups_0, pad = e_13_pad_0, pad_type = e_13_pad_type_0, strides = e_13_strides_0, weight = model_model_layers_6_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_137_cast_fp16)[name = string("e_13_cast_fp16")]; - tensor var_5866_axes_0 = const()[name = string("op_5866_axes_0"), val = tensor([2])]; - tensor var_5866_cast_fp16 = squeeze(axes = var_5866_axes_0, x = e_13_cast_fp16)[name = string("op_5866_cast_fp16")]; - tensor var_5867 = const()[name = string("op_5867"), val = tensor([0, 2, 1])]; - int32 var_5878 = const()[name = string("op_5878"), val = int32(-1)]; - fp16 const_262_promoted_to_fp16 = const()[name = string("const_262_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_109_cast_fp16 = transpose(perm = var_5867, x = var_5866_cast_fp16)[name = string("transpose_114")]; - tensor var_5880_cast_fp16 = mul(x = hidden_states_109_cast_fp16, y = const_262_promoted_to_fp16)[name = string("op_5880_cast_fp16")]; - bool input_139_interleave_0 = const()[name = string("input_139_interleave_0"), val = bool(false)]; - tensor input_139_cast_fp16 = concat(axis = var_5878, interleave = input_139_interleave_0, values = (hidden_states_109_cast_fp16, var_5880_cast_fp16))[name = string("input_139_cast_fp16")]; - tensor normed_165_axes_0 = const()[name = string("normed_165_axes_0"), val = tensor([-1])]; - fp16 var_5875_to_fp16 = const()[name = string("op_5875_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_165_cast_fp16 = layer_norm(axes = normed_165_axes_0, epsilon = var_5875_to_fp16, x = input_139_cast_fp16)[name = string("normed_165_cast_fp16")]; - tensor normed_167_begin_0 = const()[name = string("normed_167_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_167_end_0 = const()[name = string("normed_167_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_167_end_mask_0 = const()[name = string("normed_167_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_167_cast_fp16 = slice_by_index(begin = normed_167_begin_0, end = normed_167_end_0, end_mask = normed_167_end_mask_0, x = normed_165_cast_fp16)[name = string("normed_167_cast_fp16")]; - tensor var_5894_to_fp16 = const()[name = string("op_5894_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185417600)))]; - tensor hidden_states_111_cast_fp16 = mul(x = normed_167_cast_fp16, y = var_5894_to_fp16)[name = string("hidden_states_111_cast_fp16")]; - tensor hidden_states_113_cast_fp16 = add(x = hidden_states_107_cast_fp16, y = hidden_states_111_cast_fp16)[name = string("hidden_states_113_cast_fp16")]; - int32 var_5945 = const()[name = string("op_5945"), val = int32(-1)]; - fp16 const_266_promoted_to_fp16 = const()[name = string("const_266_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_5947_cast_fp16 = mul(x = hidden_states_113_cast_fp16, y = const_266_promoted_to_fp16)[name = string("op_5947_cast_fp16")]; - bool input_141_interleave_0 = const()[name = string("input_141_interleave_0"), val = bool(false)]; - tensor input_141_cast_fp16 = concat(axis = var_5945, interleave = input_141_interleave_0, values = (hidden_states_113_cast_fp16, var_5947_cast_fp16))[name = string("input_141_cast_fp16")]; - tensor normed_169_axes_0 = const()[name = string("normed_169_axes_0"), val = tensor([-1])]; - fp16 var_5942_to_fp16 = const()[name = string("op_5942_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_169_cast_fp16 = layer_norm(axes = normed_169_axes_0, epsilon = var_5942_to_fp16, x = input_141_cast_fp16)[name = string("normed_169_cast_fp16")]; - tensor normed_171_begin_0 = const()[name = string("normed_171_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_171_end_0 = const()[name = string("normed_171_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_171_end_mask_0 = const()[name = string("normed_171_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_171_cast_fp16 = slice_by_index(begin = normed_171_begin_0, end = normed_171_end_0, end_mask = normed_171_end_mask_0, x = normed_169_cast_fp16)[name = string("normed_171_cast_fp16")]; - tensor var_5961_to_fp16 = const()[name = string("op_5961_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185419968)))]; - tensor hidden_states_115_cast_fp16 = mul(x = normed_171_cast_fp16, y = var_5961_to_fp16)[name = string("hidden_states_115_cast_fp16")]; - tensor var_5966 = const()[name = string("op_5966"), val = tensor([0, 2, 1])]; - tensor var_5969_axes_0 = const()[name = string("op_5969_axes_0"), val = tensor([2])]; - tensor var_5967_cast_fp16 = transpose(perm = var_5966, x = hidden_states_115_cast_fp16)[name = string("transpose_113")]; - tensor var_5969_cast_fp16 = expand_dims(axes = var_5969_axes_0, x = var_5967_cast_fp16)[name = string("op_5969_cast_fp16")]; - string var_5985_pad_type_0 = const()[name = string("op_5985_pad_type_0"), val = string("valid")]; - tensor var_5985_strides_0 = const()[name = string("op_5985_strides_0"), val = tensor([1, 1])]; - tensor var_5985_pad_0 = const()[name = string("op_5985_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_5985_dilations_0 = const()[name = string("op_5985_dilations_0"), val = tensor([1, 1])]; - int32 var_5985_groups_0 = const()[name = string("op_5985_groups_0"), val = int32(1)]; - tensor var_5985 = conv(dilations = var_5985_dilations_0, groups = var_5985_groups_0, pad = var_5985_pad_0, pad_type = var_5985_pad_type_0, strides = var_5985_strides_0, weight = model_model_layers_7_self_attn_q_proj_weight_palettized, x = var_5969_cast_fp16)[name = string("op_5985")]; - tensor var_5990 = const()[name = string("op_5990"), val = tensor([1, 4, 1, 256])]; - tensor var_5991 = reshape(shape = var_5990, x = var_5985)[name = string("op_5991")]; - string var_6007_pad_type_0 = const()[name = string("op_6007_pad_type_0"), val = string("valid")]; - tensor var_6007_strides_0 = const()[name = string("op_6007_strides_0"), val = tensor([1, 1])]; - tensor var_6007_pad_0 = const()[name = string("op_6007_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_6007_dilations_0 = const()[name = string("op_6007_dilations_0"), val = tensor([1, 1])]; - int32 var_6007_groups_0 = const()[name = string("op_6007_groups_0"), val = int32(1)]; - tensor var_6007 = conv(dilations = var_6007_dilations_0, groups = var_6007_groups_0, pad = var_6007_pad_0, pad_type = var_6007_pad_type_0, strides = var_6007_strides_0, weight = model_model_layers_7_self_attn_k_proj_weight_palettized, x = var_5969_cast_fp16)[name = string("op_6007")]; - tensor var_6012 = const()[name = string("op_6012"), val = tensor([1, 1, 1, 256])]; - tensor var_6013 = reshape(shape = var_6012, x = var_6007)[name = string("op_6013")]; - string var_6029_pad_type_0 = const()[name = string("op_6029_pad_type_0"), val = string("valid")]; - tensor var_6029_strides_0 = const()[name = string("op_6029_strides_0"), val = tensor([1, 1])]; - tensor var_6029_pad_0 = const()[name = string("op_6029_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_6029_dilations_0 = const()[name = string("op_6029_dilations_0"), val = tensor([1, 1])]; - int32 var_6029_groups_0 = const()[name = string("op_6029_groups_0"), val = int32(1)]; - tensor var_6029 = conv(dilations = var_6029_dilations_0, groups = var_6029_groups_0, pad = var_6029_pad_0, pad_type = var_6029_pad_type_0, strides = var_6029_strides_0, weight = model_model_layers_7_self_attn_v_proj_weight_palettized, x = var_5969_cast_fp16)[name = string("op_6029")]; - tensor var_6034 = const()[name = string("op_6034"), val = tensor([1, 1, 1, 256])]; - tensor var_6035 = reshape(shape = var_6034, x = var_6029)[name = string("op_6035")]; - int32 var_6050 = const()[name = string("op_6050"), val = int32(-1)]; - fp16 const_270_promoted = const()[name = string("const_270_promoted"), val = fp16(-0x1p+0)]; - tensor var_6052 = mul(x = var_5991, y = const_270_promoted)[name = string("op_6052")]; - bool input_145_interleave_0 = const()[name = string("input_145_interleave_0"), val = bool(false)]; - tensor input_145 = concat(axis = var_6050, interleave = input_145_interleave_0, values = (var_5991, var_6052))[name = string("input_145")]; - tensor normed_173_axes_0 = const()[name = string("normed_173_axes_0"), val = tensor([-1])]; - fp16 var_6047_to_fp16 = const()[name = string("op_6047_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_173_cast_fp16 = layer_norm(axes = normed_173_axes_0, epsilon = var_6047_to_fp16, x = input_145)[name = string("normed_173_cast_fp16")]; - tensor normed_175_begin_0 = const()[name = string("normed_175_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_175_end_0 = const()[name = string("normed_175_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_175_end_mask_0 = const()[name = string("normed_175_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_175 = slice_by_index(begin = normed_175_begin_0, end = normed_175_end_0, end_mask = normed_175_end_mask_0, x = normed_173_cast_fp16)[name = string("normed_175")]; - tensor var_6066_to_fp16 = const()[name = string("op_6066_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185422336)))]; - tensor q_15_cast_fp16 = mul(x = normed_175, y = var_6066_to_fp16)[name = string("q_15_cast_fp16")]; - int32 var_6077 = const()[name = string("op_6077"), val = int32(-1)]; - fp16 const_274_promoted = const()[name = string("const_274_promoted"), val = fp16(-0x1p+0)]; - tensor var_6079 = mul(x = var_6013, y = const_274_promoted)[name = string("op_6079")]; - bool input_147_interleave_0 = const()[name = string("input_147_interleave_0"), val = bool(false)]; - tensor input_147 = concat(axis = var_6077, interleave = input_147_interleave_0, values = (var_6013, var_6079))[name = string("input_147")]; - tensor normed_177_axes_0 = const()[name = string("normed_177_axes_0"), val = tensor([-1])]; - fp16 var_6074_to_fp16 = const()[name = string("op_6074_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_177_cast_fp16 = layer_norm(axes = normed_177_axes_0, epsilon = var_6074_to_fp16, x = input_147)[name = string("normed_177_cast_fp16")]; - tensor normed_179_begin_0 = const()[name = string("normed_179_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_179_end_0 = const()[name = string("normed_179_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_179_end_mask_0 = const()[name = string("normed_179_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_179 = slice_by_index(begin = normed_179_begin_0, end = normed_179_end_0, end_mask = normed_179_end_mask_0, x = normed_177_cast_fp16)[name = string("normed_179")]; - tensor var_6093_to_fp16 = const()[name = string("op_6093_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185422912)))]; - tensor k_15_cast_fp16 = mul(x = normed_179, y = var_6093_to_fp16)[name = string("k_15_cast_fp16")]; - tensor var_6095_cast_fp16 = mul(x = q_15_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6095_cast_fp16")]; - tensor x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_29_cast_fp16 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = q_15_cast_fp16)[name = string("x1_29_cast_fp16")]; - tensor x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_29_cast_fp16 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = q_15_cast_fp16)[name = string("x2_29_cast_fp16")]; - fp16 const_280_promoted_to_fp16 = const()[name = string("const_280_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_6116_cast_fp16 = mul(x = x2_29_cast_fp16, y = const_280_promoted_to_fp16)[name = string("op_6116_cast_fp16")]; - int32 var_6118 = const()[name = string("op_6118"), val = int32(-1)]; - bool var_6119_interleave_0 = const()[name = string("op_6119_interleave_0"), val = bool(false)]; - tensor var_6119_cast_fp16 = concat(axis = var_6118, interleave = var_6119_interleave_0, values = (var_6116_cast_fp16, x1_29_cast_fp16))[name = string("op_6119_cast_fp16")]; - tensor var_6120_cast_fp16 = mul(x = var_6119_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6120_cast_fp16")]; - tensor query_states_29_cast_fp16 = add(x = var_6095_cast_fp16, y = var_6120_cast_fp16)[name = string("query_states_29_cast_fp16")]; - tensor var_6123_cast_fp16 = mul(x = k_15_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6123_cast_fp16")]; - tensor x1_31_begin_0 = const()[name = string("x1_31_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_31_end_0 = const()[name = string("x1_31_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_31_end_mask_0 = const()[name = string("x1_31_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_31_cast_fp16 = slice_by_index(begin = x1_31_begin_0, end = x1_31_end_0, end_mask = x1_31_end_mask_0, x = k_15_cast_fp16)[name = string("x1_31_cast_fp16")]; - tensor x2_31_begin_0 = const()[name = string("x2_31_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_31_end_0 = const()[name = string("x2_31_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_31_end_mask_0 = const()[name = string("x2_31_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_31_cast_fp16 = slice_by_index(begin = x2_31_begin_0, end = x2_31_end_0, end_mask = x2_31_end_mask_0, x = k_15_cast_fp16)[name = string("x2_31_cast_fp16")]; - fp16 const_283_promoted_to_fp16 = const()[name = string("const_283_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_6144_cast_fp16 = mul(x = x2_31_cast_fp16, y = const_283_promoted_to_fp16)[name = string("op_6144_cast_fp16")]; - int32 var_6146 = const()[name = string("op_6146"), val = int32(-1)]; - bool var_6147_interleave_0 = const()[name = string("op_6147_interleave_0"), val = bool(false)]; - tensor var_6147_cast_fp16 = concat(axis = var_6146, interleave = var_6147_interleave_0, values = (var_6144_cast_fp16, x1_31_cast_fp16))[name = string("op_6147_cast_fp16")]; - tensor var_6148_cast_fp16 = mul(x = var_6147_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6148_cast_fp16")]; - tensor key_states_29_cast_fp16 = add(x = var_6123_cast_fp16, y = var_6148_cast_fp16)[name = string("key_states_29_cast_fp16")]; - tensor expand_dims_84 = const()[name = string("expand_dims_84"), val = tensor([6])]; - tensor expand_dims_85 = const()[name = string("expand_dims_85"), val = tensor([0])]; - tensor expand_dims_87 = const()[name = string("expand_dims_87"), val = tensor([0])]; - tensor expand_dims_88 = const()[name = string("expand_dims_88"), val = tensor([7])]; - int32 concat_58_axis_0 = const()[name = string("concat_58_axis_0"), val = int32(0)]; - bool concat_58_interleave_0 = const()[name = string("concat_58_interleave_0"), val = bool(false)]; - tensor concat_58 = concat(axis = concat_58_axis_0, interleave = concat_58_interleave_0, values = (expand_dims_84, expand_dims_85, current_pos, expand_dims_87))[name = string("concat_58")]; - tensor concat_59_values1_0 = const()[name = string("concat_59_values1_0"), val = tensor([0])]; - tensor concat_59_values3_0 = const()[name = string("concat_59_values3_0"), val = tensor([0])]; - int32 concat_59_axis_0 = const()[name = string("concat_59_axis_0"), val = int32(0)]; - bool concat_59_interleave_0 = const()[name = string("concat_59_interleave_0"), val = bool(false)]; - tensor concat_59 = concat(axis = concat_59_axis_0, interleave = concat_59_interleave_0, values = (expand_dims_88, concat_59_values1_0, var_1909, concat_59_values3_0))[name = string("concat_59")]; - tensor model_model_kv_cache_local_internal_tensor_assign_13_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_13_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_13_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_58, begin_mask = model_model_kv_cache_local_internal_tensor_assign_13_begin_mask_0, end = concat_59, end_mask = model_model_kv_cache_local_internal_tensor_assign_13_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_13_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_13_stride_0, update = key_states_29_cast_fp16, x = coreml_update_state_65)[name = string("model_model_kv_cache_local_internal_tensor_assign_13_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_13_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_14_write_state")]; - tensor coreml_update_state_66 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_14")]; - tensor expand_dims_90 = const()[name = string("expand_dims_90"), val = tensor([28])]; - tensor expand_dims_91 = const()[name = string("expand_dims_91"), val = tensor([0])]; - tensor expand_dims_93 = const()[name = string("expand_dims_93"), val = tensor([0])]; - tensor expand_dims_94 = const()[name = string("expand_dims_94"), val = tensor([29])]; - int32 concat_62_axis_0 = const()[name = string("concat_62_axis_0"), val = int32(0)]; - bool concat_62_interleave_0 = const()[name = string("concat_62_interleave_0"), val = bool(false)]; - tensor concat_62 = concat(axis = concat_62_axis_0, interleave = concat_62_interleave_0, values = (expand_dims_90, expand_dims_91, current_pos, expand_dims_93))[name = string("concat_62")]; - tensor concat_63_values1_0 = const()[name = string("concat_63_values1_0"), val = tensor([0])]; - tensor concat_63_values3_0 = const()[name = string("concat_63_values3_0"), val = tensor([0])]; - int32 concat_63_axis_0 = const()[name = string("concat_63_axis_0"), val = int32(0)]; - bool concat_63_interleave_0 = const()[name = string("concat_63_interleave_0"), val = bool(false)]; - tensor concat_63 = concat(axis = concat_63_axis_0, interleave = concat_63_interleave_0, values = (expand_dims_94, concat_63_values1_0, var_1909, concat_63_values3_0))[name = string("concat_63")]; - tensor model_model_kv_cache_local_internal_tensor_assign_14_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_14_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_14_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_62, begin_mask = model_model_kv_cache_local_internal_tensor_assign_14_begin_mask_0, end = concat_63, end_mask = model_model_kv_cache_local_internal_tensor_assign_14_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_14_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_14_stride_0, update = var_6035, x = coreml_update_state_66)[name = string("model_model_kv_cache_local_internal_tensor_assign_14_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_14_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_15_write_state")]; - tensor coreml_update_state_67 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_15")]; - tensor var_6203_begin_0 = const()[name = string("op_6203_begin_0"), val = tensor([6, 0, 0, 0])]; - tensor var_6203_end_0 = const()[name = string("op_6203_end_0"), val = tensor([7, 1, 512, 256])]; - tensor var_6203_end_mask_0 = const()[name = string("op_6203_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_6203_cast_fp16 = slice_by_index(begin = var_6203_begin_0, end = var_6203_end_0, end_mask = var_6203_end_mask_0, x = coreml_update_state_67)[name = string("op_6203_cast_fp16")]; - tensor var_6210_begin_0 = const()[name = string("op_6210_begin_0"), val = tensor([28, 0, 0, 0])]; - tensor var_6210_end_0 = const()[name = string("op_6210_end_0"), val = tensor([29, 1, 512, 256])]; - tensor var_6210_end_mask_0 = const()[name = string("op_6210_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_6210_cast_fp16 = slice_by_index(begin = var_6210_begin_0, end = var_6210_end_0, end_mask = var_6210_end_mask_0, x = coreml_update_state_67)[name = string("op_6210_cast_fp16")]; - tensor var_6247 = const()[name = string("op_6247"), val = tensor([1, 4, 1, 1])]; - tensor x_117_cast_fp16 = tile(reps = var_6247, x = var_6203_cast_fp16)[name = string("x_117_cast_fp16")]; - tensor var_6267 = const()[name = string("op_6267"), val = tensor([1, 4, 1, 1])]; - tensor x_123_cast_fp16 = tile(reps = var_6267, x = var_6210_cast_fp16)[name = string("x_123_cast_fp16")]; - bool var_6294_transpose_x_1 = const()[name = string("op_6294_transpose_x_1"), val = bool(false)]; - bool var_6294_transpose_y_1 = const()[name = string("op_6294_transpose_y_1"), val = bool(true)]; - tensor var_6294 = matmul(transpose_x = var_6294_transpose_x_1, transpose_y = var_6294_transpose_y_1, x = query_states_29_cast_fp16, y = x_117_cast_fp16)[name = string("op_6294")]; - fp16 var_6295_to_fp16 = const()[name = string("op_6295_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_43_cast_fp16 = mul(x = var_6294, y = var_6295_to_fp16)[name = string("attn_weights_43_cast_fp16")]; - tensor attn_weights_45_cast_fp16 = add(x = attn_weights_43_cast_fp16, y = var_2083)[name = string("attn_weights_45_cast_fp16")]; - int32 var_6330 = const()[name = string("op_6330"), val = int32(-1)]; - tensor attn_weights_47_cast_fp16 = softmax(axis = var_6330, x = attn_weights_45_cast_fp16)[name = string("attn_weights_47_cast_fp16")]; - bool attn_output_71_transpose_x_0 = const()[name = string("attn_output_71_transpose_x_0"), val = bool(false)]; - bool attn_output_71_transpose_y_0 = const()[name = string("attn_output_71_transpose_y_0"), val = bool(false)]; - tensor attn_output_71_cast_fp16 = matmul(transpose_x = attn_output_71_transpose_x_0, transpose_y = attn_output_71_transpose_y_0, x = attn_weights_47_cast_fp16, y = x_123_cast_fp16)[name = string("attn_output_71_cast_fp16")]; - tensor var_6341_perm_0 = const()[name = string("op_6341_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_6345 = const()[name = string("op_6345"), val = tensor([1, 1, 1024])]; - tensor var_6341_cast_fp16 = transpose(perm = var_6341_perm_0, x = attn_output_71_cast_fp16)[name = string("transpose_112")]; - tensor attn_output_75_cast_fp16 = reshape(shape = var_6345, x = var_6341_cast_fp16)[name = string("attn_output_75_cast_fp16")]; - tensor var_6350 = const()[name = string("op_6350"), val = tensor([0, 2, 1])]; - string var_6366_pad_type_0 = const()[name = string("op_6366_pad_type_0"), val = string("valid")]; - int32 var_6366_groups_0 = const()[name = string("op_6366_groups_0"), val = int32(1)]; - tensor var_6366_strides_0 = const()[name = string("op_6366_strides_0"), val = tensor([1])]; - tensor var_6366_pad_0 = const()[name = string("op_6366_pad_0"), val = tensor([0, 0])]; - tensor var_6366_dilations_0 = const()[name = string("op_6366_dilations_0"), val = tensor([1])]; - tensor squeeze_7_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185423488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186308288))))[name = string("squeeze_7_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_6351_cast_fp16 = transpose(perm = var_6350, x = attn_output_75_cast_fp16)[name = string("transpose_111")]; - tensor var_6366_cast_fp16 = conv(dilations = var_6366_dilations_0, groups = var_6366_groups_0, pad = var_6366_pad_0, pad_type = var_6366_pad_type_0, strides = var_6366_strides_0, weight = squeeze_7_cast_fp16_to_fp32_to_fp16_palettized, x = var_6351_cast_fp16)[name = string("op_6366_cast_fp16")]; - tensor var_6370 = const()[name = string("op_6370"), val = tensor([0, 2, 1])]; - int32 var_6381 = const()[name = string("op_6381"), val = int32(-1)]; - fp16 const_292_promoted_to_fp16 = const()[name = string("const_292_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_121_cast_fp16 = transpose(perm = var_6370, x = var_6366_cast_fp16)[name = string("transpose_110")]; - tensor var_6383_cast_fp16 = mul(x = hidden_states_121_cast_fp16, y = const_292_promoted_to_fp16)[name = string("op_6383_cast_fp16")]; - bool input_151_interleave_0 = const()[name = string("input_151_interleave_0"), val = bool(false)]; - tensor input_151_cast_fp16 = concat(axis = var_6381, interleave = input_151_interleave_0, values = (hidden_states_121_cast_fp16, var_6383_cast_fp16))[name = string("input_151_cast_fp16")]; - tensor normed_181_axes_0 = const()[name = string("normed_181_axes_0"), val = tensor([-1])]; - fp16 var_6378_to_fp16 = const()[name = string("op_6378_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_181_cast_fp16 = layer_norm(axes = normed_181_axes_0, epsilon = var_6378_to_fp16, x = input_151_cast_fp16)[name = string("normed_181_cast_fp16")]; - tensor normed_183_begin_0 = const()[name = string("normed_183_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_183_end_0 = const()[name = string("normed_183_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_183_end_mask_0 = const()[name = string("normed_183_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_183_cast_fp16 = slice_by_index(begin = normed_183_begin_0, end = normed_183_end_0, end_mask = normed_183_end_mask_0, x = normed_181_cast_fp16)[name = string("normed_183_cast_fp16")]; - tensor var_6397_to_fp16 = const()[name = string("op_6397_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186326784)))]; - tensor attn_output_79_cast_fp16 = mul(x = normed_183_cast_fp16, y = var_6397_to_fp16)[name = string("attn_output_79_cast_fp16")]; - tensor hidden_states_123_cast_fp16 = add(x = hidden_states_113_cast_fp16, y = attn_output_79_cast_fp16)[name = string("hidden_states_123_cast_fp16")]; - int32 var_6410 = const()[name = string("op_6410"), val = int32(-1)]; - fp16 const_296_promoted_to_fp16 = const()[name = string("const_296_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_6412_cast_fp16 = mul(x = hidden_states_123_cast_fp16, y = const_296_promoted_to_fp16)[name = string("op_6412_cast_fp16")]; - bool input_153_interleave_0 = const()[name = string("input_153_interleave_0"), val = bool(false)]; - tensor input_153_cast_fp16 = concat(axis = var_6410, interleave = input_153_interleave_0, values = (hidden_states_123_cast_fp16, var_6412_cast_fp16))[name = string("input_153_cast_fp16")]; - tensor normed_185_axes_0 = const()[name = string("normed_185_axes_0"), val = tensor([-1])]; - fp16 var_6407_to_fp16 = const()[name = string("op_6407_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_185_cast_fp16 = layer_norm(axes = normed_185_axes_0, epsilon = var_6407_to_fp16, x = input_153_cast_fp16)[name = string("normed_185_cast_fp16")]; - tensor normed_187_begin_0 = const()[name = string("normed_187_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_187_end_0 = const()[name = string("normed_187_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_187_end_mask_0 = const()[name = string("normed_187_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_187_cast_fp16 = slice_by_index(begin = normed_187_begin_0, end = normed_187_end_0, end_mask = normed_187_end_mask_0, x = normed_185_cast_fp16)[name = string("normed_187_cast_fp16")]; - tensor var_6426_to_fp16 = const()[name = string("op_6426_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186329152)))]; - tensor x_125_cast_fp16 = mul(x = normed_187_cast_fp16, y = var_6426_to_fp16)[name = string("x_125_cast_fp16")]; - tensor var_6438 = const()[name = string("op_6438"), val = tensor([0, 2, 1])]; - tensor input_155_axes_0 = const()[name = string("input_155_axes_0"), val = tensor([2])]; - tensor var_6439_cast_fp16 = transpose(perm = var_6438, x = x_125_cast_fp16)[name = string("transpose_109")]; - tensor input_155_cast_fp16 = expand_dims(axes = input_155_axes_0, x = var_6439_cast_fp16)[name = string("input_155_cast_fp16")]; - string x_127_pad_type_0 = const()[name = string("x_127_pad_type_0"), val = string("valid")]; - tensor x_127_strides_0 = const()[name = string("x_127_strides_0"), val = tensor([1, 1])]; - tensor x_127_pad_0 = const()[name = string("x_127_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_127_dilations_0 = const()[name = string("x_127_dilations_0"), val = tensor([1, 1])]; - int32 x_127_groups_0 = const()[name = string("x_127_groups_0"), val = int32(1)]; - tensor model_model_layers_7_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186331520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192303552))))[name = string("model_model_layers_7_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_127_cast_fp16 = conv(dilations = x_127_dilations_0, groups = x_127_groups_0, pad = x_127_pad_0, pad_type = x_127_pad_type_0, strides = x_127_strides_0, weight = model_model_layers_7_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_155_cast_fp16)[name = string("x_127_cast_fp16")]; - string b_15_pad_type_0 = const()[name = string("b_15_pad_type_0"), val = string("valid")]; - tensor b_15_strides_0 = const()[name = string("b_15_strides_0"), val = tensor([1, 1])]; - tensor b_15_pad_0 = const()[name = string("b_15_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_15_dilations_0 = const()[name = string("b_15_dilations_0"), val = tensor([1, 1])]; - int32 b_15_groups_0 = const()[name = string("b_15_groups_0"), val = int32(1)]; - tensor model_model_layers_7_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192414208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198386240))))[name = string("model_model_layers_7_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_15_cast_fp16 = conv(dilations = b_15_dilations_0, groups = b_15_groups_0, pad = b_15_pad_0, pad_type = b_15_pad_type_0, strides = b_15_strides_0, weight = model_model_layers_7_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_155_cast_fp16)[name = string("b_15_cast_fp16")]; - string var_6464_mode_0 = const()[name = string("op_6464_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_6464_cast_fp16 = gelu(mode = var_6464_mode_0, x = x_127_cast_fp16)[name = string("op_6464_cast_fp16")]; - tensor input_157_cast_fp16 = mul(x = var_6464_cast_fp16, y = b_15_cast_fp16)[name = string("input_157_cast_fp16")]; - string e_15_pad_type_0 = const()[name = string("e_15_pad_type_0"), val = string("valid")]; - tensor e_15_strides_0 = const()[name = string("e_15_strides_0"), val = tensor([1, 1])]; - tensor e_15_pad_0 = const()[name = string("e_15_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_15_dilations_0 = const()[name = string("e_15_dilations_0"), val = tensor([1, 1])]; - int32 e_15_groups_0 = const()[name = string("e_15_groups_0"), val = int32(1)]; - tensor model_model_layers_7_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198496896))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204468928))))[name = string("model_model_layers_7_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_15_cast_fp16 = conv(dilations = e_15_dilations_0, groups = e_15_groups_0, pad = e_15_pad_0, pad_type = e_15_pad_type_0, strides = e_15_strides_0, weight = model_model_layers_7_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_157_cast_fp16)[name = string("e_15_cast_fp16")]; - tensor var_6472_axes_0 = const()[name = string("op_6472_axes_0"), val = tensor([2])]; - tensor var_6472_cast_fp16 = squeeze(axes = var_6472_axes_0, x = e_15_cast_fp16)[name = string("op_6472_cast_fp16")]; - tensor var_6473 = const()[name = string("op_6473"), val = tensor([0, 2, 1])]; - int32 var_6484 = const()[name = string("op_6484"), val = int32(-1)]; - fp16 const_300_promoted_to_fp16 = const()[name = string("const_300_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_125_cast_fp16 = transpose(perm = var_6473, x = var_6472_cast_fp16)[name = string("transpose_108")]; - tensor var_6486_cast_fp16 = mul(x = hidden_states_125_cast_fp16, y = const_300_promoted_to_fp16)[name = string("op_6486_cast_fp16")]; - bool input_159_interleave_0 = const()[name = string("input_159_interleave_0"), val = bool(false)]; - tensor input_159_cast_fp16 = concat(axis = var_6484, interleave = input_159_interleave_0, values = (hidden_states_125_cast_fp16, var_6486_cast_fp16))[name = string("input_159_cast_fp16")]; - tensor normed_189_axes_0 = const()[name = string("normed_189_axes_0"), val = tensor([-1])]; - fp16 var_6481_to_fp16 = const()[name = string("op_6481_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_189_cast_fp16 = layer_norm(axes = normed_189_axes_0, epsilon = var_6481_to_fp16, x = input_159_cast_fp16)[name = string("normed_189_cast_fp16")]; - tensor normed_191_begin_0 = const()[name = string("normed_191_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_191_end_0 = const()[name = string("normed_191_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_191_end_mask_0 = const()[name = string("normed_191_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_191_cast_fp16 = slice_by_index(begin = normed_191_begin_0, end = normed_191_end_0, end_mask = normed_191_end_mask_0, x = normed_189_cast_fp16)[name = string("normed_191_cast_fp16")]; - tensor var_6500_to_fp16 = const()[name = string("op_6500_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204487424)))]; - tensor hidden_states_127_cast_fp16 = mul(x = normed_191_cast_fp16, y = var_6500_to_fp16)[name = string("hidden_states_127_cast_fp16")]; - tensor hidden_states_129_cast_fp16 = add(x = hidden_states_123_cast_fp16, y = hidden_states_127_cast_fp16)[name = string("hidden_states_129_cast_fp16")]; - int32 var_6551 = const()[name = string("op_6551"), val = int32(-1)]; - fp16 const_304_promoted_to_fp16 = const()[name = string("const_304_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_6553_cast_fp16 = mul(x = hidden_states_129_cast_fp16, y = const_304_promoted_to_fp16)[name = string("op_6553_cast_fp16")]; - bool input_161_interleave_0 = const()[name = string("input_161_interleave_0"), val = bool(false)]; - tensor input_161_cast_fp16 = concat(axis = var_6551, interleave = input_161_interleave_0, values = (hidden_states_129_cast_fp16, var_6553_cast_fp16))[name = string("input_161_cast_fp16")]; - tensor normed_193_axes_0 = const()[name = string("normed_193_axes_0"), val = tensor([-1])]; - fp16 var_6548_to_fp16 = const()[name = string("op_6548_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_193_cast_fp16 = layer_norm(axes = normed_193_axes_0, epsilon = var_6548_to_fp16, x = input_161_cast_fp16)[name = string("normed_193_cast_fp16")]; - tensor normed_195_begin_0 = const()[name = string("normed_195_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_195_end_0 = const()[name = string("normed_195_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_195_end_mask_0 = const()[name = string("normed_195_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_195_cast_fp16 = slice_by_index(begin = normed_195_begin_0, end = normed_195_end_0, end_mask = normed_195_end_mask_0, x = normed_193_cast_fp16)[name = string("normed_195_cast_fp16")]; - tensor var_6567_to_fp16 = const()[name = string("op_6567_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204489792)))]; - tensor hidden_states_131_cast_fp16 = mul(x = normed_195_cast_fp16, y = var_6567_to_fp16)[name = string("hidden_states_131_cast_fp16")]; - tensor var_6572 = const()[name = string("op_6572"), val = tensor([0, 2, 1])]; - tensor var_6575_axes_0 = const()[name = string("op_6575_axes_0"), val = tensor([2])]; - tensor var_6573_cast_fp16 = transpose(perm = var_6572, x = hidden_states_131_cast_fp16)[name = string("transpose_107")]; - tensor var_6575_cast_fp16 = expand_dims(axes = var_6575_axes_0, x = var_6573_cast_fp16)[name = string("op_6575_cast_fp16")]; - string var_6591_pad_type_0 = const()[name = string("op_6591_pad_type_0"), val = string("valid")]; - tensor var_6591_strides_0 = const()[name = string("op_6591_strides_0"), val = tensor([1, 1])]; - tensor var_6591_pad_0 = const()[name = string("op_6591_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_6591_dilations_0 = const()[name = string("op_6591_dilations_0"), val = tensor([1, 1])]; - int32 var_6591_groups_0 = const()[name = string("op_6591_groups_0"), val = int32(1)]; - tensor var_6591 = conv(dilations = var_6591_dilations_0, groups = var_6591_groups_0, pad = var_6591_pad_0, pad_type = var_6591_pad_type_0, strides = var_6591_strides_0, weight = model_model_layers_8_self_attn_q_proj_weight_palettized, x = var_6575_cast_fp16)[name = string("op_6591")]; - tensor var_6596 = const()[name = string("op_6596"), val = tensor([1, 4, 1, 256])]; - tensor var_6597 = reshape(shape = var_6596, x = var_6591)[name = string("op_6597")]; - string var_6613_pad_type_0 = const()[name = string("op_6613_pad_type_0"), val = string("valid")]; - tensor var_6613_strides_0 = const()[name = string("op_6613_strides_0"), val = tensor([1, 1])]; - tensor var_6613_pad_0 = const()[name = string("op_6613_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_6613_dilations_0 = const()[name = string("op_6613_dilations_0"), val = tensor([1, 1])]; - int32 var_6613_groups_0 = const()[name = string("op_6613_groups_0"), val = int32(1)]; - tensor var_6613 = conv(dilations = var_6613_dilations_0, groups = var_6613_groups_0, pad = var_6613_pad_0, pad_type = var_6613_pad_type_0, strides = var_6613_strides_0, weight = model_model_layers_8_self_attn_k_proj_weight_palettized, x = var_6575_cast_fp16)[name = string("op_6613")]; - tensor var_6618 = const()[name = string("op_6618"), val = tensor([1, 1, 1, 256])]; - tensor var_6619 = reshape(shape = var_6618, x = var_6613)[name = string("op_6619")]; - string var_6635_pad_type_0 = const()[name = string("op_6635_pad_type_0"), val = string("valid")]; - tensor var_6635_strides_0 = const()[name = string("op_6635_strides_0"), val = tensor([1, 1])]; - tensor var_6635_pad_0 = const()[name = string("op_6635_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_6635_dilations_0 = const()[name = string("op_6635_dilations_0"), val = tensor([1, 1])]; - int32 var_6635_groups_0 = const()[name = string("op_6635_groups_0"), val = int32(1)]; - tensor var_6635 = conv(dilations = var_6635_dilations_0, groups = var_6635_groups_0, pad = var_6635_pad_0, pad_type = var_6635_pad_type_0, strides = var_6635_strides_0, weight = model_model_layers_8_self_attn_v_proj_weight_palettized, x = var_6575_cast_fp16)[name = string("op_6635")]; - tensor var_6640 = const()[name = string("op_6640"), val = tensor([1, 1, 1, 256])]; - tensor var_6641 = reshape(shape = var_6640, x = var_6635)[name = string("op_6641")]; - int32 var_6656 = const()[name = string("op_6656"), val = int32(-1)]; - fp16 const_308_promoted = const()[name = string("const_308_promoted"), val = fp16(-0x1p+0)]; - tensor var_6658 = mul(x = var_6597, y = const_308_promoted)[name = string("op_6658")]; - bool input_165_interleave_0 = const()[name = string("input_165_interleave_0"), val = bool(false)]; - tensor input_165 = concat(axis = var_6656, interleave = input_165_interleave_0, values = (var_6597, var_6658))[name = string("input_165")]; - tensor normed_197_axes_0 = const()[name = string("normed_197_axes_0"), val = tensor([-1])]; - fp16 var_6653_to_fp16 = const()[name = string("op_6653_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_197_cast_fp16 = layer_norm(axes = normed_197_axes_0, epsilon = var_6653_to_fp16, x = input_165)[name = string("normed_197_cast_fp16")]; - tensor normed_199_begin_0 = const()[name = string("normed_199_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_199_end_0 = const()[name = string("normed_199_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_199_end_mask_0 = const()[name = string("normed_199_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_199 = slice_by_index(begin = normed_199_begin_0, end = normed_199_end_0, end_mask = normed_199_end_mask_0, x = normed_197_cast_fp16)[name = string("normed_199")]; - tensor var_6672_to_fp16 = const()[name = string("op_6672_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204492160)))]; - tensor q_17_cast_fp16 = mul(x = normed_199, y = var_6672_to_fp16)[name = string("q_17_cast_fp16")]; - int32 var_6683 = const()[name = string("op_6683"), val = int32(-1)]; - fp16 const_312_promoted = const()[name = string("const_312_promoted"), val = fp16(-0x1p+0)]; - tensor var_6685 = mul(x = var_6619, y = const_312_promoted)[name = string("op_6685")]; - bool input_167_interleave_0 = const()[name = string("input_167_interleave_0"), val = bool(false)]; - tensor input_167 = concat(axis = var_6683, interleave = input_167_interleave_0, values = (var_6619, var_6685))[name = string("input_167")]; - tensor normed_201_axes_0 = const()[name = string("normed_201_axes_0"), val = tensor([-1])]; - fp16 var_6680_to_fp16 = const()[name = string("op_6680_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_201_cast_fp16 = layer_norm(axes = normed_201_axes_0, epsilon = var_6680_to_fp16, x = input_167)[name = string("normed_201_cast_fp16")]; - tensor normed_203_begin_0 = const()[name = string("normed_203_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_203_end_0 = const()[name = string("normed_203_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_203_end_mask_0 = const()[name = string("normed_203_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_203 = slice_by_index(begin = normed_203_begin_0, end = normed_203_end_0, end_mask = normed_203_end_mask_0, x = normed_201_cast_fp16)[name = string("normed_203")]; - tensor var_6699_to_fp16 = const()[name = string("op_6699_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204492736)))]; - tensor k_17_cast_fp16 = mul(x = normed_203, y = var_6699_to_fp16)[name = string("k_17_cast_fp16")]; - tensor var_6701_cast_fp16 = mul(x = q_17_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6701_cast_fp16")]; - tensor x1_33_begin_0 = const()[name = string("x1_33_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_33_end_0 = const()[name = string("x1_33_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_33_end_mask_0 = const()[name = string("x1_33_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_33_cast_fp16 = slice_by_index(begin = x1_33_begin_0, end = x1_33_end_0, end_mask = x1_33_end_mask_0, x = q_17_cast_fp16)[name = string("x1_33_cast_fp16")]; - tensor x2_33_begin_0 = const()[name = string("x2_33_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_33_end_0 = const()[name = string("x2_33_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_33_end_mask_0 = const()[name = string("x2_33_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_33_cast_fp16 = slice_by_index(begin = x2_33_begin_0, end = x2_33_end_0, end_mask = x2_33_end_mask_0, x = q_17_cast_fp16)[name = string("x2_33_cast_fp16")]; - fp16 const_318_promoted_to_fp16 = const()[name = string("const_318_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_6722_cast_fp16 = mul(x = x2_33_cast_fp16, y = const_318_promoted_to_fp16)[name = string("op_6722_cast_fp16")]; - int32 var_6724 = const()[name = string("op_6724"), val = int32(-1)]; - bool var_6725_interleave_0 = const()[name = string("op_6725_interleave_0"), val = bool(false)]; - tensor var_6725_cast_fp16 = concat(axis = var_6724, interleave = var_6725_interleave_0, values = (var_6722_cast_fp16, x1_33_cast_fp16))[name = string("op_6725_cast_fp16")]; - tensor var_6726_cast_fp16 = mul(x = var_6725_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6726_cast_fp16")]; - tensor query_states_33_cast_fp16 = add(x = var_6701_cast_fp16, y = var_6726_cast_fp16)[name = string("query_states_33_cast_fp16")]; - tensor var_6729_cast_fp16 = mul(x = k_17_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6729_cast_fp16")]; - tensor x1_35_begin_0 = const()[name = string("x1_35_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_35_end_0 = const()[name = string("x1_35_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_35_end_mask_0 = const()[name = string("x1_35_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_35_cast_fp16 = slice_by_index(begin = x1_35_begin_0, end = x1_35_end_0, end_mask = x1_35_end_mask_0, x = k_17_cast_fp16)[name = string("x1_35_cast_fp16")]; - tensor x2_35_begin_0 = const()[name = string("x2_35_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_35_end_0 = const()[name = string("x2_35_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_35_end_mask_0 = const()[name = string("x2_35_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_35_cast_fp16 = slice_by_index(begin = x2_35_begin_0, end = x2_35_end_0, end_mask = x2_35_end_mask_0, x = k_17_cast_fp16)[name = string("x2_35_cast_fp16")]; - fp16 const_321_promoted_to_fp16 = const()[name = string("const_321_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_6750_cast_fp16 = mul(x = x2_35_cast_fp16, y = const_321_promoted_to_fp16)[name = string("op_6750_cast_fp16")]; - int32 var_6752 = const()[name = string("op_6752"), val = int32(-1)]; - bool var_6753_interleave_0 = const()[name = string("op_6753_interleave_0"), val = bool(false)]; - tensor var_6753_cast_fp16 = concat(axis = var_6752, interleave = var_6753_interleave_0, values = (var_6750_cast_fp16, x1_35_cast_fp16))[name = string("op_6753_cast_fp16")]; - tensor var_6754_cast_fp16 = mul(x = var_6753_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6754_cast_fp16")]; - tensor key_states_33_cast_fp16 = add(x = var_6729_cast_fp16, y = var_6754_cast_fp16)[name = string("key_states_33_cast_fp16")]; - tensor expand_dims_96 = const()[name = string("expand_dims_96"), val = tensor([7])]; - tensor expand_dims_97 = const()[name = string("expand_dims_97"), val = tensor([0])]; - tensor expand_dims_99 = const()[name = string("expand_dims_99"), val = tensor([0])]; - tensor expand_dims_100 = const()[name = string("expand_dims_100"), val = tensor([8])]; - int32 concat_66_axis_0 = const()[name = string("concat_66_axis_0"), val = int32(0)]; - bool concat_66_interleave_0 = const()[name = string("concat_66_interleave_0"), val = bool(false)]; - tensor concat_66 = concat(axis = concat_66_axis_0, interleave = concat_66_interleave_0, values = (expand_dims_96, expand_dims_97, current_pos, expand_dims_99))[name = string("concat_66")]; - tensor concat_67_values1_0 = const()[name = string("concat_67_values1_0"), val = tensor([0])]; - tensor concat_67_values3_0 = const()[name = string("concat_67_values3_0"), val = tensor([0])]; - int32 concat_67_axis_0 = const()[name = string("concat_67_axis_0"), val = int32(0)]; - bool concat_67_interleave_0 = const()[name = string("concat_67_interleave_0"), val = bool(false)]; - tensor concat_67 = concat(axis = concat_67_axis_0, interleave = concat_67_interleave_0, values = (expand_dims_100, concat_67_values1_0, var_1909, concat_67_values3_0))[name = string("concat_67")]; - tensor model_model_kv_cache_local_internal_tensor_assign_15_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_15_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_15_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_66, begin_mask = model_model_kv_cache_local_internal_tensor_assign_15_begin_mask_0, end = concat_67, end_mask = model_model_kv_cache_local_internal_tensor_assign_15_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_15_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_15_stride_0, update = key_states_33_cast_fp16, x = coreml_update_state_67)[name = string("model_model_kv_cache_local_internal_tensor_assign_15_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_15_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_16_write_state")]; - tensor coreml_update_state_68 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_16")]; - tensor expand_dims_102 = const()[name = string("expand_dims_102"), val = tensor([29])]; - tensor expand_dims_103 = const()[name = string("expand_dims_103"), val = tensor([0])]; - tensor expand_dims_105 = const()[name = string("expand_dims_105"), val = tensor([0])]; - tensor expand_dims_106 = const()[name = string("expand_dims_106"), val = tensor([30])]; - int32 concat_70_axis_0 = const()[name = string("concat_70_axis_0"), val = int32(0)]; - bool concat_70_interleave_0 = const()[name = string("concat_70_interleave_0"), val = bool(false)]; - tensor concat_70 = concat(axis = concat_70_axis_0, interleave = concat_70_interleave_0, values = (expand_dims_102, expand_dims_103, current_pos, expand_dims_105))[name = string("concat_70")]; - tensor concat_71_values1_0 = const()[name = string("concat_71_values1_0"), val = tensor([0])]; - tensor concat_71_values3_0 = const()[name = string("concat_71_values3_0"), val = tensor([0])]; - int32 concat_71_axis_0 = const()[name = string("concat_71_axis_0"), val = int32(0)]; - bool concat_71_interleave_0 = const()[name = string("concat_71_interleave_0"), val = bool(false)]; - tensor concat_71 = concat(axis = concat_71_axis_0, interleave = concat_71_interleave_0, values = (expand_dims_106, concat_71_values1_0, var_1909, concat_71_values3_0))[name = string("concat_71")]; - tensor model_model_kv_cache_local_internal_tensor_assign_16_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_16_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_16_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_70, begin_mask = model_model_kv_cache_local_internal_tensor_assign_16_begin_mask_0, end = concat_71, end_mask = model_model_kv_cache_local_internal_tensor_assign_16_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_16_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_16_stride_0, update = var_6641, x = coreml_update_state_68)[name = string("model_model_kv_cache_local_internal_tensor_assign_16_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_16_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_17_write_state")]; - tensor coreml_update_state_69 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_17")]; - tensor var_6809_begin_0 = const()[name = string("op_6809_begin_0"), val = tensor([7, 0, 0, 0])]; - tensor var_6809_end_0 = const()[name = string("op_6809_end_0"), val = tensor([8, 1, 512, 256])]; - tensor var_6809_end_mask_0 = const()[name = string("op_6809_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_6809_cast_fp16 = slice_by_index(begin = var_6809_begin_0, end = var_6809_end_0, end_mask = var_6809_end_mask_0, x = coreml_update_state_69)[name = string("op_6809_cast_fp16")]; - tensor var_6816_begin_0 = const()[name = string("op_6816_begin_0"), val = tensor([29, 0, 0, 0])]; - tensor var_6816_end_0 = const()[name = string("op_6816_end_0"), val = tensor([30, 1, 512, 256])]; - tensor var_6816_end_mask_0 = const()[name = string("op_6816_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_6816_cast_fp16 = slice_by_index(begin = var_6816_begin_0, end = var_6816_end_0, end_mask = var_6816_end_mask_0, x = coreml_update_state_69)[name = string("op_6816_cast_fp16")]; - tensor var_6853 = const()[name = string("op_6853"), val = tensor([1, 4, 1, 1])]; - tensor x_133_cast_fp16 = tile(reps = var_6853, x = var_6809_cast_fp16)[name = string("x_133_cast_fp16")]; - tensor var_6873 = const()[name = string("op_6873"), val = tensor([1, 4, 1, 1])]; - tensor x_139_cast_fp16 = tile(reps = var_6873, x = var_6816_cast_fp16)[name = string("x_139_cast_fp16")]; - bool var_6900_transpose_x_1 = const()[name = string("op_6900_transpose_x_1"), val = bool(false)]; - bool var_6900_transpose_y_1 = const()[name = string("op_6900_transpose_y_1"), val = bool(true)]; - tensor var_6900 = matmul(transpose_x = var_6900_transpose_x_1, transpose_y = var_6900_transpose_y_1, x = query_states_33_cast_fp16, y = x_133_cast_fp16)[name = string("op_6900")]; - fp16 var_6901_to_fp16 = const()[name = string("op_6901_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_49_cast_fp16 = mul(x = var_6900, y = var_6901_to_fp16)[name = string("attn_weights_49_cast_fp16")]; - tensor attn_weights_51_cast_fp16 = add(x = attn_weights_49_cast_fp16, y = var_2083)[name = string("attn_weights_51_cast_fp16")]; - int32 var_6936 = const()[name = string("op_6936"), val = int32(-1)]; - tensor attn_weights_53_cast_fp16 = softmax(axis = var_6936, x = attn_weights_51_cast_fp16)[name = string("attn_weights_53_cast_fp16")]; - bool attn_output_81_transpose_x_0 = const()[name = string("attn_output_81_transpose_x_0"), val = bool(false)]; - bool attn_output_81_transpose_y_0 = const()[name = string("attn_output_81_transpose_y_0"), val = bool(false)]; - tensor attn_output_81_cast_fp16 = matmul(transpose_x = attn_output_81_transpose_x_0, transpose_y = attn_output_81_transpose_y_0, x = attn_weights_53_cast_fp16, y = x_139_cast_fp16)[name = string("attn_output_81_cast_fp16")]; - tensor var_6947_perm_0 = const()[name = string("op_6947_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_6951 = const()[name = string("op_6951"), val = tensor([1, 1, 1024])]; - tensor var_6947_cast_fp16 = transpose(perm = var_6947_perm_0, x = attn_output_81_cast_fp16)[name = string("transpose_106")]; - tensor attn_output_85_cast_fp16 = reshape(shape = var_6951, x = var_6947_cast_fp16)[name = string("attn_output_85_cast_fp16")]; - tensor var_6956 = const()[name = string("op_6956"), val = tensor([0, 2, 1])]; - string var_6972_pad_type_0 = const()[name = string("op_6972_pad_type_0"), val = string("valid")]; - int32 var_6972_groups_0 = const()[name = string("op_6972_groups_0"), val = int32(1)]; - tensor var_6972_strides_0 = const()[name = string("op_6972_strides_0"), val = tensor([1])]; - tensor var_6972_pad_0 = const()[name = string("op_6972_pad_0"), val = tensor([0, 0])]; - tensor var_6972_dilations_0 = const()[name = string("op_6972_dilations_0"), val = tensor([1])]; - tensor squeeze_8_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204493312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205378112))))[name = string("squeeze_8_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_6957_cast_fp16 = transpose(perm = var_6956, x = attn_output_85_cast_fp16)[name = string("transpose_105")]; - tensor var_6972_cast_fp16 = conv(dilations = var_6972_dilations_0, groups = var_6972_groups_0, pad = var_6972_pad_0, pad_type = var_6972_pad_type_0, strides = var_6972_strides_0, weight = squeeze_8_cast_fp16_to_fp32_to_fp16_palettized, x = var_6957_cast_fp16)[name = string("op_6972_cast_fp16")]; - tensor var_6976 = const()[name = string("op_6976"), val = tensor([0, 2, 1])]; - int32 var_6987 = const()[name = string("op_6987"), val = int32(-1)]; - fp16 const_330_promoted_to_fp16 = const()[name = string("const_330_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_137_cast_fp16 = transpose(perm = var_6976, x = var_6972_cast_fp16)[name = string("transpose_104")]; - tensor var_6989_cast_fp16 = mul(x = hidden_states_137_cast_fp16, y = const_330_promoted_to_fp16)[name = string("op_6989_cast_fp16")]; - bool input_171_interleave_0 = const()[name = string("input_171_interleave_0"), val = bool(false)]; - tensor input_171_cast_fp16 = concat(axis = var_6987, interleave = input_171_interleave_0, values = (hidden_states_137_cast_fp16, var_6989_cast_fp16))[name = string("input_171_cast_fp16")]; - tensor normed_205_axes_0 = const()[name = string("normed_205_axes_0"), val = tensor([-1])]; - fp16 var_6984_to_fp16 = const()[name = string("op_6984_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_205_cast_fp16 = layer_norm(axes = normed_205_axes_0, epsilon = var_6984_to_fp16, x = input_171_cast_fp16)[name = string("normed_205_cast_fp16")]; - tensor normed_207_begin_0 = const()[name = string("normed_207_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_207_end_0 = const()[name = string("normed_207_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_207_end_mask_0 = const()[name = string("normed_207_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_207_cast_fp16 = slice_by_index(begin = normed_207_begin_0, end = normed_207_end_0, end_mask = normed_207_end_mask_0, x = normed_205_cast_fp16)[name = string("normed_207_cast_fp16")]; - tensor var_7003_to_fp16 = const()[name = string("op_7003_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205396608)))]; - tensor attn_output_89_cast_fp16 = mul(x = normed_207_cast_fp16, y = var_7003_to_fp16)[name = string("attn_output_89_cast_fp16")]; - tensor hidden_states_139_cast_fp16 = add(x = hidden_states_129_cast_fp16, y = attn_output_89_cast_fp16)[name = string("hidden_states_139_cast_fp16")]; - int32 var_7016 = const()[name = string("op_7016"), val = int32(-1)]; - fp16 const_334_promoted_to_fp16 = const()[name = string("const_334_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_7018_cast_fp16 = mul(x = hidden_states_139_cast_fp16, y = const_334_promoted_to_fp16)[name = string("op_7018_cast_fp16")]; - bool input_173_interleave_0 = const()[name = string("input_173_interleave_0"), val = bool(false)]; - tensor input_173_cast_fp16 = concat(axis = var_7016, interleave = input_173_interleave_0, values = (hidden_states_139_cast_fp16, var_7018_cast_fp16))[name = string("input_173_cast_fp16")]; - tensor normed_209_axes_0 = const()[name = string("normed_209_axes_0"), val = tensor([-1])]; - fp16 var_7013_to_fp16 = const()[name = string("op_7013_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_209_cast_fp16 = layer_norm(axes = normed_209_axes_0, epsilon = var_7013_to_fp16, x = input_173_cast_fp16)[name = string("normed_209_cast_fp16")]; - tensor normed_211_begin_0 = const()[name = string("normed_211_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_211_end_0 = const()[name = string("normed_211_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_211_end_mask_0 = const()[name = string("normed_211_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_211_cast_fp16 = slice_by_index(begin = normed_211_begin_0, end = normed_211_end_0, end_mask = normed_211_end_mask_0, x = normed_209_cast_fp16)[name = string("normed_211_cast_fp16")]; - tensor var_7032_to_fp16 = const()[name = string("op_7032_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205398976)))]; - tensor x_141_cast_fp16 = mul(x = normed_211_cast_fp16, y = var_7032_to_fp16)[name = string("x_141_cast_fp16")]; - tensor var_7044 = const()[name = string("op_7044"), val = tensor([0, 2, 1])]; - tensor input_175_axes_0 = const()[name = string("input_175_axes_0"), val = tensor([2])]; - tensor var_7045_cast_fp16 = transpose(perm = var_7044, x = x_141_cast_fp16)[name = string("transpose_103")]; - tensor input_175_cast_fp16 = expand_dims(axes = input_175_axes_0, x = var_7045_cast_fp16)[name = string("input_175_cast_fp16")]; - string x_143_pad_type_0 = const()[name = string("x_143_pad_type_0"), val = string("valid")]; - tensor x_143_strides_0 = const()[name = string("x_143_strides_0"), val = tensor([1, 1])]; - tensor x_143_pad_0 = const()[name = string("x_143_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_143_dilations_0 = const()[name = string("x_143_dilations_0"), val = tensor([1, 1])]; - int32 x_143_groups_0 = const()[name = string("x_143_groups_0"), val = int32(1)]; - tensor model_model_layers_8_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205401344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211373376))))[name = string("model_model_layers_8_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_143_cast_fp16 = conv(dilations = x_143_dilations_0, groups = x_143_groups_0, pad = x_143_pad_0, pad_type = x_143_pad_type_0, strides = x_143_strides_0, weight = model_model_layers_8_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_175_cast_fp16)[name = string("x_143_cast_fp16")]; - string b_17_pad_type_0 = const()[name = string("b_17_pad_type_0"), val = string("valid")]; - tensor b_17_strides_0 = const()[name = string("b_17_strides_0"), val = tensor([1, 1])]; - tensor b_17_pad_0 = const()[name = string("b_17_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_17_dilations_0 = const()[name = string("b_17_dilations_0"), val = tensor([1, 1])]; - int32 b_17_groups_0 = const()[name = string("b_17_groups_0"), val = int32(1)]; - tensor model_model_layers_8_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211484032))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217456064))))[name = string("model_model_layers_8_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_17_cast_fp16 = conv(dilations = b_17_dilations_0, groups = b_17_groups_0, pad = b_17_pad_0, pad_type = b_17_pad_type_0, strides = b_17_strides_0, weight = model_model_layers_8_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_175_cast_fp16)[name = string("b_17_cast_fp16")]; - string var_7070_mode_0 = const()[name = string("op_7070_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_7070_cast_fp16 = gelu(mode = var_7070_mode_0, x = x_143_cast_fp16)[name = string("op_7070_cast_fp16")]; - tensor input_177_cast_fp16 = mul(x = var_7070_cast_fp16, y = b_17_cast_fp16)[name = string("input_177_cast_fp16")]; - string e_17_pad_type_0 = const()[name = string("e_17_pad_type_0"), val = string("valid")]; - tensor e_17_strides_0 = const()[name = string("e_17_strides_0"), val = tensor([1, 1])]; - tensor e_17_pad_0 = const()[name = string("e_17_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_17_dilations_0 = const()[name = string("e_17_dilations_0"), val = tensor([1, 1])]; - int32 e_17_groups_0 = const()[name = string("e_17_groups_0"), val = int32(1)]; - tensor model_model_layers_8_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217566720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223538752))))[name = string("model_model_layers_8_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_17_cast_fp16 = conv(dilations = e_17_dilations_0, groups = e_17_groups_0, pad = e_17_pad_0, pad_type = e_17_pad_type_0, strides = e_17_strides_0, weight = model_model_layers_8_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_177_cast_fp16)[name = string("e_17_cast_fp16")]; - tensor var_7078_axes_0 = const()[name = string("op_7078_axes_0"), val = tensor([2])]; - tensor var_7078_cast_fp16 = squeeze(axes = var_7078_axes_0, x = e_17_cast_fp16)[name = string("op_7078_cast_fp16")]; - tensor var_7079 = const()[name = string("op_7079"), val = tensor([0, 2, 1])]; - int32 var_7090 = const()[name = string("op_7090"), val = int32(-1)]; - fp16 const_338_promoted_to_fp16 = const()[name = string("const_338_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_141_cast_fp16 = transpose(perm = var_7079, x = var_7078_cast_fp16)[name = string("transpose_102")]; - tensor var_7092_cast_fp16 = mul(x = hidden_states_141_cast_fp16, y = const_338_promoted_to_fp16)[name = string("op_7092_cast_fp16")]; - bool input_179_interleave_0 = const()[name = string("input_179_interleave_0"), val = bool(false)]; - tensor input_179_cast_fp16 = concat(axis = var_7090, interleave = input_179_interleave_0, values = (hidden_states_141_cast_fp16, var_7092_cast_fp16))[name = string("input_179_cast_fp16")]; - tensor normed_213_axes_0 = const()[name = string("normed_213_axes_0"), val = tensor([-1])]; - fp16 var_7087_to_fp16 = const()[name = string("op_7087_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_213_cast_fp16 = layer_norm(axes = normed_213_axes_0, epsilon = var_7087_to_fp16, x = input_179_cast_fp16)[name = string("normed_213_cast_fp16")]; - tensor normed_215_begin_0 = const()[name = string("normed_215_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_215_end_0 = const()[name = string("normed_215_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_215_end_mask_0 = const()[name = string("normed_215_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_215_cast_fp16 = slice_by_index(begin = normed_215_begin_0, end = normed_215_end_0, end_mask = normed_215_end_mask_0, x = normed_213_cast_fp16)[name = string("normed_215_cast_fp16")]; - tensor var_7106_to_fp16 = const()[name = string("op_7106_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223557248)))]; - tensor hidden_states_143_cast_fp16 = mul(x = normed_215_cast_fp16, y = var_7106_to_fp16)[name = string("hidden_states_143_cast_fp16")]; - tensor hidden_states_145_cast_fp16 = add(x = hidden_states_139_cast_fp16, y = hidden_states_143_cast_fp16)[name = string("hidden_states_145_cast_fp16")]; - int32 var_7157 = const()[name = string("op_7157"), val = int32(-1)]; - fp16 const_342_promoted_to_fp16 = const()[name = string("const_342_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_7159_cast_fp16 = mul(x = hidden_states_145_cast_fp16, y = const_342_promoted_to_fp16)[name = string("op_7159_cast_fp16")]; - bool input_181_interleave_0 = const()[name = string("input_181_interleave_0"), val = bool(false)]; - tensor input_181_cast_fp16 = concat(axis = var_7157, interleave = input_181_interleave_0, values = (hidden_states_145_cast_fp16, var_7159_cast_fp16))[name = string("input_181_cast_fp16")]; - tensor normed_217_axes_0 = const()[name = string("normed_217_axes_0"), val = tensor([-1])]; - fp16 var_7154_to_fp16 = const()[name = string("op_7154_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_217_cast_fp16 = layer_norm(axes = normed_217_axes_0, epsilon = var_7154_to_fp16, x = input_181_cast_fp16)[name = string("normed_217_cast_fp16")]; - tensor normed_219_begin_0 = const()[name = string("normed_219_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_219_end_0 = const()[name = string("normed_219_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_219_end_mask_0 = const()[name = string("normed_219_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_219_cast_fp16 = slice_by_index(begin = normed_219_begin_0, end = normed_219_end_0, end_mask = normed_219_end_mask_0, x = normed_217_cast_fp16)[name = string("normed_219_cast_fp16")]; - tensor var_7173_to_fp16 = const()[name = string("op_7173_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223559616)))]; - tensor hidden_states_147_cast_fp16 = mul(x = normed_219_cast_fp16, y = var_7173_to_fp16)[name = string("hidden_states_147_cast_fp16")]; - tensor var_7178 = const()[name = string("op_7178"), val = tensor([0, 2, 1])]; - tensor var_7181_axes_0 = const()[name = string("op_7181_axes_0"), val = tensor([2])]; - tensor var_7179_cast_fp16 = transpose(perm = var_7178, x = hidden_states_147_cast_fp16)[name = string("transpose_101")]; - tensor var_7181_cast_fp16 = expand_dims(axes = var_7181_axes_0, x = var_7179_cast_fp16)[name = string("op_7181_cast_fp16")]; - string var_7197_pad_type_0 = const()[name = string("op_7197_pad_type_0"), val = string("valid")]; - tensor var_7197_strides_0 = const()[name = string("op_7197_strides_0"), val = tensor([1, 1])]; - tensor var_7197_pad_0 = const()[name = string("op_7197_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_7197_dilations_0 = const()[name = string("op_7197_dilations_0"), val = tensor([1, 1])]; - int32 var_7197_groups_0 = const()[name = string("op_7197_groups_0"), val = int32(1)]; - tensor var_7197 = conv(dilations = var_7197_dilations_0, groups = var_7197_groups_0, pad = var_7197_pad_0, pad_type = var_7197_pad_type_0, strides = var_7197_strides_0, weight = model_model_layers_9_self_attn_q_proj_weight_palettized, x = var_7181_cast_fp16)[name = string("op_7197")]; - tensor var_7202 = const()[name = string("op_7202"), val = tensor([1, 4, 1, 256])]; - tensor var_7203 = reshape(shape = var_7202, x = var_7197)[name = string("op_7203")]; - string var_7219_pad_type_0 = const()[name = string("op_7219_pad_type_0"), val = string("valid")]; - tensor var_7219_strides_0 = const()[name = string("op_7219_strides_0"), val = tensor([1, 1])]; - tensor var_7219_pad_0 = const()[name = string("op_7219_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_7219_dilations_0 = const()[name = string("op_7219_dilations_0"), val = tensor([1, 1])]; - int32 var_7219_groups_0 = const()[name = string("op_7219_groups_0"), val = int32(1)]; - tensor var_7219 = conv(dilations = var_7219_dilations_0, groups = var_7219_groups_0, pad = var_7219_pad_0, pad_type = var_7219_pad_type_0, strides = var_7219_strides_0, weight = model_model_layers_9_self_attn_k_proj_weight_palettized, x = var_7181_cast_fp16)[name = string("op_7219")]; - tensor var_7224 = const()[name = string("op_7224"), val = tensor([1, 1, 1, 256])]; - tensor var_7225 = reshape(shape = var_7224, x = var_7219)[name = string("op_7225")]; - string var_7241_pad_type_0 = const()[name = string("op_7241_pad_type_0"), val = string("valid")]; - tensor var_7241_strides_0 = const()[name = string("op_7241_strides_0"), val = tensor([1, 1])]; - tensor var_7241_pad_0 = const()[name = string("op_7241_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_7241_dilations_0 = const()[name = string("op_7241_dilations_0"), val = tensor([1, 1])]; - int32 var_7241_groups_0 = const()[name = string("op_7241_groups_0"), val = int32(1)]; - tensor var_7241 = conv(dilations = var_7241_dilations_0, groups = var_7241_groups_0, pad = var_7241_pad_0, pad_type = var_7241_pad_type_0, strides = var_7241_strides_0, weight = model_model_layers_9_self_attn_v_proj_weight_palettized, x = var_7181_cast_fp16)[name = string("op_7241")]; - tensor var_7246 = const()[name = string("op_7246"), val = tensor([1, 1, 1, 256])]; - tensor var_7247 = reshape(shape = var_7246, x = var_7241)[name = string("op_7247")]; - int32 var_7262 = const()[name = string("op_7262"), val = int32(-1)]; - fp16 const_346_promoted = const()[name = string("const_346_promoted"), val = fp16(-0x1p+0)]; - tensor var_7264 = mul(x = var_7203, y = const_346_promoted)[name = string("op_7264")]; - bool input_185_interleave_0 = const()[name = string("input_185_interleave_0"), val = bool(false)]; - tensor input_185 = concat(axis = var_7262, interleave = input_185_interleave_0, values = (var_7203, var_7264))[name = string("input_185")]; - tensor normed_221_axes_0 = const()[name = string("normed_221_axes_0"), val = tensor([-1])]; - fp16 var_7259_to_fp16 = const()[name = string("op_7259_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_221_cast_fp16 = layer_norm(axes = normed_221_axes_0, epsilon = var_7259_to_fp16, x = input_185)[name = string("normed_221_cast_fp16")]; - tensor normed_223_begin_0 = const()[name = string("normed_223_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_223_end_0 = const()[name = string("normed_223_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_223_end_mask_0 = const()[name = string("normed_223_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_223 = slice_by_index(begin = normed_223_begin_0, end = normed_223_end_0, end_mask = normed_223_end_mask_0, x = normed_221_cast_fp16)[name = string("normed_223")]; - tensor var_7278_to_fp16 = const()[name = string("op_7278_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223561984)))]; - tensor q_19_cast_fp16 = mul(x = normed_223, y = var_7278_to_fp16)[name = string("q_19_cast_fp16")]; - int32 var_7289 = const()[name = string("op_7289"), val = int32(-1)]; - fp16 const_350_promoted = const()[name = string("const_350_promoted"), val = fp16(-0x1p+0)]; - tensor var_7291 = mul(x = var_7225, y = const_350_promoted)[name = string("op_7291")]; - bool input_187_interleave_0 = const()[name = string("input_187_interleave_0"), val = bool(false)]; - tensor input_187 = concat(axis = var_7289, interleave = input_187_interleave_0, values = (var_7225, var_7291))[name = string("input_187")]; - tensor normed_225_axes_0 = const()[name = string("normed_225_axes_0"), val = tensor([-1])]; - fp16 var_7286_to_fp16 = const()[name = string("op_7286_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_225_cast_fp16 = layer_norm(axes = normed_225_axes_0, epsilon = var_7286_to_fp16, x = input_187)[name = string("normed_225_cast_fp16")]; - tensor normed_227_begin_0 = const()[name = string("normed_227_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_227_end_0 = const()[name = string("normed_227_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_227_end_mask_0 = const()[name = string("normed_227_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_227 = slice_by_index(begin = normed_227_begin_0, end = normed_227_end_0, end_mask = normed_227_end_mask_0, x = normed_225_cast_fp16)[name = string("normed_227")]; - tensor var_7305_to_fp16 = const()[name = string("op_7305_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223562560)))]; - tensor k_19_cast_fp16 = mul(x = normed_227, y = var_7305_to_fp16)[name = string("k_19_cast_fp16")]; - tensor var_7307_cast_fp16 = mul(x = q_19_cast_fp16, y = cos_1_cast_fp16)[name = string("op_7307_cast_fp16")]; - tensor x1_37_begin_0 = const()[name = string("x1_37_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_37_end_0 = const()[name = string("x1_37_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_37_end_mask_0 = const()[name = string("x1_37_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_37_cast_fp16 = slice_by_index(begin = x1_37_begin_0, end = x1_37_end_0, end_mask = x1_37_end_mask_0, x = q_19_cast_fp16)[name = string("x1_37_cast_fp16")]; - tensor x2_37_begin_0 = const()[name = string("x2_37_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_37_end_0 = const()[name = string("x2_37_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_37_end_mask_0 = const()[name = string("x2_37_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_37_cast_fp16 = slice_by_index(begin = x2_37_begin_0, end = x2_37_end_0, end_mask = x2_37_end_mask_0, x = q_19_cast_fp16)[name = string("x2_37_cast_fp16")]; - fp16 const_356_promoted_to_fp16 = const()[name = string("const_356_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_7328_cast_fp16 = mul(x = x2_37_cast_fp16, y = const_356_promoted_to_fp16)[name = string("op_7328_cast_fp16")]; - int32 var_7330 = const()[name = string("op_7330"), val = int32(-1)]; - bool var_7331_interleave_0 = const()[name = string("op_7331_interleave_0"), val = bool(false)]; - tensor var_7331_cast_fp16 = concat(axis = var_7330, interleave = var_7331_interleave_0, values = (var_7328_cast_fp16, x1_37_cast_fp16))[name = string("op_7331_cast_fp16")]; - tensor var_7332_cast_fp16 = mul(x = var_7331_cast_fp16, y = sin_1_cast_fp16)[name = string("op_7332_cast_fp16")]; - tensor query_states_37_cast_fp16 = add(x = var_7307_cast_fp16, y = var_7332_cast_fp16)[name = string("query_states_37_cast_fp16")]; - tensor var_7335_cast_fp16 = mul(x = k_19_cast_fp16, y = cos_1_cast_fp16)[name = string("op_7335_cast_fp16")]; - tensor x1_39_begin_0 = const()[name = string("x1_39_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_39_end_0 = const()[name = string("x1_39_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_39_end_mask_0 = const()[name = string("x1_39_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_39_cast_fp16 = slice_by_index(begin = x1_39_begin_0, end = x1_39_end_0, end_mask = x1_39_end_mask_0, x = k_19_cast_fp16)[name = string("x1_39_cast_fp16")]; - tensor x2_39_begin_0 = const()[name = string("x2_39_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_39_end_0 = const()[name = string("x2_39_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_39_end_mask_0 = const()[name = string("x2_39_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_39_cast_fp16 = slice_by_index(begin = x2_39_begin_0, end = x2_39_end_0, end_mask = x2_39_end_mask_0, x = k_19_cast_fp16)[name = string("x2_39_cast_fp16")]; - fp16 const_359_promoted_to_fp16 = const()[name = string("const_359_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_7356_cast_fp16 = mul(x = x2_39_cast_fp16, y = const_359_promoted_to_fp16)[name = string("op_7356_cast_fp16")]; - int32 var_7358 = const()[name = string("op_7358"), val = int32(-1)]; - bool var_7359_interleave_0 = const()[name = string("op_7359_interleave_0"), val = bool(false)]; - tensor var_7359_cast_fp16 = concat(axis = var_7358, interleave = var_7359_interleave_0, values = (var_7356_cast_fp16, x1_39_cast_fp16))[name = string("op_7359_cast_fp16")]; - tensor var_7360_cast_fp16 = mul(x = var_7359_cast_fp16, y = sin_1_cast_fp16)[name = string("op_7360_cast_fp16")]; - tensor key_states_37_cast_fp16 = add(x = var_7335_cast_fp16, y = var_7360_cast_fp16)[name = string("key_states_37_cast_fp16")]; - tensor expand_dims_108 = const()[name = string("expand_dims_108"), val = tensor([8])]; - tensor expand_dims_109 = const()[name = string("expand_dims_109"), val = tensor([0])]; - tensor expand_dims_111 = const()[name = string("expand_dims_111"), val = tensor([0])]; - tensor expand_dims_112 = const()[name = string("expand_dims_112"), val = tensor([9])]; - int32 concat_74_axis_0 = const()[name = string("concat_74_axis_0"), val = int32(0)]; - bool concat_74_interleave_0 = const()[name = string("concat_74_interleave_0"), val = bool(false)]; - tensor concat_74 = concat(axis = concat_74_axis_0, interleave = concat_74_interleave_0, values = (expand_dims_108, expand_dims_109, current_pos, expand_dims_111))[name = string("concat_74")]; - tensor concat_75_values1_0 = const()[name = string("concat_75_values1_0"), val = tensor([0])]; - tensor concat_75_values3_0 = const()[name = string("concat_75_values3_0"), val = tensor([0])]; - int32 concat_75_axis_0 = const()[name = string("concat_75_axis_0"), val = int32(0)]; - bool concat_75_interleave_0 = const()[name = string("concat_75_interleave_0"), val = bool(false)]; - tensor concat_75 = concat(axis = concat_75_axis_0, interleave = concat_75_interleave_0, values = (expand_dims_112, concat_75_values1_0, var_1909, concat_75_values3_0))[name = string("concat_75")]; - tensor model_model_kv_cache_local_internal_tensor_assign_17_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_17_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_17_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_74, begin_mask = model_model_kv_cache_local_internal_tensor_assign_17_begin_mask_0, end = concat_75, end_mask = model_model_kv_cache_local_internal_tensor_assign_17_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_17_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_17_stride_0, update = key_states_37_cast_fp16, x = coreml_update_state_69)[name = string("model_model_kv_cache_local_internal_tensor_assign_17_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_17_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_18_write_state")]; - tensor coreml_update_state_70 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_18")]; - tensor expand_dims_114 = const()[name = string("expand_dims_114"), val = tensor([30])]; - tensor expand_dims_115 = const()[name = string("expand_dims_115"), val = tensor([0])]; - tensor expand_dims_117 = const()[name = string("expand_dims_117"), val = tensor([0])]; - tensor expand_dims_118 = const()[name = string("expand_dims_118"), val = tensor([31])]; - int32 concat_78_axis_0 = const()[name = string("concat_78_axis_0"), val = int32(0)]; - bool concat_78_interleave_0 = const()[name = string("concat_78_interleave_0"), val = bool(false)]; - tensor concat_78 = concat(axis = concat_78_axis_0, interleave = concat_78_interleave_0, values = (expand_dims_114, expand_dims_115, current_pos, expand_dims_117))[name = string("concat_78")]; - tensor concat_79_values1_0 = const()[name = string("concat_79_values1_0"), val = tensor([0])]; - tensor concat_79_values3_0 = const()[name = string("concat_79_values3_0"), val = tensor([0])]; - int32 concat_79_axis_0 = const()[name = string("concat_79_axis_0"), val = int32(0)]; - bool concat_79_interleave_0 = const()[name = string("concat_79_interleave_0"), val = bool(false)]; - tensor concat_79 = concat(axis = concat_79_axis_0, interleave = concat_79_interleave_0, values = (expand_dims_118, concat_79_values1_0, var_1909, concat_79_values3_0))[name = string("concat_79")]; - tensor model_model_kv_cache_local_internal_tensor_assign_18_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_18_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_18_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_78, begin_mask = model_model_kv_cache_local_internal_tensor_assign_18_begin_mask_0, end = concat_79, end_mask = model_model_kv_cache_local_internal_tensor_assign_18_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_18_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_18_stride_0, update = var_7247, x = coreml_update_state_70)[name = string("model_model_kv_cache_local_internal_tensor_assign_18_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_18_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_19_write_state")]; - tensor coreml_update_state_71 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_19")]; - tensor var_7415_begin_0 = const()[name = string("op_7415_begin_0"), val = tensor([8, 0, 0, 0])]; - tensor var_7415_end_0 = const()[name = string("op_7415_end_0"), val = tensor([9, 1, 512, 256])]; - tensor var_7415_end_mask_0 = const()[name = string("op_7415_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_7415_cast_fp16 = slice_by_index(begin = var_7415_begin_0, end = var_7415_end_0, end_mask = var_7415_end_mask_0, x = coreml_update_state_71)[name = string("op_7415_cast_fp16")]; - tensor var_7422_begin_0 = const()[name = string("op_7422_begin_0"), val = tensor([30, 0, 0, 0])]; - tensor var_7422_end_0 = const()[name = string("op_7422_end_0"), val = tensor([31, 1, 512, 256])]; - tensor var_7422_end_mask_0 = const()[name = string("op_7422_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_7422_cast_fp16 = slice_by_index(begin = var_7422_begin_0, end = var_7422_end_0, end_mask = var_7422_end_mask_0, x = coreml_update_state_71)[name = string("op_7422_cast_fp16")]; - tensor var_7459 = const()[name = string("op_7459"), val = tensor([1, 4, 1, 1])]; - tensor x_149_cast_fp16 = tile(reps = var_7459, x = var_7415_cast_fp16)[name = string("x_149_cast_fp16")]; - tensor var_7479 = const()[name = string("op_7479"), val = tensor([1, 4, 1, 1])]; - tensor x_155_cast_fp16 = tile(reps = var_7479, x = var_7422_cast_fp16)[name = string("x_155_cast_fp16")]; - bool var_7506_transpose_x_1 = const()[name = string("op_7506_transpose_x_1"), val = bool(false)]; - bool var_7506_transpose_y_1 = const()[name = string("op_7506_transpose_y_1"), val = bool(true)]; - tensor var_7506 = matmul(transpose_x = var_7506_transpose_x_1, transpose_y = var_7506_transpose_y_1, x = query_states_37_cast_fp16, y = x_149_cast_fp16)[name = string("op_7506")]; - fp16 var_7507_to_fp16 = const()[name = string("op_7507_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_55_cast_fp16 = mul(x = var_7506, y = var_7507_to_fp16)[name = string("attn_weights_55_cast_fp16")]; - tensor attn_weights_57_cast_fp16 = add(x = attn_weights_55_cast_fp16, y = var_2083)[name = string("attn_weights_57_cast_fp16")]; - int32 var_7542 = const()[name = string("op_7542"), val = int32(-1)]; - tensor attn_weights_59_cast_fp16 = softmax(axis = var_7542, x = attn_weights_57_cast_fp16)[name = string("attn_weights_59_cast_fp16")]; - bool attn_output_91_transpose_x_0 = const()[name = string("attn_output_91_transpose_x_0"), val = bool(false)]; - bool attn_output_91_transpose_y_0 = const()[name = string("attn_output_91_transpose_y_0"), val = bool(false)]; - tensor attn_output_91_cast_fp16 = matmul(transpose_x = attn_output_91_transpose_x_0, transpose_y = attn_output_91_transpose_y_0, x = attn_weights_59_cast_fp16, y = x_155_cast_fp16)[name = string("attn_output_91_cast_fp16")]; - tensor var_7553_perm_0 = const()[name = string("op_7553_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_7557 = const()[name = string("op_7557"), val = tensor([1, 1, 1024])]; - tensor var_7553_cast_fp16 = transpose(perm = var_7553_perm_0, x = attn_output_91_cast_fp16)[name = string("transpose_100")]; - tensor attn_output_95_cast_fp16 = reshape(shape = var_7557, x = var_7553_cast_fp16)[name = string("attn_output_95_cast_fp16")]; - tensor var_7562 = const()[name = string("op_7562"), val = tensor([0, 2, 1])]; - string var_7578_pad_type_0 = const()[name = string("op_7578_pad_type_0"), val = string("valid")]; - int32 var_7578_groups_0 = const()[name = string("op_7578_groups_0"), val = int32(1)]; - tensor var_7578_strides_0 = const()[name = string("op_7578_strides_0"), val = tensor([1])]; - tensor var_7578_pad_0 = const()[name = string("op_7578_pad_0"), val = tensor([0, 0])]; - tensor var_7578_dilations_0 = const()[name = string("op_7578_dilations_0"), val = tensor([1])]; - tensor squeeze_9_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223563136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224447936))))[name = string("squeeze_9_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_7563_cast_fp16 = transpose(perm = var_7562, x = attn_output_95_cast_fp16)[name = string("transpose_99")]; - tensor var_7578_cast_fp16 = conv(dilations = var_7578_dilations_0, groups = var_7578_groups_0, pad = var_7578_pad_0, pad_type = var_7578_pad_type_0, strides = var_7578_strides_0, weight = squeeze_9_cast_fp16_to_fp32_to_fp16_palettized, x = var_7563_cast_fp16)[name = string("op_7578_cast_fp16")]; - tensor var_7582 = const()[name = string("op_7582"), val = tensor([0, 2, 1])]; - int32 var_7593 = const()[name = string("op_7593"), val = int32(-1)]; - fp16 const_368_promoted_to_fp16 = const()[name = string("const_368_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_153_cast_fp16 = transpose(perm = var_7582, x = var_7578_cast_fp16)[name = string("transpose_98")]; - tensor var_7595_cast_fp16 = mul(x = hidden_states_153_cast_fp16, y = const_368_promoted_to_fp16)[name = string("op_7595_cast_fp16")]; - bool input_191_interleave_0 = const()[name = string("input_191_interleave_0"), val = bool(false)]; - tensor input_191_cast_fp16 = concat(axis = var_7593, interleave = input_191_interleave_0, values = (hidden_states_153_cast_fp16, var_7595_cast_fp16))[name = string("input_191_cast_fp16")]; - tensor normed_229_axes_0 = const()[name = string("normed_229_axes_0"), val = tensor([-1])]; - fp16 var_7590_to_fp16 = const()[name = string("op_7590_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_229_cast_fp16 = layer_norm(axes = normed_229_axes_0, epsilon = var_7590_to_fp16, x = input_191_cast_fp16)[name = string("normed_229_cast_fp16")]; - tensor normed_231_begin_0 = const()[name = string("normed_231_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_231_end_0 = const()[name = string("normed_231_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_231_end_mask_0 = const()[name = string("normed_231_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_231_cast_fp16 = slice_by_index(begin = normed_231_begin_0, end = normed_231_end_0, end_mask = normed_231_end_mask_0, x = normed_229_cast_fp16)[name = string("normed_231_cast_fp16")]; - tensor var_7609_to_fp16 = const()[name = string("op_7609_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224466432)))]; - tensor attn_output_99_cast_fp16 = mul(x = normed_231_cast_fp16, y = var_7609_to_fp16)[name = string("attn_output_99_cast_fp16")]; - tensor hidden_states_155_cast_fp16 = add(x = hidden_states_145_cast_fp16, y = attn_output_99_cast_fp16)[name = string("hidden_states_155_cast_fp16")]; - int32 var_7622 = const()[name = string("op_7622"), val = int32(-1)]; - fp16 const_372_promoted_to_fp16 = const()[name = string("const_372_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_7624_cast_fp16 = mul(x = hidden_states_155_cast_fp16, y = const_372_promoted_to_fp16)[name = string("op_7624_cast_fp16")]; - bool input_193_interleave_0 = const()[name = string("input_193_interleave_0"), val = bool(false)]; - tensor input_193_cast_fp16 = concat(axis = var_7622, interleave = input_193_interleave_0, values = (hidden_states_155_cast_fp16, var_7624_cast_fp16))[name = string("input_193_cast_fp16")]; - tensor normed_233_axes_0 = const()[name = string("normed_233_axes_0"), val = tensor([-1])]; - fp16 var_7619_to_fp16 = const()[name = string("op_7619_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_233_cast_fp16 = layer_norm(axes = normed_233_axes_0, epsilon = var_7619_to_fp16, x = input_193_cast_fp16)[name = string("normed_233_cast_fp16")]; - tensor normed_235_begin_0 = const()[name = string("normed_235_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_235_end_0 = const()[name = string("normed_235_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_235_end_mask_0 = const()[name = string("normed_235_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_235_cast_fp16 = slice_by_index(begin = normed_235_begin_0, end = normed_235_end_0, end_mask = normed_235_end_mask_0, x = normed_233_cast_fp16)[name = string("normed_235_cast_fp16")]; - tensor var_7638_to_fp16 = const()[name = string("op_7638_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224468800)))]; - tensor x_157_cast_fp16 = mul(x = normed_235_cast_fp16, y = var_7638_to_fp16)[name = string("x_157_cast_fp16")]; - tensor var_7650 = const()[name = string("op_7650"), val = tensor([0, 2, 1])]; - tensor input_195_axes_0 = const()[name = string("input_195_axes_0"), val = tensor([2])]; - tensor var_7651_cast_fp16 = transpose(perm = var_7650, x = x_157_cast_fp16)[name = string("transpose_97")]; - tensor input_195_cast_fp16 = expand_dims(axes = input_195_axes_0, x = var_7651_cast_fp16)[name = string("input_195_cast_fp16")]; - string x_159_pad_type_0 = const()[name = string("x_159_pad_type_0"), val = string("valid")]; - tensor x_159_strides_0 = const()[name = string("x_159_strides_0"), val = tensor([1, 1])]; - tensor x_159_pad_0 = const()[name = string("x_159_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_159_dilations_0 = const()[name = string("x_159_dilations_0"), val = tensor([1, 1])]; - int32 x_159_groups_0 = const()[name = string("x_159_groups_0"), val = int32(1)]; - tensor model_model_layers_9_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224471168))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(230443200))))[name = string("model_model_layers_9_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_159_cast_fp16 = conv(dilations = x_159_dilations_0, groups = x_159_groups_0, pad = x_159_pad_0, pad_type = x_159_pad_type_0, strides = x_159_strides_0, weight = model_model_layers_9_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_195_cast_fp16)[name = string("x_159_cast_fp16")]; - string b_19_pad_type_0 = const()[name = string("b_19_pad_type_0"), val = string("valid")]; - tensor b_19_strides_0 = const()[name = string("b_19_strides_0"), val = tensor([1, 1])]; - tensor b_19_pad_0 = const()[name = string("b_19_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_19_dilations_0 = const()[name = string("b_19_dilations_0"), val = tensor([1, 1])]; - int32 b_19_groups_0 = const()[name = string("b_19_groups_0"), val = int32(1)]; - tensor model_model_layers_9_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(230553856))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236525888))))[name = string("model_model_layers_9_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_19_cast_fp16 = conv(dilations = b_19_dilations_0, groups = b_19_groups_0, pad = b_19_pad_0, pad_type = b_19_pad_type_0, strides = b_19_strides_0, weight = model_model_layers_9_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_195_cast_fp16)[name = string("b_19_cast_fp16")]; - string var_7676_mode_0 = const()[name = string("op_7676_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_7676_cast_fp16 = gelu(mode = var_7676_mode_0, x = x_159_cast_fp16)[name = string("op_7676_cast_fp16")]; - tensor input_197_cast_fp16 = mul(x = var_7676_cast_fp16, y = b_19_cast_fp16)[name = string("input_197_cast_fp16")]; - string e_19_pad_type_0 = const()[name = string("e_19_pad_type_0"), val = string("valid")]; - tensor e_19_strides_0 = const()[name = string("e_19_strides_0"), val = tensor([1, 1])]; - tensor e_19_pad_0 = const()[name = string("e_19_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_19_dilations_0 = const()[name = string("e_19_dilations_0"), val = tensor([1, 1])]; - int32 e_19_groups_0 = const()[name = string("e_19_groups_0"), val = int32(1)]; - tensor model_model_layers_9_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236636544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242608576))))[name = string("model_model_layers_9_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_19_cast_fp16 = conv(dilations = e_19_dilations_0, groups = e_19_groups_0, pad = e_19_pad_0, pad_type = e_19_pad_type_0, strides = e_19_strides_0, weight = model_model_layers_9_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_197_cast_fp16)[name = string("e_19_cast_fp16")]; - tensor var_7684_axes_0 = const()[name = string("op_7684_axes_0"), val = tensor([2])]; - tensor var_7684_cast_fp16 = squeeze(axes = var_7684_axes_0, x = e_19_cast_fp16)[name = string("op_7684_cast_fp16")]; - tensor var_7685 = const()[name = string("op_7685"), val = tensor([0, 2, 1])]; - int32 var_7696 = const()[name = string("op_7696"), val = int32(-1)]; - fp16 const_376_promoted_to_fp16 = const()[name = string("const_376_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_157_cast_fp16 = transpose(perm = var_7685, x = var_7684_cast_fp16)[name = string("transpose_96")]; - tensor var_7698_cast_fp16 = mul(x = hidden_states_157_cast_fp16, y = const_376_promoted_to_fp16)[name = string("op_7698_cast_fp16")]; - bool input_199_interleave_0 = const()[name = string("input_199_interleave_0"), val = bool(false)]; - tensor input_199_cast_fp16 = concat(axis = var_7696, interleave = input_199_interleave_0, values = (hidden_states_157_cast_fp16, var_7698_cast_fp16))[name = string("input_199_cast_fp16")]; - tensor normed_237_axes_0 = const()[name = string("normed_237_axes_0"), val = tensor([-1])]; - fp16 var_7693_to_fp16 = const()[name = string("op_7693_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_237_cast_fp16 = layer_norm(axes = normed_237_axes_0, epsilon = var_7693_to_fp16, x = input_199_cast_fp16)[name = string("normed_237_cast_fp16")]; - tensor normed_239_begin_0 = const()[name = string("normed_239_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_239_end_0 = const()[name = string("normed_239_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_239_end_mask_0 = const()[name = string("normed_239_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_239_cast_fp16 = slice_by_index(begin = normed_239_begin_0, end = normed_239_end_0, end_mask = normed_239_end_mask_0, x = normed_237_cast_fp16)[name = string("normed_239_cast_fp16")]; - tensor var_7712_to_fp16 = const()[name = string("op_7712_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242627072)))]; - tensor hidden_states_159_cast_fp16 = mul(x = normed_239_cast_fp16, y = var_7712_to_fp16)[name = string("hidden_states_159_cast_fp16")]; - tensor hidden_states_161_cast_fp16 = add(x = hidden_states_155_cast_fp16, y = hidden_states_159_cast_fp16)[name = string("hidden_states_161_cast_fp16")]; - int32 var_7763 = const()[name = string("op_7763"), val = int32(-1)]; - fp16 const_380_promoted_to_fp16 = const()[name = string("const_380_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_7765_cast_fp16 = mul(x = hidden_states_161_cast_fp16, y = const_380_promoted_to_fp16)[name = string("op_7765_cast_fp16")]; - bool input_201_interleave_0 = const()[name = string("input_201_interleave_0"), val = bool(false)]; - tensor input_201_cast_fp16 = concat(axis = var_7763, interleave = input_201_interleave_0, values = (hidden_states_161_cast_fp16, var_7765_cast_fp16))[name = string("input_201_cast_fp16")]; - tensor normed_241_axes_0 = const()[name = string("normed_241_axes_0"), val = tensor([-1])]; - fp16 var_7760_to_fp16 = const()[name = string("op_7760_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_241_cast_fp16 = layer_norm(axes = normed_241_axes_0, epsilon = var_7760_to_fp16, x = input_201_cast_fp16)[name = string("normed_241_cast_fp16")]; - tensor normed_243_begin_0 = const()[name = string("normed_243_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_243_end_0 = const()[name = string("normed_243_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_243_end_mask_0 = const()[name = string("normed_243_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_243_cast_fp16 = slice_by_index(begin = normed_243_begin_0, end = normed_243_end_0, end_mask = normed_243_end_mask_0, x = normed_241_cast_fp16)[name = string("normed_243_cast_fp16")]; - tensor var_7779_to_fp16 = const()[name = string("op_7779_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242629440)))]; - tensor hidden_states_163_cast_fp16 = mul(x = normed_243_cast_fp16, y = var_7779_to_fp16)[name = string("hidden_states_163_cast_fp16")]; - tensor var_7784 = const()[name = string("op_7784"), val = tensor([0, 2, 1])]; - tensor var_7787_axes_0 = const()[name = string("op_7787_axes_0"), val = tensor([2])]; - tensor var_7785_cast_fp16 = transpose(perm = var_7784, x = hidden_states_163_cast_fp16)[name = string("transpose_95")]; - tensor var_7787_cast_fp16 = expand_dims(axes = var_7787_axes_0, x = var_7785_cast_fp16)[name = string("op_7787_cast_fp16")]; - string var_7803_pad_type_0 = const()[name = string("op_7803_pad_type_0"), val = string("valid")]; - tensor var_7803_strides_0 = const()[name = string("op_7803_strides_0"), val = tensor([1, 1])]; - tensor var_7803_pad_0 = const()[name = string("op_7803_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_7803_dilations_0 = const()[name = string("op_7803_dilations_0"), val = tensor([1, 1])]; - int32 var_7803_groups_0 = const()[name = string("op_7803_groups_0"), val = int32(1)]; - tensor var_7803 = conv(dilations = var_7803_dilations_0, groups = var_7803_groups_0, pad = var_7803_pad_0, pad_type = var_7803_pad_type_0, strides = var_7803_strides_0, weight = model_model_layers_10_self_attn_q_proj_weight_palettized, x = var_7787_cast_fp16)[name = string("op_7803")]; - tensor var_7808 = const()[name = string("op_7808"), val = tensor([1, 4, 1, 256])]; - tensor var_7809 = reshape(shape = var_7808, x = var_7803)[name = string("op_7809")]; - string var_7825_pad_type_0 = const()[name = string("op_7825_pad_type_0"), val = string("valid")]; - tensor var_7825_strides_0 = const()[name = string("op_7825_strides_0"), val = tensor([1, 1])]; - tensor var_7825_pad_0 = const()[name = string("op_7825_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_7825_dilations_0 = const()[name = string("op_7825_dilations_0"), val = tensor([1, 1])]; - int32 var_7825_groups_0 = const()[name = string("op_7825_groups_0"), val = int32(1)]; - tensor var_7825 = conv(dilations = var_7825_dilations_0, groups = var_7825_groups_0, pad = var_7825_pad_0, pad_type = var_7825_pad_type_0, strides = var_7825_strides_0, weight = model_model_layers_10_self_attn_k_proj_weight_palettized, x = var_7787_cast_fp16)[name = string("op_7825")]; - tensor var_7830 = const()[name = string("op_7830"), val = tensor([1, 1, 1, 256])]; - tensor var_7831 = reshape(shape = var_7830, x = var_7825)[name = string("op_7831")]; - string var_7847_pad_type_0 = const()[name = string("op_7847_pad_type_0"), val = string("valid")]; - tensor var_7847_strides_0 = const()[name = string("op_7847_strides_0"), val = tensor([1, 1])]; - tensor var_7847_pad_0 = const()[name = string("op_7847_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_7847_dilations_0 = const()[name = string("op_7847_dilations_0"), val = tensor([1, 1])]; - int32 var_7847_groups_0 = const()[name = string("op_7847_groups_0"), val = int32(1)]; - tensor var_7847 = conv(dilations = var_7847_dilations_0, groups = var_7847_groups_0, pad = var_7847_pad_0, pad_type = var_7847_pad_type_0, strides = var_7847_strides_0, weight = model_model_layers_10_self_attn_v_proj_weight_palettized, x = var_7787_cast_fp16)[name = string("op_7847")]; - tensor var_7852 = const()[name = string("op_7852"), val = tensor([1, 1, 1, 256])]; - tensor var_7853 = reshape(shape = var_7852, x = var_7847)[name = string("op_7853")]; - int32 var_7868 = const()[name = string("op_7868"), val = int32(-1)]; - fp16 const_384_promoted = const()[name = string("const_384_promoted"), val = fp16(-0x1p+0)]; - tensor var_7870 = mul(x = var_7809, y = const_384_promoted)[name = string("op_7870")]; - bool input_205_interleave_0 = const()[name = string("input_205_interleave_0"), val = bool(false)]; - tensor input_205 = concat(axis = var_7868, interleave = input_205_interleave_0, values = (var_7809, var_7870))[name = string("input_205")]; - tensor normed_245_axes_0 = const()[name = string("normed_245_axes_0"), val = tensor([-1])]; - fp16 var_7865_to_fp16 = const()[name = string("op_7865_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_245_cast_fp16 = layer_norm(axes = normed_245_axes_0, epsilon = var_7865_to_fp16, x = input_205)[name = string("normed_245_cast_fp16")]; - tensor normed_247_begin_0 = const()[name = string("normed_247_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_247_end_0 = const()[name = string("normed_247_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_247_end_mask_0 = const()[name = string("normed_247_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_247 = slice_by_index(begin = normed_247_begin_0, end = normed_247_end_0, end_mask = normed_247_end_mask_0, x = normed_245_cast_fp16)[name = string("normed_247")]; - tensor var_7884_to_fp16 = const()[name = string("op_7884_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242631808)))]; - tensor q_21_cast_fp16 = mul(x = normed_247, y = var_7884_to_fp16)[name = string("q_21_cast_fp16")]; - int32 var_7895 = const()[name = string("op_7895"), val = int32(-1)]; - fp16 const_388_promoted = const()[name = string("const_388_promoted"), val = fp16(-0x1p+0)]; - tensor var_7897 = mul(x = var_7831, y = const_388_promoted)[name = string("op_7897")]; - bool input_207_interleave_0 = const()[name = string("input_207_interleave_0"), val = bool(false)]; - tensor input_207 = concat(axis = var_7895, interleave = input_207_interleave_0, values = (var_7831, var_7897))[name = string("input_207")]; - tensor normed_249_axes_0 = const()[name = string("normed_249_axes_0"), val = tensor([-1])]; - fp16 var_7892_to_fp16 = const()[name = string("op_7892_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_249_cast_fp16 = layer_norm(axes = normed_249_axes_0, epsilon = var_7892_to_fp16, x = input_207)[name = string("normed_249_cast_fp16")]; - tensor normed_251_begin_0 = const()[name = string("normed_251_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_251_end_0 = const()[name = string("normed_251_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_251_end_mask_0 = const()[name = string("normed_251_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_251 = slice_by_index(begin = normed_251_begin_0, end = normed_251_end_0, end_mask = normed_251_end_mask_0, x = normed_249_cast_fp16)[name = string("normed_251")]; - tensor var_7911_to_fp16 = const()[name = string("op_7911_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242632384)))]; - tensor k_21_cast_fp16 = mul(x = normed_251, y = var_7911_to_fp16)[name = string("k_21_cast_fp16")]; - tensor var_7913_cast_fp16 = mul(x = q_21_cast_fp16, y = cos_1_cast_fp16)[name = string("op_7913_cast_fp16")]; - tensor x1_41_begin_0 = const()[name = string("x1_41_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_41_end_0 = const()[name = string("x1_41_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_41_end_mask_0 = const()[name = string("x1_41_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_41_cast_fp16 = slice_by_index(begin = x1_41_begin_0, end = x1_41_end_0, end_mask = x1_41_end_mask_0, x = q_21_cast_fp16)[name = string("x1_41_cast_fp16")]; - tensor x2_41_begin_0 = const()[name = string("x2_41_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_41_end_0 = const()[name = string("x2_41_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_41_end_mask_0 = const()[name = string("x2_41_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_41_cast_fp16 = slice_by_index(begin = x2_41_begin_0, end = x2_41_end_0, end_mask = x2_41_end_mask_0, x = q_21_cast_fp16)[name = string("x2_41_cast_fp16")]; - fp16 const_394_promoted_to_fp16 = const()[name = string("const_394_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_7934_cast_fp16 = mul(x = x2_41_cast_fp16, y = const_394_promoted_to_fp16)[name = string("op_7934_cast_fp16")]; - int32 var_7936 = const()[name = string("op_7936"), val = int32(-1)]; - bool var_7937_interleave_0 = const()[name = string("op_7937_interleave_0"), val = bool(false)]; - tensor var_7937_cast_fp16 = concat(axis = var_7936, interleave = var_7937_interleave_0, values = (var_7934_cast_fp16, x1_41_cast_fp16))[name = string("op_7937_cast_fp16")]; - tensor var_7938_cast_fp16 = mul(x = var_7937_cast_fp16, y = sin_1_cast_fp16)[name = string("op_7938_cast_fp16")]; - tensor query_states_41_cast_fp16 = add(x = var_7913_cast_fp16, y = var_7938_cast_fp16)[name = string("query_states_41_cast_fp16")]; - tensor var_7941_cast_fp16 = mul(x = k_21_cast_fp16, y = cos_1_cast_fp16)[name = string("op_7941_cast_fp16")]; - tensor x1_43_begin_0 = const()[name = string("x1_43_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_43_end_0 = const()[name = string("x1_43_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_43_end_mask_0 = const()[name = string("x1_43_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_43_cast_fp16 = slice_by_index(begin = x1_43_begin_0, end = x1_43_end_0, end_mask = x1_43_end_mask_0, x = k_21_cast_fp16)[name = string("x1_43_cast_fp16")]; - tensor x2_43_begin_0 = const()[name = string("x2_43_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_43_end_0 = const()[name = string("x2_43_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_43_end_mask_0 = const()[name = string("x2_43_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_43_cast_fp16 = slice_by_index(begin = x2_43_begin_0, end = x2_43_end_0, end_mask = x2_43_end_mask_0, x = k_21_cast_fp16)[name = string("x2_43_cast_fp16")]; - fp16 const_397_promoted_to_fp16 = const()[name = string("const_397_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_7962_cast_fp16 = mul(x = x2_43_cast_fp16, y = const_397_promoted_to_fp16)[name = string("op_7962_cast_fp16")]; - int32 var_7964 = const()[name = string("op_7964"), val = int32(-1)]; - bool var_7965_interleave_0 = const()[name = string("op_7965_interleave_0"), val = bool(false)]; - tensor var_7965_cast_fp16 = concat(axis = var_7964, interleave = var_7965_interleave_0, values = (var_7962_cast_fp16, x1_43_cast_fp16))[name = string("op_7965_cast_fp16")]; - tensor var_7966_cast_fp16 = mul(x = var_7965_cast_fp16, y = sin_1_cast_fp16)[name = string("op_7966_cast_fp16")]; - tensor key_states_41_cast_fp16 = add(x = var_7941_cast_fp16, y = var_7966_cast_fp16)[name = string("key_states_41_cast_fp16")]; - tensor expand_dims_120 = const()[name = string("expand_dims_120"), val = tensor([9])]; - tensor expand_dims_121 = const()[name = string("expand_dims_121"), val = tensor([0])]; - tensor expand_dims_123 = const()[name = string("expand_dims_123"), val = tensor([0])]; - tensor expand_dims_124 = const()[name = string("expand_dims_124"), val = tensor([10])]; - int32 concat_82_axis_0 = const()[name = string("concat_82_axis_0"), val = int32(0)]; - bool concat_82_interleave_0 = const()[name = string("concat_82_interleave_0"), val = bool(false)]; - tensor concat_82 = concat(axis = concat_82_axis_0, interleave = concat_82_interleave_0, values = (expand_dims_120, expand_dims_121, current_pos, expand_dims_123))[name = string("concat_82")]; - tensor concat_83_values1_0 = const()[name = string("concat_83_values1_0"), val = tensor([0])]; - tensor concat_83_values3_0 = const()[name = string("concat_83_values3_0"), val = tensor([0])]; - int32 concat_83_axis_0 = const()[name = string("concat_83_axis_0"), val = int32(0)]; - bool concat_83_interleave_0 = const()[name = string("concat_83_interleave_0"), val = bool(false)]; - tensor concat_83 = concat(axis = concat_83_axis_0, interleave = concat_83_interleave_0, values = (expand_dims_124, concat_83_values1_0, var_1909, concat_83_values3_0))[name = string("concat_83")]; - tensor model_model_kv_cache_local_internal_tensor_assign_19_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_19_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_19_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_82, begin_mask = model_model_kv_cache_local_internal_tensor_assign_19_begin_mask_0, end = concat_83, end_mask = model_model_kv_cache_local_internal_tensor_assign_19_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_19_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_19_stride_0, update = key_states_41_cast_fp16, x = coreml_update_state_71)[name = string("model_model_kv_cache_local_internal_tensor_assign_19_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_19_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_20_write_state")]; - tensor coreml_update_state_72 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_20")]; - tensor expand_dims_126 = const()[name = string("expand_dims_126"), val = tensor([31])]; - tensor expand_dims_127 = const()[name = string("expand_dims_127"), val = tensor([0])]; - tensor expand_dims_129 = const()[name = string("expand_dims_129"), val = tensor([0])]; - tensor expand_dims_130 = const()[name = string("expand_dims_130"), val = tensor([32])]; - int32 concat_86_axis_0 = const()[name = string("concat_86_axis_0"), val = int32(0)]; - bool concat_86_interleave_0 = const()[name = string("concat_86_interleave_0"), val = bool(false)]; - tensor concat_86 = concat(axis = concat_86_axis_0, interleave = concat_86_interleave_0, values = (expand_dims_126, expand_dims_127, current_pos, expand_dims_129))[name = string("concat_86")]; - tensor concat_87_values1_0 = const()[name = string("concat_87_values1_0"), val = tensor([0])]; - tensor concat_87_values3_0 = const()[name = string("concat_87_values3_0"), val = tensor([0])]; - int32 concat_87_axis_0 = const()[name = string("concat_87_axis_0"), val = int32(0)]; - bool concat_87_interleave_0 = const()[name = string("concat_87_interleave_0"), val = bool(false)]; - tensor concat_87 = concat(axis = concat_87_axis_0, interleave = concat_87_interleave_0, values = (expand_dims_130, concat_87_values1_0, var_1909, concat_87_values3_0))[name = string("concat_87")]; - tensor model_model_kv_cache_local_internal_tensor_assign_20_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_20_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_20_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_86, begin_mask = model_model_kv_cache_local_internal_tensor_assign_20_begin_mask_0, end = concat_87, end_mask = model_model_kv_cache_local_internal_tensor_assign_20_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_20_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_20_stride_0, update = var_7853, x = coreml_update_state_72)[name = string("model_model_kv_cache_local_internal_tensor_assign_20_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_20_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_21_write_state")]; - tensor coreml_update_state_73 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_21")]; - tensor var_8021_begin_0 = const()[name = string("op_8021_begin_0"), val = tensor([9, 0, 0, 0])]; - tensor var_8021_end_0 = const()[name = string("op_8021_end_0"), val = tensor([10, 1, 512, 256])]; - tensor var_8021_end_mask_0 = const()[name = string("op_8021_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_8021_cast_fp16 = slice_by_index(begin = var_8021_begin_0, end = var_8021_end_0, end_mask = var_8021_end_mask_0, x = coreml_update_state_73)[name = string("op_8021_cast_fp16")]; - tensor var_8028_begin_0 = const()[name = string("op_8028_begin_0"), val = tensor([31, 0, 0, 0])]; - tensor var_8028_end_0 = const()[name = string("op_8028_end_0"), val = tensor([32, 1, 512, 256])]; - tensor var_8028_end_mask_0 = const()[name = string("op_8028_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_8028_cast_fp16 = slice_by_index(begin = var_8028_begin_0, end = var_8028_end_0, end_mask = var_8028_end_mask_0, x = coreml_update_state_73)[name = string("op_8028_cast_fp16")]; - tensor var_8065 = const()[name = string("op_8065"), val = tensor([1, 4, 1, 1])]; - tensor x_165_cast_fp16 = tile(reps = var_8065, x = var_8021_cast_fp16)[name = string("x_165_cast_fp16")]; - tensor var_8085 = const()[name = string("op_8085"), val = tensor([1, 4, 1, 1])]; - tensor x_171_cast_fp16 = tile(reps = var_8085, x = var_8028_cast_fp16)[name = string("x_171_cast_fp16")]; - bool var_8112_transpose_x_1 = const()[name = string("op_8112_transpose_x_1"), val = bool(false)]; - bool var_8112_transpose_y_1 = const()[name = string("op_8112_transpose_y_1"), val = bool(true)]; - tensor var_8112 = matmul(transpose_x = var_8112_transpose_x_1, transpose_y = var_8112_transpose_y_1, x = query_states_41_cast_fp16, y = x_165_cast_fp16)[name = string("op_8112")]; - fp16 var_8113_to_fp16 = const()[name = string("op_8113_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_61_cast_fp16 = mul(x = var_8112, y = var_8113_to_fp16)[name = string("attn_weights_61_cast_fp16")]; - tensor attn_weights_63_cast_fp16 = add(x = attn_weights_61_cast_fp16, y = var_2083)[name = string("attn_weights_63_cast_fp16")]; - int32 var_8148 = const()[name = string("op_8148"), val = int32(-1)]; - tensor attn_weights_65_cast_fp16 = softmax(axis = var_8148, x = attn_weights_63_cast_fp16)[name = string("attn_weights_65_cast_fp16")]; - bool attn_output_101_transpose_x_0 = const()[name = string("attn_output_101_transpose_x_0"), val = bool(false)]; - bool attn_output_101_transpose_y_0 = const()[name = string("attn_output_101_transpose_y_0"), val = bool(false)]; - tensor attn_output_101_cast_fp16 = matmul(transpose_x = attn_output_101_transpose_x_0, transpose_y = attn_output_101_transpose_y_0, x = attn_weights_65_cast_fp16, y = x_171_cast_fp16)[name = string("attn_output_101_cast_fp16")]; - tensor var_8159_perm_0 = const()[name = string("op_8159_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_8163 = const()[name = string("op_8163"), val = tensor([1, 1, 1024])]; - tensor var_8159_cast_fp16 = transpose(perm = var_8159_perm_0, x = attn_output_101_cast_fp16)[name = string("transpose_94")]; - tensor attn_output_105_cast_fp16 = reshape(shape = var_8163, x = var_8159_cast_fp16)[name = string("attn_output_105_cast_fp16")]; - tensor var_8168 = const()[name = string("op_8168"), val = tensor([0, 2, 1])]; - string var_8184_pad_type_0 = const()[name = string("op_8184_pad_type_0"), val = string("valid")]; - int32 var_8184_groups_0 = const()[name = string("op_8184_groups_0"), val = int32(1)]; - tensor var_8184_strides_0 = const()[name = string("op_8184_strides_0"), val = tensor([1])]; - tensor var_8184_pad_0 = const()[name = string("op_8184_pad_0"), val = tensor([0, 0])]; - tensor var_8184_dilations_0 = const()[name = string("op_8184_dilations_0"), val = tensor([1])]; - tensor squeeze_10_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242632960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243517760))))[name = string("squeeze_10_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_8169_cast_fp16 = transpose(perm = var_8168, x = attn_output_105_cast_fp16)[name = string("transpose_93")]; - tensor var_8184_cast_fp16 = conv(dilations = var_8184_dilations_0, groups = var_8184_groups_0, pad = var_8184_pad_0, pad_type = var_8184_pad_type_0, strides = var_8184_strides_0, weight = squeeze_10_cast_fp16_to_fp32_to_fp16_palettized, x = var_8169_cast_fp16)[name = string("op_8184_cast_fp16")]; - tensor var_8188 = const()[name = string("op_8188"), val = tensor([0, 2, 1])]; - int32 var_8199 = const()[name = string("op_8199"), val = int32(-1)]; - fp16 const_406_promoted_to_fp16 = const()[name = string("const_406_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_169_cast_fp16 = transpose(perm = var_8188, x = var_8184_cast_fp16)[name = string("transpose_92")]; - tensor var_8201_cast_fp16 = mul(x = hidden_states_169_cast_fp16, y = const_406_promoted_to_fp16)[name = string("op_8201_cast_fp16")]; - bool input_211_interleave_0 = const()[name = string("input_211_interleave_0"), val = bool(false)]; - tensor input_211_cast_fp16 = concat(axis = var_8199, interleave = input_211_interleave_0, values = (hidden_states_169_cast_fp16, var_8201_cast_fp16))[name = string("input_211_cast_fp16")]; - tensor normed_253_axes_0 = const()[name = string("normed_253_axes_0"), val = tensor([-1])]; - fp16 var_8196_to_fp16 = const()[name = string("op_8196_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_253_cast_fp16 = layer_norm(axes = normed_253_axes_0, epsilon = var_8196_to_fp16, x = input_211_cast_fp16)[name = string("normed_253_cast_fp16")]; - tensor normed_255_begin_0 = const()[name = string("normed_255_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_255_end_0 = const()[name = string("normed_255_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_255_end_mask_0 = const()[name = string("normed_255_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_255_cast_fp16 = slice_by_index(begin = normed_255_begin_0, end = normed_255_end_0, end_mask = normed_255_end_mask_0, x = normed_253_cast_fp16)[name = string("normed_255_cast_fp16")]; - tensor var_8215_to_fp16 = const()[name = string("op_8215_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243536256)))]; - tensor attn_output_109_cast_fp16 = mul(x = normed_255_cast_fp16, y = var_8215_to_fp16)[name = string("attn_output_109_cast_fp16")]; - tensor hidden_states_171_cast_fp16 = add(x = hidden_states_161_cast_fp16, y = attn_output_109_cast_fp16)[name = string("hidden_states_171_cast_fp16")]; - int32 var_8228 = const()[name = string("op_8228"), val = int32(-1)]; - fp16 const_410_promoted_to_fp16 = const()[name = string("const_410_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_8230_cast_fp16 = mul(x = hidden_states_171_cast_fp16, y = const_410_promoted_to_fp16)[name = string("op_8230_cast_fp16")]; - bool input_213_interleave_0 = const()[name = string("input_213_interleave_0"), val = bool(false)]; - tensor input_213_cast_fp16 = concat(axis = var_8228, interleave = input_213_interleave_0, values = (hidden_states_171_cast_fp16, var_8230_cast_fp16))[name = string("input_213_cast_fp16")]; - tensor normed_257_axes_0 = const()[name = string("normed_257_axes_0"), val = tensor([-1])]; - fp16 var_8225_to_fp16 = const()[name = string("op_8225_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_257_cast_fp16 = layer_norm(axes = normed_257_axes_0, epsilon = var_8225_to_fp16, x = input_213_cast_fp16)[name = string("normed_257_cast_fp16")]; - tensor normed_259_begin_0 = const()[name = string("normed_259_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_259_end_0 = const()[name = string("normed_259_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_259_end_mask_0 = const()[name = string("normed_259_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_259_cast_fp16 = slice_by_index(begin = normed_259_begin_0, end = normed_259_end_0, end_mask = normed_259_end_mask_0, x = normed_257_cast_fp16)[name = string("normed_259_cast_fp16")]; - tensor var_8244_to_fp16 = const()[name = string("op_8244_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243538624)))]; - tensor x_173_cast_fp16 = mul(x = normed_259_cast_fp16, y = var_8244_to_fp16)[name = string("x_173_cast_fp16")]; - tensor var_8256 = const()[name = string("op_8256"), val = tensor([0, 2, 1])]; - tensor input_215_axes_0 = const()[name = string("input_215_axes_0"), val = tensor([2])]; - tensor var_8257_cast_fp16 = transpose(perm = var_8256, x = x_173_cast_fp16)[name = string("transpose_91")]; - tensor input_215_cast_fp16 = expand_dims(axes = input_215_axes_0, x = var_8257_cast_fp16)[name = string("input_215_cast_fp16")]; - string x_175_pad_type_0 = const()[name = string("x_175_pad_type_0"), val = string("valid")]; - tensor x_175_strides_0 = const()[name = string("x_175_strides_0"), val = tensor([1, 1])]; - tensor x_175_pad_0 = const()[name = string("x_175_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_175_dilations_0 = const()[name = string("x_175_dilations_0"), val = tensor([1, 1])]; - int32 x_175_groups_0 = const()[name = string("x_175_groups_0"), val = int32(1)]; - tensor model_model_layers_10_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243540992))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249513024))))[name = string("model_model_layers_10_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_175_cast_fp16 = conv(dilations = x_175_dilations_0, groups = x_175_groups_0, pad = x_175_pad_0, pad_type = x_175_pad_type_0, strides = x_175_strides_0, weight = model_model_layers_10_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_215_cast_fp16)[name = string("x_175_cast_fp16")]; - string b_21_pad_type_0 = const()[name = string("b_21_pad_type_0"), val = string("valid")]; - tensor b_21_strides_0 = const()[name = string("b_21_strides_0"), val = tensor([1, 1])]; - tensor b_21_pad_0 = const()[name = string("b_21_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_21_dilations_0 = const()[name = string("b_21_dilations_0"), val = tensor([1, 1])]; - int32 b_21_groups_0 = const()[name = string("b_21_groups_0"), val = int32(1)]; - tensor model_model_layers_10_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249623680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(255595712))))[name = string("model_model_layers_10_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_21_cast_fp16 = conv(dilations = b_21_dilations_0, groups = b_21_groups_0, pad = b_21_pad_0, pad_type = b_21_pad_type_0, strides = b_21_strides_0, weight = model_model_layers_10_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_215_cast_fp16)[name = string("b_21_cast_fp16")]; - string var_8282_mode_0 = const()[name = string("op_8282_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_8282_cast_fp16 = gelu(mode = var_8282_mode_0, x = x_175_cast_fp16)[name = string("op_8282_cast_fp16")]; - tensor input_217_cast_fp16 = mul(x = var_8282_cast_fp16, y = b_21_cast_fp16)[name = string("input_217_cast_fp16")]; - string e_21_pad_type_0 = const()[name = string("e_21_pad_type_0"), val = string("valid")]; - tensor e_21_strides_0 = const()[name = string("e_21_strides_0"), val = tensor([1, 1])]; - tensor e_21_pad_0 = const()[name = string("e_21_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_21_dilations_0 = const()[name = string("e_21_dilations_0"), val = tensor([1, 1])]; - int32 e_21_groups_0 = const()[name = string("e_21_groups_0"), val = int32(1)]; - tensor model_model_layers_10_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(255706368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261678400))))[name = string("model_model_layers_10_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_21_cast_fp16 = conv(dilations = e_21_dilations_0, groups = e_21_groups_0, pad = e_21_pad_0, pad_type = e_21_pad_type_0, strides = e_21_strides_0, weight = model_model_layers_10_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_217_cast_fp16)[name = string("e_21_cast_fp16")]; - tensor var_8290_axes_0 = const()[name = string("op_8290_axes_0"), val = tensor([2])]; - tensor var_8290_cast_fp16 = squeeze(axes = var_8290_axes_0, x = e_21_cast_fp16)[name = string("op_8290_cast_fp16")]; - tensor var_8291 = const()[name = string("op_8291"), val = tensor([0, 2, 1])]; - int32 var_8302 = const()[name = string("op_8302"), val = int32(-1)]; - fp16 const_414_promoted_to_fp16 = const()[name = string("const_414_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_173_cast_fp16 = transpose(perm = var_8291, x = var_8290_cast_fp16)[name = string("transpose_90")]; - tensor var_8304_cast_fp16 = mul(x = hidden_states_173_cast_fp16, y = const_414_promoted_to_fp16)[name = string("op_8304_cast_fp16")]; - bool input_219_interleave_0 = const()[name = string("input_219_interleave_0"), val = bool(false)]; - tensor input_219_cast_fp16 = concat(axis = var_8302, interleave = input_219_interleave_0, values = (hidden_states_173_cast_fp16, var_8304_cast_fp16))[name = string("input_219_cast_fp16")]; - tensor normed_261_axes_0 = const()[name = string("normed_261_axes_0"), val = tensor([-1])]; - fp16 var_8299_to_fp16 = const()[name = string("op_8299_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_261_cast_fp16 = layer_norm(axes = normed_261_axes_0, epsilon = var_8299_to_fp16, x = input_219_cast_fp16)[name = string("normed_261_cast_fp16")]; - tensor normed_263_begin_0 = const()[name = string("normed_263_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_263_end_0 = const()[name = string("normed_263_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_263_end_mask_0 = const()[name = string("normed_263_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_263_cast_fp16 = slice_by_index(begin = normed_263_begin_0, end = normed_263_end_0, end_mask = normed_263_end_mask_0, x = normed_261_cast_fp16)[name = string("normed_263_cast_fp16")]; - tensor var_8318_to_fp16 = const()[name = string("op_8318_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261696896)))]; - tensor hidden_states_175_cast_fp16 = mul(x = normed_263_cast_fp16, y = var_8318_to_fp16)[name = string("hidden_states_175_cast_fp16")]; - tensor hidden_states_177_cast_fp16 = add(x = hidden_states_171_cast_fp16, y = hidden_states_175_cast_fp16)[name = string("hidden_states_177_cast_fp16")]; - int32 var_8369 = const()[name = string("op_8369"), val = int32(-1)]; - fp16 const_418_promoted_to_fp16 = const()[name = string("const_418_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_8371_cast_fp16 = mul(x = hidden_states_177_cast_fp16, y = const_418_promoted_to_fp16)[name = string("op_8371_cast_fp16")]; - bool input_221_interleave_0 = const()[name = string("input_221_interleave_0"), val = bool(false)]; - tensor input_221_cast_fp16 = concat(axis = var_8369, interleave = input_221_interleave_0, values = (hidden_states_177_cast_fp16, var_8371_cast_fp16))[name = string("input_221_cast_fp16")]; - tensor normed_265_axes_0 = const()[name = string("normed_265_axes_0"), val = tensor([-1])]; - fp16 var_8366_to_fp16 = const()[name = string("op_8366_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_265_cast_fp16 = layer_norm(axes = normed_265_axes_0, epsilon = var_8366_to_fp16, x = input_221_cast_fp16)[name = string("normed_265_cast_fp16")]; - tensor normed_267_begin_0 = const()[name = string("normed_267_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_267_end_0 = const()[name = string("normed_267_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_267_end_mask_0 = const()[name = string("normed_267_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_267_cast_fp16 = slice_by_index(begin = normed_267_begin_0, end = normed_267_end_0, end_mask = normed_267_end_mask_0, x = normed_265_cast_fp16)[name = string("normed_267_cast_fp16")]; - tensor var_8385_to_fp16 = const()[name = string("op_8385_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261699264)))]; - tensor hidden_states_179_cast_fp16 = mul(x = normed_267_cast_fp16, y = var_8385_to_fp16)[name = string("hidden_states_179_cast_fp16")]; - tensor var_8390 = const()[name = string("op_8390"), val = tensor([0, 2, 1])]; - tensor var_8393_axes_0 = const()[name = string("op_8393_axes_0"), val = tensor([2])]; - tensor var_8391_cast_fp16 = transpose(perm = var_8390, x = hidden_states_179_cast_fp16)[name = string("transpose_89")]; - tensor var_8393_cast_fp16 = expand_dims(axes = var_8393_axes_0, x = var_8391_cast_fp16)[name = string("op_8393_cast_fp16")]; - string var_8409_pad_type_0 = const()[name = string("op_8409_pad_type_0"), val = string("valid")]; - tensor var_8409_strides_0 = const()[name = string("op_8409_strides_0"), val = tensor([1, 1])]; - tensor var_8409_pad_0 = const()[name = string("op_8409_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_8409_dilations_0 = const()[name = string("op_8409_dilations_0"), val = tensor([1, 1])]; - int32 var_8409_groups_0 = const()[name = string("op_8409_groups_0"), val = int32(1)]; - tensor var_8409 = conv(dilations = var_8409_dilations_0, groups = var_8409_groups_0, pad = var_8409_pad_0, pad_type = var_8409_pad_type_0, strides = var_8409_strides_0, weight = model_model_layers_11_self_attn_q_proj_weight_palettized, x = var_8393_cast_fp16)[name = string("op_8409")]; - tensor var_8414 = const()[name = string("op_8414"), val = tensor([1, 4, 1, 256])]; - tensor var_8415 = reshape(shape = var_8414, x = var_8409)[name = string("op_8415")]; - string var_8431_pad_type_0 = const()[name = string("op_8431_pad_type_0"), val = string("valid")]; - tensor var_8431_strides_0 = const()[name = string("op_8431_strides_0"), val = tensor([1, 1])]; - tensor var_8431_pad_0 = const()[name = string("op_8431_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_8431_dilations_0 = const()[name = string("op_8431_dilations_0"), val = tensor([1, 1])]; - int32 var_8431_groups_0 = const()[name = string("op_8431_groups_0"), val = int32(1)]; - tensor var_8431 = conv(dilations = var_8431_dilations_0, groups = var_8431_groups_0, pad = var_8431_pad_0, pad_type = var_8431_pad_type_0, strides = var_8431_strides_0, weight = model_model_layers_11_self_attn_k_proj_weight_palettized, x = var_8393_cast_fp16)[name = string("op_8431")]; - tensor var_8436 = const()[name = string("op_8436"), val = tensor([1, 1, 1, 256])]; - tensor var_8437 = reshape(shape = var_8436, x = var_8431)[name = string("op_8437")]; - string var_8453_pad_type_0 = const()[name = string("op_8453_pad_type_0"), val = string("valid")]; - tensor var_8453_strides_0 = const()[name = string("op_8453_strides_0"), val = tensor([1, 1])]; - tensor var_8453_pad_0 = const()[name = string("op_8453_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_8453_dilations_0 = const()[name = string("op_8453_dilations_0"), val = tensor([1, 1])]; - int32 var_8453_groups_0 = const()[name = string("op_8453_groups_0"), val = int32(1)]; - tensor var_8453 = conv(dilations = var_8453_dilations_0, groups = var_8453_groups_0, pad = var_8453_pad_0, pad_type = var_8453_pad_type_0, strides = var_8453_strides_0, weight = model_model_layers_11_self_attn_v_proj_weight_palettized, x = var_8393_cast_fp16)[name = string("op_8453")]; - tensor var_8458 = const()[name = string("op_8458"), val = tensor([1, 1, 1, 256])]; - tensor var_8459 = reshape(shape = var_8458, x = var_8453)[name = string("op_8459")]; - int32 var_8474 = const()[name = string("op_8474"), val = int32(-1)]; - fp16 const_422_promoted = const()[name = string("const_422_promoted"), val = fp16(-0x1p+0)]; - tensor var_8476 = mul(x = var_8415, y = const_422_promoted)[name = string("op_8476")]; - bool input_225_interleave_0 = const()[name = string("input_225_interleave_0"), val = bool(false)]; - tensor input_225 = concat(axis = var_8474, interleave = input_225_interleave_0, values = (var_8415, var_8476))[name = string("input_225")]; - tensor normed_269_axes_0 = const()[name = string("normed_269_axes_0"), val = tensor([-1])]; - fp16 var_8471_to_fp16 = const()[name = string("op_8471_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_269_cast_fp16 = layer_norm(axes = normed_269_axes_0, epsilon = var_8471_to_fp16, x = input_225)[name = string("normed_269_cast_fp16")]; - tensor normed_271_begin_0 = const()[name = string("normed_271_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_271_end_0 = const()[name = string("normed_271_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_271_end_mask_0 = const()[name = string("normed_271_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_271 = slice_by_index(begin = normed_271_begin_0, end = normed_271_end_0, end_mask = normed_271_end_mask_0, x = normed_269_cast_fp16)[name = string("normed_271")]; - tensor var_8490_to_fp16 = const()[name = string("op_8490_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261701632)))]; - tensor q_23_cast_fp16 = mul(x = normed_271, y = var_8490_to_fp16)[name = string("q_23_cast_fp16")]; - int32 var_8501 = const()[name = string("op_8501"), val = int32(-1)]; - fp16 const_426_promoted = const()[name = string("const_426_promoted"), val = fp16(-0x1p+0)]; - tensor var_8503 = mul(x = var_8437, y = const_426_promoted)[name = string("op_8503")]; - bool input_227_interleave_0 = const()[name = string("input_227_interleave_0"), val = bool(false)]; - tensor input_227 = concat(axis = var_8501, interleave = input_227_interleave_0, values = (var_8437, var_8503))[name = string("input_227")]; - tensor normed_273_axes_0 = const()[name = string("normed_273_axes_0"), val = tensor([-1])]; - fp16 var_8498_to_fp16 = const()[name = string("op_8498_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_273_cast_fp16 = layer_norm(axes = normed_273_axes_0, epsilon = var_8498_to_fp16, x = input_227)[name = string("normed_273_cast_fp16")]; - tensor normed_275_begin_0 = const()[name = string("normed_275_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_275_end_0 = const()[name = string("normed_275_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_275_end_mask_0 = const()[name = string("normed_275_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_275 = slice_by_index(begin = normed_275_begin_0, end = normed_275_end_0, end_mask = normed_275_end_mask_0, x = normed_273_cast_fp16)[name = string("normed_275")]; - tensor var_8517_to_fp16 = const()[name = string("op_8517_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261702208)))]; - tensor k_23_cast_fp16 = mul(x = normed_275, y = var_8517_to_fp16)[name = string("k_23_cast_fp16")]; - tensor var_8519_cast_fp16 = mul(x = q_23_cast_fp16, y = cos_21_cast_fp16)[name = string("op_8519_cast_fp16")]; - tensor x1_45_begin_0 = const()[name = string("x1_45_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_45_end_0 = const()[name = string("x1_45_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_45_end_mask_0 = const()[name = string("x1_45_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_45_cast_fp16 = slice_by_index(begin = x1_45_begin_0, end = x1_45_end_0, end_mask = x1_45_end_mask_0, x = q_23_cast_fp16)[name = string("x1_45_cast_fp16")]; - tensor x2_45_begin_0 = const()[name = string("x2_45_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_45_end_0 = const()[name = string("x2_45_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_45_end_mask_0 = const()[name = string("x2_45_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_45_cast_fp16 = slice_by_index(begin = x2_45_begin_0, end = x2_45_end_0, end_mask = x2_45_end_mask_0, x = q_23_cast_fp16)[name = string("x2_45_cast_fp16")]; - fp16 const_432_promoted_to_fp16 = const()[name = string("const_432_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_8540_cast_fp16 = mul(x = x2_45_cast_fp16, y = const_432_promoted_to_fp16)[name = string("op_8540_cast_fp16")]; - int32 var_8542 = const()[name = string("op_8542"), val = int32(-1)]; - bool var_8543_interleave_0 = const()[name = string("op_8543_interleave_0"), val = bool(false)]; - tensor var_8543_cast_fp16 = concat(axis = var_8542, interleave = var_8543_interleave_0, values = (var_8540_cast_fp16, x1_45_cast_fp16))[name = string("op_8543_cast_fp16")]; - tensor var_8544_cast_fp16 = mul(x = var_8543_cast_fp16, y = sin_21_cast_fp16)[name = string("op_8544_cast_fp16")]; - tensor query_states_45_cast_fp16 = add(x = var_8519_cast_fp16, y = var_8544_cast_fp16)[name = string("query_states_45_cast_fp16")]; - tensor var_8547_cast_fp16 = mul(x = k_23_cast_fp16, y = cos_21_cast_fp16)[name = string("op_8547_cast_fp16")]; - tensor x1_47_begin_0 = const()[name = string("x1_47_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_47_end_0 = const()[name = string("x1_47_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_47_end_mask_0 = const()[name = string("x1_47_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_47_cast_fp16 = slice_by_index(begin = x1_47_begin_0, end = x1_47_end_0, end_mask = x1_47_end_mask_0, x = k_23_cast_fp16)[name = string("x1_47_cast_fp16")]; - tensor x2_47_begin_0 = const()[name = string("x2_47_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_47_end_0 = const()[name = string("x2_47_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_47_end_mask_0 = const()[name = string("x2_47_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_47_cast_fp16 = slice_by_index(begin = x2_47_begin_0, end = x2_47_end_0, end_mask = x2_47_end_mask_0, x = k_23_cast_fp16)[name = string("x2_47_cast_fp16")]; - fp16 const_435_promoted_to_fp16 = const()[name = string("const_435_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_8568_cast_fp16 = mul(x = x2_47_cast_fp16, y = const_435_promoted_to_fp16)[name = string("op_8568_cast_fp16")]; - int32 var_8570 = const()[name = string("op_8570"), val = int32(-1)]; - bool var_8571_interleave_0 = const()[name = string("op_8571_interleave_0"), val = bool(false)]; - tensor var_8571_cast_fp16 = concat(axis = var_8570, interleave = var_8571_interleave_0, values = (var_8568_cast_fp16, x1_47_cast_fp16))[name = string("op_8571_cast_fp16")]; - tensor var_8572_cast_fp16 = mul(x = var_8571_cast_fp16, y = sin_21_cast_fp16)[name = string("op_8572_cast_fp16")]; - tensor key_states_45_cast_fp16 = add(x = var_8547_cast_fp16, y = var_8572_cast_fp16)[name = string("key_states_45_cast_fp16")]; - tensor model_model_kv_cache_global_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_global_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_global_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_10, begin_mask = model_model_kv_cache_global_internal_tensor_assign_3_begin_mask_0, end = concat_11, end_mask = model_model_kv_cache_global_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_3_stride_0, update = key_states_45_cast_fp16, x = coreml_update_state_63)[name = string("model_model_kv_cache_global_internal_tensor_assign_3_cast_fp16")]; - write_state(data = model_model_kv_cache_global_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_22_write_state")]; - tensor coreml_update_state_74 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_22")]; - tensor model_model_kv_cache_global_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_global_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_global_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_50, begin_mask = model_model_kv_cache_global_internal_tensor_assign_4_begin_mask_0, end = concat_51, end_mask = model_model_kv_cache_global_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_4_stride_0, update = var_8459, x = coreml_update_state_74)[name = string("model_model_kv_cache_global_internal_tensor_assign_4_cast_fp16")]; - write_state(data = model_model_kv_cache_global_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_23_write_state")]; - tensor coreml_update_state_75 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_23")]; - tensor var_8627_begin_0 = const()[name = string("op_8627_begin_0"), val = tensor([1, 0, 0, 0])]; - tensor var_8627_end_0 = const()[name = string("op_8627_end_0"), val = tensor([2, 1, 4096, 256])]; - tensor var_8627_end_mask_0 = const()[name = string("op_8627_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_8627_cast_fp16 = slice_by_index(begin = var_8627_begin_0, end = var_8627_end_0, end_mask = var_8627_end_mask_0, x = coreml_update_state_75)[name = string("op_8627_cast_fp16")]; - tensor var_8634_begin_0 = const()[name = string("op_8634_begin_0"), val = tensor([5, 0, 0, 0])]; - tensor var_8634_end_0 = const()[name = string("op_8634_end_0"), val = tensor([6, 1, 4096, 256])]; - tensor var_8634_end_mask_0 = const()[name = string("op_8634_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_8634_cast_fp16 = slice_by_index(begin = var_8634_begin_0, end = var_8634_end_0, end_mask = var_8634_end_mask_0, x = coreml_update_state_75)[name = string("op_8634_cast_fp16")]; - tensor var_8671 = const()[name = string("op_8671"), val = tensor([1, 4, 1, 1])]; - tensor x_181_cast_fp16 = tile(reps = var_8671, x = var_8627_cast_fp16)[name = string("x_181_cast_fp16")]; - tensor var_8691 = const()[name = string("op_8691"), val = tensor([1, 4, 1, 1])]; - tensor x_187_cast_fp16 = tile(reps = var_8691, x = var_8634_cast_fp16)[name = string("x_187_cast_fp16")]; - bool var_8718_transpose_x_1 = const()[name = string("op_8718_transpose_x_1"), val = bool(false)]; - bool var_8718_transpose_y_1 = const()[name = string("op_8718_transpose_y_1"), val = bool(true)]; - tensor var_8718 = matmul(transpose_x = var_8718_transpose_x_1, transpose_y = var_8718_transpose_y_1, x = query_states_45_cast_fp16, y = x_181_cast_fp16)[name = string("op_8718")]; - fp16 var_8719_to_fp16 = const()[name = string("op_8719_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_67_cast_fp16 = mul(x = var_8718, y = var_8719_to_fp16)[name = string("attn_weights_67_cast_fp16")]; - tensor attn_weights_69_cast_fp16 = add(x = attn_weights_67_cast_fp16, y = causal_mask)[name = string("attn_weights_69_cast_fp16")]; - int32 var_8754 = const()[name = string("op_8754"), val = int32(-1)]; - tensor attn_weights_71_cast_fp16 = softmax(axis = var_8754, x = attn_weights_69_cast_fp16)[name = string("attn_weights_71_cast_fp16")]; - bool attn_output_111_transpose_x_0 = const()[name = string("attn_output_111_transpose_x_0"), val = bool(false)]; - bool attn_output_111_transpose_y_0 = const()[name = string("attn_output_111_transpose_y_0"), val = bool(false)]; - tensor attn_output_111_cast_fp16 = matmul(transpose_x = attn_output_111_transpose_x_0, transpose_y = attn_output_111_transpose_y_0, x = attn_weights_71_cast_fp16, y = x_187_cast_fp16)[name = string("attn_output_111_cast_fp16")]; - tensor var_8765_perm_0 = const()[name = string("op_8765_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_8769 = const()[name = string("op_8769"), val = tensor([1, 1, 1024])]; - tensor var_8765_cast_fp16 = transpose(perm = var_8765_perm_0, x = attn_output_111_cast_fp16)[name = string("transpose_88")]; - tensor attn_output_115_cast_fp16 = reshape(shape = var_8769, x = var_8765_cast_fp16)[name = string("attn_output_115_cast_fp16")]; - tensor var_8774 = const()[name = string("op_8774"), val = tensor([0, 2, 1])]; - string var_8790_pad_type_0 = const()[name = string("op_8790_pad_type_0"), val = string("valid")]; - int32 var_8790_groups_0 = const()[name = string("op_8790_groups_0"), val = int32(1)]; - tensor var_8790_strides_0 = const()[name = string("op_8790_strides_0"), val = tensor([1])]; - tensor var_8790_pad_0 = const()[name = string("op_8790_pad_0"), val = tensor([0, 0])]; - tensor var_8790_dilations_0 = const()[name = string("op_8790_dilations_0"), val = tensor([1])]; - tensor squeeze_11_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261702784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262587584))))[name = string("squeeze_11_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_8775_cast_fp16 = transpose(perm = var_8774, x = attn_output_115_cast_fp16)[name = string("transpose_87")]; - tensor var_8790_cast_fp16 = conv(dilations = var_8790_dilations_0, groups = var_8790_groups_0, pad = var_8790_pad_0, pad_type = var_8790_pad_type_0, strides = var_8790_strides_0, weight = squeeze_11_cast_fp16_to_fp32_to_fp16_palettized, x = var_8775_cast_fp16)[name = string("op_8790_cast_fp16")]; - tensor var_8794 = const()[name = string("op_8794"), val = tensor([0, 2, 1])]; - int32 var_8805 = const()[name = string("op_8805"), val = int32(-1)]; - fp16 const_444_promoted_to_fp16 = const()[name = string("const_444_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_185_cast_fp16 = transpose(perm = var_8794, x = var_8790_cast_fp16)[name = string("transpose_86")]; - tensor var_8807_cast_fp16 = mul(x = hidden_states_185_cast_fp16, y = const_444_promoted_to_fp16)[name = string("op_8807_cast_fp16")]; - bool input_231_interleave_0 = const()[name = string("input_231_interleave_0"), val = bool(false)]; - tensor input_231_cast_fp16 = concat(axis = var_8805, interleave = input_231_interleave_0, values = (hidden_states_185_cast_fp16, var_8807_cast_fp16))[name = string("input_231_cast_fp16")]; - tensor normed_277_axes_0 = const()[name = string("normed_277_axes_0"), val = tensor([-1])]; - fp16 var_8802_to_fp16 = const()[name = string("op_8802_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_277_cast_fp16 = layer_norm(axes = normed_277_axes_0, epsilon = var_8802_to_fp16, x = input_231_cast_fp16)[name = string("normed_277_cast_fp16")]; - tensor normed_279_begin_0 = const()[name = string("normed_279_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_279_end_0 = const()[name = string("normed_279_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_279_end_mask_0 = const()[name = string("normed_279_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_279_cast_fp16 = slice_by_index(begin = normed_279_begin_0, end = normed_279_end_0, end_mask = normed_279_end_mask_0, x = normed_277_cast_fp16)[name = string("normed_279_cast_fp16")]; - tensor var_8821_to_fp16 = const()[name = string("op_8821_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262606080)))]; - tensor attn_output_119_cast_fp16 = mul(x = normed_279_cast_fp16, y = var_8821_to_fp16)[name = string("attn_output_119_cast_fp16")]; - tensor hidden_states_187_cast_fp16 = add(x = hidden_states_177_cast_fp16, y = attn_output_119_cast_fp16)[name = string("hidden_states_187_cast_fp16")]; - int32 var_8834 = const()[name = string("op_8834"), val = int32(-1)]; - fp16 const_448_promoted_to_fp16 = const()[name = string("const_448_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_8836_cast_fp16 = mul(x = hidden_states_187_cast_fp16, y = const_448_promoted_to_fp16)[name = string("op_8836_cast_fp16")]; - bool input_233_interleave_0 = const()[name = string("input_233_interleave_0"), val = bool(false)]; - tensor input_233_cast_fp16 = concat(axis = var_8834, interleave = input_233_interleave_0, values = (hidden_states_187_cast_fp16, var_8836_cast_fp16))[name = string("input_233_cast_fp16")]; - tensor normed_281_axes_0 = const()[name = string("normed_281_axes_0"), val = tensor([-1])]; - fp16 var_8831_to_fp16 = const()[name = string("op_8831_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_281_cast_fp16 = layer_norm(axes = normed_281_axes_0, epsilon = var_8831_to_fp16, x = input_233_cast_fp16)[name = string("normed_281_cast_fp16")]; - tensor normed_283_begin_0 = const()[name = string("normed_283_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_283_end_0 = const()[name = string("normed_283_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_283_end_mask_0 = const()[name = string("normed_283_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_283_cast_fp16 = slice_by_index(begin = normed_283_begin_0, end = normed_283_end_0, end_mask = normed_283_end_mask_0, x = normed_281_cast_fp16)[name = string("normed_283_cast_fp16")]; - tensor var_8850_to_fp16 = const()[name = string("op_8850_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262608448)))]; - tensor x_189_cast_fp16 = mul(x = normed_283_cast_fp16, y = var_8850_to_fp16)[name = string("x_189_cast_fp16")]; - tensor var_8862 = const()[name = string("op_8862"), val = tensor([0, 2, 1])]; - tensor input_235_axes_0 = const()[name = string("input_235_axes_0"), val = tensor([2])]; - tensor var_8863_cast_fp16 = transpose(perm = var_8862, x = x_189_cast_fp16)[name = string("transpose_85")]; - tensor input_235_cast_fp16 = expand_dims(axes = input_235_axes_0, x = var_8863_cast_fp16)[name = string("input_235_cast_fp16")]; - string x_191_pad_type_0 = const()[name = string("x_191_pad_type_0"), val = string("valid")]; - tensor x_191_strides_0 = const()[name = string("x_191_strides_0"), val = tensor([1, 1])]; - tensor x_191_pad_0 = const()[name = string("x_191_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_191_dilations_0 = const()[name = string("x_191_dilations_0"), val = tensor([1, 1])]; - int32 x_191_groups_0 = const()[name = string("x_191_groups_0"), val = int32(1)]; - tensor model_model_layers_11_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262610816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(268582848))))[name = string("model_model_layers_11_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_191_cast_fp16 = conv(dilations = x_191_dilations_0, groups = x_191_groups_0, pad = x_191_pad_0, pad_type = x_191_pad_type_0, strides = x_191_strides_0, weight = model_model_layers_11_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_235_cast_fp16)[name = string("x_191_cast_fp16")]; - string b_23_pad_type_0 = const()[name = string("b_23_pad_type_0"), val = string("valid")]; - tensor b_23_strides_0 = const()[name = string("b_23_strides_0"), val = tensor([1, 1])]; - tensor b_23_pad_0 = const()[name = string("b_23_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_23_dilations_0 = const()[name = string("b_23_dilations_0"), val = tensor([1, 1])]; - int32 b_23_groups_0 = const()[name = string("b_23_groups_0"), val = int32(1)]; - tensor model_model_layers_11_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(268693504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(274665536))))[name = string("model_model_layers_11_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_23_cast_fp16 = conv(dilations = b_23_dilations_0, groups = b_23_groups_0, pad = b_23_pad_0, pad_type = b_23_pad_type_0, strides = b_23_strides_0, weight = model_model_layers_11_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_235_cast_fp16)[name = string("b_23_cast_fp16")]; - string var_8888_mode_0 = const()[name = string("op_8888_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_8888_cast_fp16 = gelu(mode = var_8888_mode_0, x = x_191_cast_fp16)[name = string("op_8888_cast_fp16")]; - tensor input_237_cast_fp16 = mul(x = var_8888_cast_fp16, y = b_23_cast_fp16)[name = string("input_237_cast_fp16")]; - string e_23_pad_type_0 = const()[name = string("e_23_pad_type_0"), val = string("valid")]; - tensor e_23_strides_0 = const()[name = string("e_23_strides_0"), val = tensor([1, 1])]; - tensor e_23_pad_0 = const()[name = string("e_23_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_23_dilations_0 = const()[name = string("e_23_dilations_0"), val = tensor([1, 1])]; - int32 e_23_groups_0 = const()[name = string("e_23_groups_0"), val = int32(1)]; - tensor model_model_layers_11_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(274776192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280748224))))[name = string("model_model_layers_11_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_23_cast_fp16 = conv(dilations = e_23_dilations_0, groups = e_23_groups_0, pad = e_23_pad_0, pad_type = e_23_pad_type_0, strides = e_23_strides_0, weight = model_model_layers_11_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_237_cast_fp16)[name = string("e_23_cast_fp16")]; - tensor var_8896_axes_0 = const()[name = string("op_8896_axes_0"), val = tensor([2])]; - tensor var_8896_cast_fp16 = squeeze(axes = var_8896_axes_0, x = e_23_cast_fp16)[name = string("op_8896_cast_fp16")]; - tensor var_8897 = const()[name = string("op_8897"), val = tensor([0, 2, 1])]; - int32 var_8908 = const()[name = string("op_8908"), val = int32(-1)]; - fp16 const_452_promoted_to_fp16 = const()[name = string("const_452_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_189_cast_fp16 = transpose(perm = var_8897, x = var_8896_cast_fp16)[name = string("transpose_84")]; - tensor var_8910_cast_fp16 = mul(x = hidden_states_189_cast_fp16, y = const_452_promoted_to_fp16)[name = string("op_8910_cast_fp16")]; - bool input_239_interleave_0 = const()[name = string("input_239_interleave_0"), val = bool(false)]; - tensor input_239_cast_fp16 = concat(axis = var_8908, interleave = input_239_interleave_0, values = (hidden_states_189_cast_fp16, var_8910_cast_fp16))[name = string("input_239_cast_fp16")]; - tensor normed_285_axes_0 = const()[name = string("normed_285_axes_0"), val = tensor([-1])]; - fp16 var_8905_to_fp16 = const()[name = string("op_8905_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_285_cast_fp16 = layer_norm(axes = normed_285_axes_0, epsilon = var_8905_to_fp16, x = input_239_cast_fp16)[name = string("normed_285_cast_fp16")]; - tensor normed_287_begin_0 = const()[name = string("normed_287_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_287_end_0 = const()[name = string("normed_287_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_287_end_mask_0 = const()[name = string("normed_287_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_287_cast_fp16 = slice_by_index(begin = normed_287_begin_0, end = normed_287_end_0, end_mask = normed_287_end_mask_0, x = normed_285_cast_fp16)[name = string("normed_287_cast_fp16")]; - tensor var_8924_to_fp16 = const()[name = string("op_8924_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280766720)))]; - tensor hidden_states_191_cast_fp16 = mul(x = normed_287_cast_fp16, y = var_8924_to_fp16)[name = string("hidden_states_191_cast_fp16")]; - tensor hidden_states_193_cast_fp16 = add(x = hidden_states_187_cast_fp16, y = hidden_states_191_cast_fp16)[name = string("hidden_states_193_cast_fp16")]; - int32 var_8975 = const()[name = string("op_8975"), val = int32(-1)]; - fp16 const_456_promoted_to_fp16 = const()[name = string("const_456_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_8977_cast_fp16 = mul(x = hidden_states_193_cast_fp16, y = const_456_promoted_to_fp16)[name = string("op_8977_cast_fp16")]; - bool input_241_interleave_0 = const()[name = string("input_241_interleave_0"), val = bool(false)]; - tensor input_241_cast_fp16 = concat(axis = var_8975, interleave = input_241_interleave_0, values = (hidden_states_193_cast_fp16, var_8977_cast_fp16))[name = string("input_241_cast_fp16")]; - tensor normed_289_axes_0 = const()[name = string("normed_289_axes_0"), val = tensor([-1])]; - fp16 var_8972_to_fp16 = const()[name = string("op_8972_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_289_cast_fp16 = layer_norm(axes = normed_289_axes_0, epsilon = var_8972_to_fp16, x = input_241_cast_fp16)[name = string("normed_289_cast_fp16")]; - tensor normed_291_begin_0 = const()[name = string("normed_291_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_291_end_0 = const()[name = string("normed_291_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_291_end_mask_0 = const()[name = string("normed_291_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_291_cast_fp16 = slice_by_index(begin = normed_291_begin_0, end = normed_291_end_0, end_mask = normed_291_end_mask_0, x = normed_289_cast_fp16)[name = string("normed_291_cast_fp16")]; - tensor var_8991_to_fp16 = const()[name = string("op_8991_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280769088)))]; - tensor hidden_states_195_cast_fp16 = mul(x = normed_291_cast_fp16, y = var_8991_to_fp16)[name = string("hidden_states_195_cast_fp16")]; - tensor var_8996 = const()[name = string("op_8996"), val = tensor([0, 2, 1])]; - tensor var_8999_axes_0 = const()[name = string("op_8999_axes_0"), val = tensor([2])]; - tensor var_8997_cast_fp16 = transpose(perm = var_8996, x = hidden_states_195_cast_fp16)[name = string("transpose_83")]; - tensor var_8999_cast_fp16 = expand_dims(axes = var_8999_axes_0, x = var_8997_cast_fp16)[name = string("op_8999_cast_fp16")]; - string var_9015_pad_type_0 = const()[name = string("op_9015_pad_type_0"), val = string("valid")]; - tensor var_9015_strides_0 = const()[name = string("op_9015_strides_0"), val = tensor([1, 1])]; - tensor var_9015_pad_0 = const()[name = string("op_9015_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_9015_dilations_0 = const()[name = string("op_9015_dilations_0"), val = tensor([1, 1])]; - int32 var_9015_groups_0 = const()[name = string("op_9015_groups_0"), val = int32(1)]; - tensor var_9015 = conv(dilations = var_9015_dilations_0, groups = var_9015_groups_0, pad = var_9015_pad_0, pad_type = var_9015_pad_type_0, strides = var_9015_strides_0, weight = model_model_layers_12_self_attn_q_proj_weight_palettized, x = var_8999_cast_fp16)[name = string("op_9015")]; - tensor var_9020 = const()[name = string("op_9020"), val = tensor([1, 4, 1, 256])]; - tensor var_9021 = reshape(shape = var_9020, x = var_9015)[name = string("op_9021")]; - string var_9037_pad_type_0 = const()[name = string("op_9037_pad_type_0"), val = string("valid")]; - tensor var_9037_strides_0 = const()[name = string("op_9037_strides_0"), val = tensor([1, 1])]; - tensor var_9037_pad_0 = const()[name = string("op_9037_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_9037_dilations_0 = const()[name = string("op_9037_dilations_0"), val = tensor([1, 1])]; - int32 var_9037_groups_0 = const()[name = string("op_9037_groups_0"), val = int32(1)]; - tensor var_9037 = conv(dilations = var_9037_dilations_0, groups = var_9037_groups_0, pad = var_9037_pad_0, pad_type = var_9037_pad_type_0, strides = var_9037_strides_0, weight = model_model_layers_12_self_attn_k_proj_weight_palettized, x = var_8999_cast_fp16)[name = string("op_9037")]; - tensor var_9042 = const()[name = string("op_9042"), val = tensor([1, 1, 1, 256])]; - tensor var_9043 = reshape(shape = var_9042, x = var_9037)[name = string("op_9043")]; - string var_9059_pad_type_0 = const()[name = string("op_9059_pad_type_0"), val = string("valid")]; - tensor var_9059_strides_0 = const()[name = string("op_9059_strides_0"), val = tensor([1, 1])]; - tensor var_9059_pad_0 = const()[name = string("op_9059_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_9059_dilations_0 = const()[name = string("op_9059_dilations_0"), val = tensor([1, 1])]; - int32 var_9059_groups_0 = const()[name = string("op_9059_groups_0"), val = int32(1)]; - tensor var_9059 = conv(dilations = var_9059_dilations_0, groups = var_9059_groups_0, pad = var_9059_pad_0, pad_type = var_9059_pad_type_0, strides = var_9059_strides_0, weight = model_model_layers_12_self_attn_v_proj_weight_palettized, x = var_8999_cast_fp16)[name = string("op_9059")]; - tensor var_9064 = const()[name = string("op_9064"), val = tensor([1, 1, 1, 256])]; - tensor var_9065 = reshape(shape = var_9064, x = var_9059)[name = string("op_9065")]; - int32 var_9080 = const()[name = string("op_9080"), val = int32(-1)]; - fp16 const_460_promoted = const()[name = string("const_460_promoted"), val = fp16(-0x1p+0)]; - tensor var_9082 = mul(x = var_9021, y = const_460_promoted)[name = string("op_9082")]; - bool input_245_interleave_0 = const()[name = string("input_245_interleave_0"), val = bool(false)]; - tensor input_245 = concat(axis = var_9080, interleave = input_245_interleave_0, values = (var_9021, var_9082))[name = string("input_245")]; - tensor normed_293_axes_0 = const()[name = string("normed_293_axes_0"), val = tensor([-1])]; - fp16 var_9077_to_fp16 = const()[name = string("op_9077_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_293_cast_fp16 = layer_norm(axes = normed_293_axes_0, epsilon = var_9077_to_fp16, x = input_245)[name = string("normed_293_cast_fp16")]; - tensor normed_295_begin_0 = const()[name = string("normed_295_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_295_end_0 = const()[name = string("normed_295_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_295_end_mask_0 = const()[name = string("normed_295_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_295 = slice_by_index(begin = normed_295_begin_0, end = normed_295_end_0, end_mask = normed_295_end_mask_0, x = normed_293_cast_fp16)[name = string("normed_295")]; - tensor var_9096_to_fp16 = const()[name = string("op_9096_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280771456)))]; - tensor q_25_cast_fp16 = mul(x = normed_295, y = var_9096_to_fp16)[name = string("q_25_cast_fp16")]; - int32 var_9107 = const()[name = string("op_9107"), val = int32(-1)]; - fp16 const_464_promoted = const()[name = string("const_464_promoted"), val = fp16(-0x1p+0)]; - tensor var_9109 = mul(x = var_9043, y = const_464_promoted)[name = string("op_9109")]; - bool input_247_interleave_0 = const()[name = string("input_247_interleave_0"), val = bool(false)]; - tensor input_247 = concat(axis = var_9107, interleave = input_247_interleave_0, values = (var_9043, var_9109))[name = string("input_247")]; - tensor normed_297_axes_0 = const()[name = string("normed_297_axes_0"), val = tensor([-1])]; - fp16 var_9104_to_fp16 = const()[name = string("op_9104_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_297_cast_fp16 = layer_norm(axes = normed_297_axes_0, epsilon = var_9104_to_fp16, x = input_247)[name = string("normed_297_cast_fp16")]; - tensor normed_299_begin_0 = const()[name = string("normed_299_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_299_end_0 = const()[name = string("normed_299_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_299_end_mask_0 = const()[name = string("normed_299_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_299 = slice_by_index(begin = normed_299_begin_0, end = normed_299_end_0, end_mask = normed_299_end_mask_0, x = normed_297_cast_fp16)[name = string("normed_299")]; - tensor var_9123_to_fp16 = const()[name = string("op_9123_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280772032)))]; - tensor k_25_cast_fp16 = mul(x = normed_299, y = var_9123_to_fp16)[name = string("k_25_cast_fp16")]; - tensor var_9125_cast_fp16 = mul(x = q_25_cast_fp16, y = cos_1_cast_fp16)[name = string("op_9125_cast_fp16")]; - tensor x1_49_begin_0 = const()[name = string("x1_49_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_49_end_0 = const()[name = string("x1_49_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_49_end_mask_0 = const()[name = string("x1_49_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_49_cast_fp16 = slice_by_index(begin = x1_49_begin_0, end = x1_49_end_0, end_mask = x1_49_end_mask_0, x = q_25_cast_fp16)[name = string("x1_49_cast_fp16")]; - tensor x2_49_begin_0 = const()[name = string("x2_49_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_49_end_0 = const()[name = string("x2_49_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_49_end_mask_0 = const()[name = string("x2_49_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_49_cast_fp16 = slice_by_index(begin = x2_49_begin_0, end = x2_49_end_0, end_mask = x2_49_end_mask_0, x = q_25_cast_fp16)[name = string("x2_49_cast_fp16")]; - fp16 const_470_promoted_to_fp16 = const()[name = string("const_470_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_9146_cast_fp16 = mul(x = x2_49_cast_fp16, y = const_470_promoted_to_fp16)[name = string("op_9146_cast_fp16")]; - int32 var_9148 = const()[name = string("op_9148"), val = int32(-1)]; - bool var_9149_interleave_0 = const()[name = string("op_9149_interleave_0"), val = bool(false)]; - tensor var_9149_cast_fp16 = concat(axis = var_9148, interleave = var_9149_interleave_0, values = (var_9146_cast_fp16, x1_49_cast_fp16))[name = string("op_9149_cast_fp16")]; - tensor var_9150_cast_fp16 = mul(x = var_9149_cast_fp16, y = sin_1_cast_fp16)[name = string("op_9150_cast_fp16")]; - tensor query_states_49_cast_fp16 = add(x = var_9125_cast_fp16, y = var_9150_cast_fp16)[name = string("query_states_49_cast_fp16")]; - tensor var_9153_cast_fp16 = mul(x = k_25_cast_fp16, y = cos_1_cast_fp16)[name = string("op_9153_cast_fp16")]; - tensor x1_51_begin_0 = const()[name = string("x1_51_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_51_end_0 = const()[name = string("x1_51_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_51_end_mask_0 = const()[name = string("x1_51_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_51_cast_fp16 = slice_by_index(begin = x1_51_begin_0, end = x1_51_end_0, end_mask = x1_51_end_mask_0, x = k_25_cast_fp16)[name = string("x1_51_cast_fp16")]; - tensor x2_51_begin_0 = const()[name = string("x2_51_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_51_end_0 = const()[name = string("x2_51_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_51_end_mask_0 = const()[name = string("x2_51_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_51_cast_fp16 = slice_by_index(begin = x2_51_begin_0, end = x2_51_end_0, end_mask = x2_51_end_mask_0, x = k_25_cast_fp16)[name = string("x2_51_cast_fp16")]; - fp16 const_473_promoted_to_fp16 = const()[name = string("const_473_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_9174_cast_fp16 = mul(x = x2_51_cast_fp16, y = const_473_promoted_to_fp16)[name = string("op_9174_cast_fp16")]; - int32 var_9176 = const()[name = string("op_9176"), val = int32(-1)]; - bool var_9177_interleave_0 = const()[name = string("op_9177_interleave_0"), val = bool(false)]; - tensor var_9177_cast_fp16 = concat(axis = var_9176, interleave = var_9177_interleave_0, values = (var_9174_cast_fp16, x1_51_cast_fp16))[name = string("op_9177_cast_fp16")]; - tensor var_9178_cast_fp16 = mul(x = var_9177_cast_fp16, y = sin_1_cast_fp16)[name = string("op_9178_cast_fp16")]; - tensor key_states_49_cast_fp16 = add(x = var_9153_cast_fp16, y = var_9178_cast_fp16)[name = string("key_states_49_cast_fp16")]; - tensor expand_dims_144 = const()[name = string("expand_dims_144"), val = tensor([10])]; - tensor expand_dims_145 = const()[name = string("expand_dims_145"), val = tensor([0])]; - tensor expand_dims_147 = const()[name = string("expand_dims_147"), val = tensor([0])]; - tensor expand_dims_148 = const()[name = string("expand_dims_148"), val = tensor([11])]; - int32 concat_98_axis_0 = const()[name = string("concat_98_axis_0"), val = int32(0)]; - bool concat_98_interleave_0 = const()[name = string("concat_98_interleave_0"), val = bool(false)]; - tensor concat_98 = concat(axis = concat_98_axis_0, interleave = concat_98_interleave_0, values = (expand_dims_144, expand_dims_145, current_pos, expand_dims_147))[name = string("concat_98")]; - tensor concat_99_values1_0 = const()[name = string("concat_99_values1_0"), val = tensor([0])]; - tensor concat_99_values3_0 = const()[name = string("concat_99_values3_0"), val = tensor([0])]; - int32 concat_99_axis_0 = const()[name = string("concat_99_axis_0"), val = int32(0)]; - bool concat_99_interleave_0 = const()[name = string("concat_99_interleave_0"), val = bool(false)]; - tensor concat_99 = concat(axis = concat_99_axis_0, interleave = concat_99_interleave_0, values = (expand_dims_148, concat_99_values1_0, var_1909, concat_99_values3_0))[name = string("concat_99")]; - tensor model_model_kv_cache_local_internal_tensor_assign_21_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_21_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_21_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_98, begin_mask = model_model_kv_cache_local_internal_tensor_assign_21_begin_mask_0, end = concat_99, end_mask = model_model_kv_cache_local_internal_tensor_assign_21_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_21_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_21_stride_0, update = key_states_49_cast_fp16, x = coreml_update_state_73)[name = string("model_model_kv_cache_local_internal_tensor_assign_21_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_21_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_24_write_state")]; - tensor coreml_update_state_76 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_24")]; - tensor expand_dims_150 = const()[name = string("expand_dims_150"), val = tensor([32])]; - tensor expand_dims_151 = const()[name = string("expand_dims_151"), val = tensor([0])]; - tensor expand_dims_153 = const()[name = string("expand_dims_153"), val = tensor([0])]; - tensor expand_dims_154 = const()[name = string("expand_dims_154"), val = tensor([33])]; - int32 concat_102_axis_0 = const()[name = string("concat_102_axis_0"), val = int32(0)]; - bool concat_102_interleave_0 = const()[name = string("concat_102_interleave_0"), val = bool(false)]; - tensor concat_102 = concat(axis = concat_102_axis_0, interleave = concat_102_interleave_0, values = (expand_dims_150, expand_dims_151, current_pos, expand_dims_153))[name = string("concat_102")]; - tensor concat_103_values1_0 = const()[name = string("concat_103_values1_0"), val = tensor([0])]; - tensor concat_103_values3_0 = const()[name = string("concat_103_values3_0"), val = tensor([0])]; - int32 concat_103_axis_0 = const()[name = string("concat_103_axis_0"), val = int32(0)]; - bool concat_103_interleave_0 = const()[name = string("concat_103_interleave_0"), val = bool(false)]; - tensor concat_103 = concat(axis = concat_103_axis_0, interleave = concat_103_interleave_0, values = (expand_dims_154, concat_103_values1_0, var_1909, concat_103_values3_0))[name = string("concat_103")]; - tensor model_model_kv_cache_local_internal_tensor_assign_22_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_22_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_22_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_102, begin_mask = model_model_kv_cache_local_internal_tensor_assign_22_begin_mask_0, end = concat_103, end_mask = model_model_kv_cache_local_internal_tensor_assign_22_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_22_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_22_stride_0, update = var_9065, x = coreml_update_state_76)[name = string("model_model_kv_cache_local_internal_tensor_assign_22_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_22_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_25_write_state")]; - tensor coreml_update_state_77 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_25")]; - tensor var_9233_begin_0 = const()[name = string("op_9233_begin_0"), val = tensor([10, 0, 0, 0])]; - tensor var_9233_end_0 = const()[name = string("op_9233_end_0"), val = tensor([11, 1, 512, 256])]; - tensor var_9233_end_mask_0 = const()[name = string("op_9233_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_9233_cast_fp16 = slice_by_index(begin = var_9233_begin_0, end = var_9233_end_0, end_mask = var_9233_end_mask_0, x = coreml_update_state_77)[name = string("op_9233_cast_fp16")]; - tensor var_9240_begin_0 = const()[name = string("op_9240_begin_0"), val = tensor([32, 0, 0, 0])]; - tensor var_9240_end_0 = const()[name = string("op_9240_end_0"), val = tensor([33, 1, 512, 256])]; - tensor var_9240_end_mask_0 = const()[name = string("op_9240_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_9240_cast_fp16 = slice_by_index(begin = var_9240_begin_0, end = var_9240_end_0, end_mask = var_9240_end_mask_0, x = coreml_update_state_77)[name = string("op_9240_cast_fp16")]; - tensor var_9277 = const()[name = string("op_9277"), val = tensor([1, 4, 1, 1])]; - tensor x_197_cast_fp16 = tile(reps = var_9277, x = var_9233_cast_fp16)[name = string("x_197_cast_fp16")]; - tensor var_9297 = const()[name = string("op_9297"), val = tensor([1, 4, 1, 1])]; - tensor x_203_cast_fp16 = tile(reps = var_9297, x = var_9240_cast_fp16)[name = string("x_203_cast_fp16")]; - bool var_9324_transpose_x_1 = const()[name = string("op_9324_transpose_x_1"), val = bool(false)]; - bool var_9324_transpose_y_1 = const()[name = string("op_9324_transpose_y_1"), val = bool(true)]; - tensor var_9324 = matmul(transpose_x = var_9324_transpose_x_1, transpose_y = var_9324_transpose_y_1, x = query_states_49_cast_fp16, y = x_197_cast_fp16)[name = string("op_9324")]; - fp16 var_9325_to_fp16 = const()[name = string("op_9325_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_73_cast_fp16 = mul(x = var_9324, y = var_9325_to_fp16)[name = string("attn_weights_73_cast_fp16")]; - tensor attn_weights_75_cast_fp16 = add(x = attn_weights_73_cast_fp16, y = var_2083)[name = string("attn_weights_75_cast_fp16")]; - int32 var_9360 = const()[name = string("op_9360"), val = int32(-1)]; - tensor attn_weights_77_cast_fp16 = softmax(axis = var_9360, x = attn_weights_75_cast_fp16)[name = string("attn_weights_77_cast_fp16")]; - bool attn_output_121_transpose_x_0 = const()[name = string("attn_output_121_transpose_x_0"), val = bool(false)]; - bool attn_output_121_transpose_y_0 = const()[name = string("attn_output_121_transpose_y_0"), val = bool(false)]; - tensor attn_output_121_cast_fp16 = matmul(transpose_x = attn_output_121_transpose_x_0, transpose_y = attn_output_121_transpose_y_0, x = attn_weights_77_cast_fp16, y = x_203_cast_fp16)[name = string("attn_output_121_cast_fp16")]; - tensor var_9371_perm_0 = const()[name = string("op_9371_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_9375 = const()[name = string("op_9375"), val = tensor([1, 1, 1024])]; - tensor var_9371_cast_fp16 = transpose(perm = var_9371_perm_0, x = attn_output_121_cast_fp16)[name = string("transpose_82")]; - tensor attn_output_125_cast_fp16 = reshape(shape = var_9375, x = var_9371_cast_fp16)[name = string("attn_output_125_cast_fp16")]; - tensor var_9380 = const()[name = string("op_9380"), val = tensor([0, 2, 1])]; - string var_9396_pad_type_0 = const()[name = string("op_9396_pad_type_0"), val = string("valid")]; - int32 var_9396_groups_0 = const()[name = string("op_9396_groups_0"), val = int32(1)]; - tensor var_9396_strides_0 = const()[name = string("op_9396_strides_0"), val = tensor([1])]; - tensor var_9396_pad_0 = const()[name = string("op_9396_pad_0"), val = tensor([0, 0])]; - tensor var_9396_dilations_0 = const()[name = string("op_9396_dilations_0"), val = tensor([1])]; - tensor squeeze_12_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280772608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(281657408))))[name = string("squeeze_12_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_9381_cast_fp16 = transpose(perm = var_9380, x = attn_output_125_cast_fp16)[name = string("transpose_81")]; - tensor var_9396_cast_fp16 = conv(dilations = var_9396_dilations_0, groups = var_9396_groups_0, pad = var_9396_pad_0, pad_type = var_9396_pad_type_0, strides = var_9396_strides_0, weight = squeeze_12_cast_fp16_to_fp32_to_fp16_palettized, x = var_9381_cast_fp16)[name = string("op_9396_cast_fp16")]; - tensor var_9400 = const()[name = string("op_9400"), val = tensor([0, 2, 1])]; - int32 var_9411 = const()[name = string("op_9411"), val = int32(-1)]; - fp16 const_482_promoted_to_fp16 = const()[name = string("const_482_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_201_cast_fp16 = transpose(perm = var_9400, x = var_9396_cast_fp16)[name = string("transpose_80")]; - tensor var_9413_cast_fp16 = mul(x = hidden_states_201_cast_fp16, y = const_482_promoted_to_fp16)[name = string("op_9413_cast_fp16")]; - bool input_251_interleave_0 = const()[name = string("input_251_interleave_0"), val = bool(false)]; - tensor input_251_cast_fp16 = concat(axis = var_9411, interleave = input_251_interleave_0, values = (hidden_states_201_cast_fp16, var_9413_cast_fp16))[name = string("input_251_cast_fp16")]; - tensor normed_301_axes_0 = const()[name = string("normed_301_axes_0"), val = tensor([-1])]; - fp16 var_9408_to_fp16 = const()[name = string("op_9408_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_301_cast_fp16 = layer_norm(axes = normed_301_axes_0, epsilon = var_9408_to_fp16, x = input_251_cast_fp16)[name = string("normed_301_cast_fp16")]; - tensor normed_303_begin_0 = const()[name = string("normed_303_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_303_end_0 = const()[name = string("normed_303_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_303_end_mask_0 = const()[name = string("normed_303_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_303_cast_fp16 = slice_by_index(begin = normed_303_begin_0, end = normed_303_end_0, end_mask = normed_303_end_mask_0, x = normed_301_cast_fp16)[name = string("normed_303_cast_fp16")]; - tensor var_9427_to_fp16 = const()[name = string("op_9427_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(281675904)))]; - tensor attn_output_129_cast_fp16 = mul(x = normed_303_cast_fp16, y = var_9427_to_fp16)[name = string("attn_output_129_cast_fp16")]; - tensor hidden_states_203_cast_fp16 = add(x = hidden_states_193_cast_fp16, y = attn_output_129_cast_fp16)[name = string("hidden_states_203_cast_fp16")]; - int32 var_9440 = const()[name = string("op_9440"), val = int32(-1)]; - fp16 const_486_promoted_to_fp16 = const()[name = string("const_486_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_9442_cast_fp16 = mul(x = hidden_states_203_cast_fp16, y = const_486_promoted_to_fp16)[name = string("op_9442_cast_fp16")]; - bool input_253_interleave_0 = const()[name = string("input_253_interleave_0"), val = bool(false)]; - tensor input_253_cast_fp16 = concat(axis = var_9440, interleave = input_253_interleave_0, values = (hidden_states_203_cast_fp16, var_9442_cast_fp16))[name = string("input_253_cast_fp16")]; - tensor normed_305_axes_0 = const()[name = string("normed_305_axes_0"), val = tensor([-1])]; - fp16 var_9437_to_fp16 = const()[name = string("op_9437_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_305_cast_fp16 = layer_norm(axes = normed_305_axes_0, epsilon = var_9437_to_fp16, x = input_253_cast_fp16)[name = string("normed_305_cast_fp16")]; - tensor normed_307_begin_0 = const()[name = string("normed_307_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_307_end_0 = const()[name = string("normed_307_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_307_end_mask_0 = const()[name = string("normed_307_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_307_cast_fp16 = slice_by_index(begin = normed_307_begin_0, end = normed_307_end_0, end_mask = normed_307_end_mask_0, x = normed_305_cast_fp16)[name = string("normed_307_cast_fp16")]; - tensor var_9456_to_fp16 = const()[name = string("op_9456_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(281678272)))]; - tensor x_205_cast_fp16 = mul(x = normed_307_cast_fp16, y = var_9456_to_fp16)[name = string("x_205_cast_fp16")]; - tensor var_9468 = const()[name = string("op_9468"), val = tensor([0, 2, 1])]; - tensor input_255_axes_0 = const()[name = string("input_255_axes_0"), val = tensor([2])]; - tensor var_9469_cast_fp16 = transpose(perm = var_9468, x = x_205_cast_fp16)[name = string("transpose_79")]; - tensor input_255_cast_fp16 = expand_dims(axes = input_255_axes_0, x = var_9469_cast_fp16)[name = string("input_255_cast_fp16")]; - string x_207_pad_type_0 = const()[name = string("x_207_pad_type_0"), val = string("valid")]; - tensor x_207_strides_0 = const()[name = string("x_207_strides_0"), val = tensor([1, 1])]; - tensor x_207_pad_0 = const()[name = string("x_207_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_207_dilations_0 = const()[name = string("x_207_dilations_0"), val = tensor([1, 1])]; - int32 x_207_groups_0 = const()[name = string("x_207_groups_0"), val = int32(1)]; - tensor model_model_layers_12_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(281680640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287652672))))[name = string("model_model_layers_12_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_207_cast_fp16 = conv(dilations = x_207_dilations_0, groups = x_207_groups_0, pad = x_207_pad_0, pad_type = x_207_pad_type_0, strides = x_207_strides_0, weight = model_model_layers_12_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_255_cast_fp16)[name = string("x_207_cast_fp16")]; - string b_25_pad_type_0 = const()[name = string("b_25_pad_type_0"), val = string("valid")]; - tensor b_25_strides_0 = const()[name = string("b_25_strides_0"), val = tensor([1, 1])]; - tensor b_25_pad_0 = const()[name = string("b_25_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_25_dilations_0 = const()[name = string("b_25_dilations_0"), val = tensor([1, 1])]; - int32 b_25_groups_0 = const()[name = string("b_25_groups_0"), val = int32(1)]; - tensor model_model_layers_12_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287763328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(293735360))))[name = string("model_model_layers_12_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_25_cast_fp16 = conv(dilations = b_25_dilations_0, groups = b_25_groups_0, pad = b_25_pad_0, pad_type = b_25_pad_type_0, strides = b_25_strides_0, weight = model_model_layers_12_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_255_cast_fp16)[name = string("b_25_cast_fp16")]; - string var_9494_mode_0 = const()[name = string("op_9494_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_9494_cast_fp16 = gelu(mode = var_9494_mode_0, x = x_207_cast_fp16)[name = string("op_9494_cast_fp16")]; - tensor input_257_cast_fp16 = mul(x = var_9494_cast_fp16, y = b_25_cast_fp16)[name = string("input_257_cast_fp16")]; - string e_25_pad_type_0 = const()[name = string("e_25_pad_type_0"), val = string("valid")]; - tensor e_25_strides_0 = const()[name = string("e_25_strides_0"), val = tensor([1, 1])]; - tensor e_25_pad_0 = const()[name = string("e_25_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_25_dilations_0 = const()[name = string("e_25_dilations_0"), val = tensor([1, 1])]; - int32 e_25_groups_0 = const()[name = string("e_25_groups_0"), val = int32(1)]; - tensor model_model_layers_12_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(293846016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299818048))))[name = string("model_model_layers_12_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_25_cast_fp16 = conv(dilations = e_25_dilations_0, groups = e_25_groups_0, pad = e_25_pad_0, pad_type = e_25_pad_type_0, strides = e_25_strides_0, weight = model_model_layers_12_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_257_cast_fp16)[name = string("e_25_cast_fp16")]; - tensor var_9502_axes_0 = const()[name = string("op_9502_axes_0"), val = tensor([2])]; - tensor var_9502_cast_fp16 = squeeze(axes = var_9502_axes_0, x = e_25_cast_fp16)[name = string("op_9502_cast_fp16")]; - tensor var_9503 = const()[name = string("op_9503"), val = tensor([0, 2, 1])]; - int32 var_9514 = const()[name = string("op_9514"), val = int32(-1)]; - fp16 const_490_promoted_to_fp16 = const()[name = string("const_490_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_205_cast_fp16 = transpose(perm = var_9503, x = var_9502_cast_fp16)[name = string("transpose_78")]; - tensor var_9516_cast_fp16 = mul(x = hidden_states_205_cast_fp16, y = const_490_promoted_to_fp16)[name = string("op_9516_cast_fp16")]; - bool input_259_interleave_0 = const()[name = string("input_259_interleave_0"), val = bool(false)]; - tensor input_259_cast_fp16 = concat(axis = var_9514, interleave = input_259_interleave_0, values = (hidden_states_205_cast_fp16, var_9516_cast_fp16))[name = string("input_259_cast_fp16")]; - tensor normed_309_axes_0 = const()[name = string("normed_309_axes_0"), val = tensor([-1])]; - fp16 var_9511_to_fp16 = const()[name = string("op_9511_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_309_cast_fp16 = layer_norm(axes = normed_309_axes_0, epsilon = var_9511_to_fp16, x = input_259_cast_fp16)[name = string("normed_309_cast_fp16")]; - tensor normed_311_begin_0 = const()[name = string("normed_311_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_311_end_0 = const()[name = string("normed_311_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_311_end_mask_0 = const()[name = string("normed_311_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_311_cast_fp16 = slice_by_index(begin = normed_311_begin_0, end = normed_311_end_0, end_mask = normed_311_end_mask_0, x = normed_309_cast_fp16)[name = string("normed_311_cast_fp16")]; - tensor var_9530_to_fp16 = const()[name = string("op_9530_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299836544)))]; - tensor hidden_states_207_cast_fp16 = mul(x = normed_311_cast_fp16, y = var_9530_to_fp16)[name = string("hidden_states_207_cast_fp16")]; - tensor hidden_states_209_cast_fp16 = add(x = hidden_states_203_cast_fp16, y = hidden_states_207_cast_fp16)[name = string("hidden_states_209_cast_fp16")]; - int32 var_9581 = const()[name = string("op_9581"), val = int32(-1)]; - fp16 const_494_promoted_to_fp16 = const()[name = string("const_494_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_9583_cast_fp16 = mul(x = hidden_states_209_cast_fp16, y = const_494_promoted_to_fp16)[name = string("op_9583_cast_fp16")]; - bool input_261_interleave_0 = const()[name = string("input_261_interleave_0"), val = bool(false)]; - tensor input_261_cast_fp16 = concat(axis = var_9581, interleave = input_261_interleave_0, values = (hidden_states_209_cast_fp16, var_9583_cast_fp16))[name = string("input_261_cast_fp16")]; - tensor normed_313_axes_0 = const()[name = string("normed_313_axes_0"), val = tensor([-1])]; - fp16 var_9578_to_fp16 = const()[name = string("op_9578_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_313_cast_fp16 = layer_norm(axes = normed_313_axes_0, epsilon = var_9578_to_fp16, x = input_261_cast_fp16)[name = string("normed_313_cast_fp16")]; - tensor normed_315_begin_0 = const()[name = string("normed_315_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_315_end_0 = const()[name = string("normed_315_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_315_end_mask_0 = const()[name = string("normed_315_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_315_cast_fp16 = slice_by_index(begin = normed_315_begin_0, end = normed_315_end_0, end_mask = normed_315_end_mask_0, x = normed_313_cast_fp16)[name = string("normed_315_cast_fp16")]; - tensor var_9597_to_fp16 = const()[name = string("op_9597_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299838912)))]; - tensor hidden_states_211_cast_fp16 = mul(x = normed_315_cast_fp16, y = var_9597_to_fp16)[name = string("hidden_states_211_cast_fp16")]; - tensor var_9602 = const()[name = string("op_9602"), val = tensor([0, 2, 1])]; - tensor var_9605_axes_0 = const()[name = string("op_9605_axes_0"), val = tensor([2])]; - tensor var_9603_cast_fp16 = transpose(perm = var_9602, x = hidden_states_211_cast_fp16)[name = string("transpose_77")]; - tensor var_9605_cast_fp16 = expand_dims(axes = var_9605_axes_0, x = var_9603_cast_fp16)[name = string("op_9605_cast_fp16")]; - string var_9621_pad_type_0 = const()[name = string("op_9621_pad_type_0"), val = string("valid")]; - tensor var_9621_strides_0 = const()[name = string("op_9621_strides_0"), val = tensor([1, 1])]; - tensor var_9621_pad_0 = const()[name = string("op_9621_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_9621_dilations_0 = const()[name = string("op_9621_dilations_0"), val = tensor([1, 1])]; - int32 var_9621_groups_0 = const()[name = string("op_9621_groups_0"), val = int32(1)]; - tensor var_9621 = conv(dilations = var_9621_dilations_0, groups = var_9621_groups_0, pad = var_9621_pad_0, pad_type = var_9621_pad_type_0, strides = var_9621_strides_0, weight = model_model_layers_13_self_attn_q_proj_weight_palettized, x = var_9605_cast_fp16)[name = string("op_9621")]; - tensor var_9626 = const()[name = string("op_9626"), val = tensor([1, 4, 1, 256])]; - tensor var_9627 = reshape(shape = var_9626, x = var_9621)[name = string("op_9627")]; - string var_9643_pad_type_0 = const()[name = string("op_9643_pad_type_0"), val = string("valid")]; - tensor var_9643_strides_0 = const()[name = string("op_9643_strides_0"), val = tensor([1, 1])]; - tensor var_9643_pad_0 = const()[name = string("op_9643_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_9643_dilations_0 = const()[name = string("op_9643_dilations_0"), val = tensor([1, 1])]; - int32 var_9643_groups_0 = const()[name = string("op_9643_groups_0"), val = int32(1)]; - tensor var_9643 = conv(dilations = var_9643_dilations_0, groups = var_9643_groups_0, pad = var_9643_pad_0, pad_type = var_9643_pad_type_0, strides = var_9643_strides_0, weight = model_model_layers_13_self_attn_k_proj_weight_palettized, x = var_9605_cast_fp16)[name = string("op_9643")]; - tensor var_9648 = const()[name = string("op_9648"), val = tensor([1, 1, 1, 256])]; - tensor var_9649 = reshape(shape = var_9648, x = var_9643)[name = string("op_9649")]; - string var_9665_pad_type_0 = const()[name = string("op_9665_pad_type_0"), val = string("valid")]; - tensor var_9665_strides_0 = const()[name = string("op_9665_strides_0"), val = tensor([1, 1])]; - tensor var_9665_pad_0 = const()[name = string("op_9665_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_9665_dilations_0 = const()[name = string("op_9665_dilations_0"), val = tensor([1, 1])]; - int32 var_9665_groups_0 = const()[name = string("op_9665_groups_0"), val = int32(1)]; - tensor var_9665 = conv(dilations = var_9665_dilations_0, groups = var_9665_groups_0, pad = var_9665_pad_0, pad_type = var_9665_pad_type_0, strides = var_9665_strides_0, weight = model_model_layers_13_self_attn_v_proj_weight_palettized, x = var_9605_cast_fp16)[name = string("op_9665")]; - tensor var_9670 = const()[name = string("op_9670"), val = tensor([1, 1, 1, 256])]; - tensor var_9671 = reshape(shape = var_9670, x = var_9665)[name = string("op_9671")]; - int32 var_9686 = const()[name = string("op_9686"), val = int32(-1)]; - fp16 const_498_promoted = const()[name = string("const_498_promoted"), val = fp16(-0x1p+0)]; - tensor var_9688 = mul(x = var_9627, y = const_498_promoted)[name = string("op_9688")]; - bool input_265_interleave_0 = const()[name = string("input_265_interleave_0"), val = bool(false)]; - tensor input_265 = concat(axis = var_9686, interleave = input_265_interleave_0, values = (var_9627, var_9688))[name = string("input_265")]; - tensor normed_317_axes_0 = const()[name = string("normed_317_axes_0"), val = tensor([-1])]; - fp16 var_9683_to_fp16 = const()[name = string("op_9683_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_317_cast_fp16 = layer_norm(axes = normed_317_axes_0, epsilon = var_9683_to_fp16, x = input_265)[name = string("normed_317_cast_fp16")]; - tensor normed_319_begin_0 = const()[name = string("normed_319_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_319_end_0 = const()[name = string("normed_319_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_319_end_mask_0 = const()[name = string("normed_319_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_319 = slice_by_index(begin = normed_319_begin_0, end = normed_319_end_0, end_mask = normed_319_end_mask_0, x = normed_317_cast_fp16)[name = string("normed_319")]; - tensor var_9702_to_fp16 = const()[name = string("op_9702_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299841280)))]; - tensor q_27_cast_fp16 = mul(x = normed_319, y = var_9702_to_fp16)[name = string("q_27_cast_fp16")]; - int32 var_9713 = const()[name = string("op_9713"), val = int32(-1)]; - fp16 const_502_promoted = const()[name = string("const_502_promoted"), val = fp16(-0x1p+0)]; - tensor var_9715 = mul(x = var_9649, y = const_502_promoted)[name = string("op_9715")]; - bool input_267_interleave_0 = const()[name = string("input_267_interleave_0"), val = bool(false)]; - tensor input_267 = concat(axis = var_9713, interleave = input_267_interleave_0, values = (var_9649, var_9715))[name = string("input_267")]; - tensor normed_321_axes_0 = const()[name = string("normed_321_axes_0"), val = tensor([-1])]; - fp16 var_9710_to_fp16 = const()[name = string("op_9710_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_321_cast_fp16 = layer_norm(axes = normed_321_axes_0, epsilon = var_9710_to_fp16, x = input_267)[name = string("normed_321_cast_fp16")]; - tensor normed_323_begin_0 = const()[name = string("normed_323_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_323_end_0 = const()[name = string("normed_323_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_323_end_mask_0 = const()[name = string("normed_323_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_323 = slice_by_index(begin = normed_323_begin_0, end = normed_323_end_0, end_mask = normed_323_end_mask_0, x = normed_321_cast_fp16)[name = string("normed_323")]; - tensor var_9729_to_fp16 = const()[name = string("op_9729_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299841856)))]; - tensor k_27_cast_fp16 = mul(x = normed_323, y = var_9729_to_fp16)[name = string("k_27_cast_fp16")]; - tensor var_9731_cast_fp16 = mul(x = q_27_cast_fp16, y = cos_1_cast_fp16)[name = string("op_9731_cast_fp16")]; - tensor x1_53_begin_0 = const()[name = string("x1_53_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_53_end_0 = const()[name = string("x1_53_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_53_end_mask_0 = const()[name = string("x1_53_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_53_cast_fp16 = slice_by_index(begin = x1_53_begin_0, end = x1_53_end_0, end_mask = x1_53_end_mask_0, x = q_27_cast_fp16)[name = string("x1_53_cast_fp16")]; - tensor x2_53_begin_0 = const()[name = string("x2_53_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_53_end_0 = const()[name = string("x2_53_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_53_end_mask_0 = const()[name = string("x2_53_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_53_cast_fp16 = slice_by_index(begin = x2_53_begin_0, end = x2_53_end_0, end_mask = x2_53_end_mask_0, x = q_27_cast_fp16)[name = string("x2_53_cast_fp16")]; - fp16 const_508_promoted_to_fp16 = const()[name = string("const_508_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_9752_cast_fp16 = mul(x = x2_53_cast_fp16, y = const_508_promoted_to_fp16)[name = string("op_9752_cast_fp16")]; - int32 var_9754 = const()[name = string("op_9754"), val = int32(-1)]; - bool var_9755_interleave_0 = const()[name = string("op_9755_interleave_0"), val = bool(false)]; - tensor var_9755_cast_fp16 = concat(axis = var_9754, interleave = var_9755_interleave_0, values = (var_9752_cast_fp16, x1_53_cast_fp16))[name = string("op_9755_cast_fp16")]; - tensor var_9756_cast_fp16 = mul(x = var_9755_cast_fp16, y = sin_1_cast_fp16)[name = string("op_9756_cast_fp16")]; - tensor query_states_53_cast_fp16 = add(x = var_9731_cast_fp16, y = var_9756_cast_fp16)[name = string("query_states_53_cast_fp16")]; - tensor var_9759_cast_fp16 = mul(x = k_27_cast_fp16, y = cos_1_cast_fp16)[name = string("op_9759_cast_fp16")]; - tensor x1_55_begin_0 = const()[name = string("x1_55_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_55_end_0 = const()[name = string("x1_55_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_55_end_mask_0 = const()[name = string("x1_55_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_55_cast_fp16 = slice_by_index(begin = x1_55_begin_0, end = x1_55_end_0, end_mask = x1_55_end_mask_0, x = k_27_cast_fp16)[name = string("x1_55_cast_fp16")]; - tensor x2_55_begin_0 = const()[name = string("x2_55_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_55_end_0 = const()[name = string("x2_55_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_55_end_mask_0 = const()[name = string("x2_55_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_55_cast_fp16 = slice_by_index(begin = x2_55_begin_0, end = x2_55_end_0, end_mask = x2_55_end_mask_0, x = k_27_cast_fp16)[name = string("x2_55_cast_fp16")]; - fp16 const_511_promoted_to_fp16 = const()[name = string("const_511_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_9780_cast_fp16 = mul(x = x2_55_cast_fp16, y = const_511_promoted_to_fp16)[name = string("op_9780_cast_fp16")]; - int32 var_9782 = const()[name = string("op_9782"), val = int32(-1)]; - bool var_9783_interleave_0 = const()[name = string("op_9783_interleave_0"), val = bool(false)]; - tensor var_9783_cast_fp16 = concat(axis = var_9782, interleave = var_9783_interleave_0, values = (var_9780_cast_fp16, x1_55_cast_fp16))[name = string("op_9783_cast_fp16")]; - tensor var_9784_cast_fp16 = mul(x = var_9783_cast_fp16, y = sin_1_cast_fp16)[name = string("op_9784_cast_fp16")]; - tensor key_states_53_cast_fp16 = add(x = var_9759_cast_fp16, y = var_9784_cast_fp16)[name = string("key_states_53_cast_fp16")]; - tensor expand_dims_156 = const()[name = string("expand_dims_156"), val = tensor([11])]; - tensor expand_dims_157 = const()[name = string("expand_dims_157"), val = tensor([0])]; - tensor expand_dims_159 = const()[name = string("expand_dims_159"), val = tensor([0])]; - tensor expand_dims_160 = const()[name = string("expand_dims_160"), val = tensor([12])]; - int32 concat_106_axis_0 = const()[name = string("concat_106_axis_0"), val = int32(0)]; - bool concat_106_interleave_0 = const()[name = string("concat_106_interleave_0"), val = bool(false)]; - tensor concat_106 = concat(axis = concat_106_axis_0, interleave = concat_106_interleave_0, values = (expand_dims_156, expand_dims_157, current_pos, expand_dims_159))[name = string("concat_106")]; - tensor concat_107_values1_0 = const()[name = string("concat_107_values1_0"), val = tensor([0])]; - tensor concat_107_values3_0 = const()[name = string("concat_107_values3_0"), val = tensor([0])]; - int32 concat_107_axis_0 = const()[name = string("concat_107_axis_0"), val = int32(0)]; - bool concat_107_interleave_0 = const()[name = string("concat_107_interleave_0"), val = bool(false)]; - tensor concat_107 = concat(axis = concat_107_axis_0, interleave = concat_107_interleave_0, values = (expand_dims_160, concat_107_values1_0, var_1909, concat_107_values3_0))[name = string("concat_107")]; - tensor model_model_kv_cache_local_internal_tensor_assign_23_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_23_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_23_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_106, begin_mask = model_model_kv_cache_local_internal_tensor_assign_23_begin_mask_0, end = concat_107, end_mask = model_model_kv_cache_local_internal_tensor_assign_23_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_23_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_23_stride_0, update = key_states_53_cast_fp16, x = coreml_update_state_77)[name = string("model_model_kv_cache_local_internal_tensor_assign_23_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_23_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_26_write_state")]; - tensor coreml_update_state_78 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_26")]; - tensor expand_dims_162 = const()[name = string("expand_dims_162"), val = tensor([33])]; - tensor expand_dims_163 = const()[name = string("expand_dims_163"), val = tensor([0])]; - tensor expand_dims_165 = const()[name = string("expand_dims_165"), val = tensor([0])]; - tensor expand_dims_166 = const()[name = string("expand_dims_166"), val = tensor([34])]; - int32 concat_110_axis_0 = const()[name = string("concat_110_axis_0"), val = int32(0)]; - bool concat_110_interleave_0 = const()[name = string("concat_110_interleave_0"), val = bool(false)]; - tensor concat_110 = concat(axis = concat_110_axis_0, interleave = concat_110_interleave_0, values = (expand_dims_162, expand_dims_163, current_pos, expand_dims_165))[name = string("concat_110")]; - tensor concat_111_values1_0 = const()[name = string("concat_111_values1_0"), val = tensor([0])]; - tensor concat_111_values3_0 = const()[name = string("concat_111_values3_0"), val = tensor([0])]; - int32 concat_111_axis_0 = const()[name = string("concat_111_axis_0"), val = int32(0)]; - bool concat_111_interleave_0 = const()[name = string("concat_111_interleave_0"), val = bool(false)]; - tensor concat_111 = concat(axis = concat_111_axis_0, interleave = concat_111_interleave_0, values = (expand_dims_166, concat_111_values1_0, var_1909, concat_111_values3_0))[name = string("concat_111")]; - tensor model_model_kv_cache_local_internal_tensor_assign_24_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_24_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_24_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_110, begin_mask = model_model_kv_cache_local_internal_tensor_assign_24_begin_mask_0, end = concat_111, end_mask = model_model_kv_cache_local_internal_tensor_assign_24_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_24_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_24_stride_0, update = var_9671, x = coreml_update_state_78)[name = string("model_model_kv_cache_local_internal_tensor_assign_24_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_24_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_27_write_state")]; - tensor coreml_update_state_79 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_27")]; - tensor var_9839_begin_0 = const()[name = string("op_9839_begin_0"), val = tensor([11, 0, 0, 0])]; - tensor var_9839_end_0 = const()[name = string("op_9839_end_0"), val = tensor([12, 1, 512, 256])]; - tensor var_9839_end_mask_0 = const()[name = string("op_9839_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_9839_cast_fp16 = slice_by_index(begin = var_9839_begin_0, end = var_9839_end_0, end_mask = var_9839_end_mask_0, x = coreml_update_state_79)[name = string("op_9839_cast_fp16")]; - tensor var_9846_begin_0 = const()[name = string("op_9846_begin_0"), val = tensor([33, 0, 0, 0])]; - tensor var_9846_end_0 = const()[name = string("op_9846_end_0"), val = tensor([34, 1, 512, 256])]; - tensor var_9846_end_mask_0 = const()[name = string("op_9846_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_9846_cast_fp16 = slice_by_index(begin = var_9846_begin_0, end = var_9846_end_0, end_mask = var_9846_end_mask_0, x = coreml_update_state_79)[name = string("op_9846_cast_fp16")]; - tensor var_9883 = const()[name = string("op_9883"), val = tensor([1, 4, 1, 1])]; - tensor x_213_cast_fp16 = tile(reps = var_9883, x = var_9839_cast_fp16)[name = string("x_213_cast_fp16")]; - tensor var_9903 = const()[name = string("op_9903"), val = tensor([1, 4, 1, 1])]; - tensor x_219_cast_fp16 = tile(reps = var_9903, x = var_9846_cast_fp16)[name = string("x_219_cast_fp16")]; - bool var_9930_transpose_x_1 = const()[name = string("op_9930_transpose_x_1"), val = bool(false)]; - bool var_9930_transpose_y_1 = const()[name = string("op_9930_transpose_y_1"), val = bool(true)]; - tensor var_9930 = matmul(transpose_x = var_9930_transpose_x_1, transpose_y = var_9930_transpose_y_1, x = query_states_53_cast_fp16, y = x_213_cast_fp16)[name = string("op_9930")]; - fp16 var_9931_to_fp16 = const()[name = string("op_9931_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_79_cast_fp16 = mul(x = var_9930, y = var_9931_to_fp16)[name = string("attn_weights_79_cast_fp16")]; - tensor attn_weights_81_cast_fp16 = add(x = attn_weights_79_cast_fp16, y = var_2083)[name = string("attn_weights_81_cast_fp16")]; - int32 var_9966 = const()[name = string("op_9966"), val = int32(-1)]; - tensor attn_weights_83_cast_fp16 = softmax(axis = var_9966, x = attn_weights_81_cast_fp16)[name = string("attn_weights_83_cast_fp16")]; - bool attn_output_131_transpose_x_0 = const()[name = string("attn_output_131_transpose_x_0"), val = bool(false)]; - bool attn_output_131_transpose_y_0 = const()[name = string("attn_output_131_transpose_y_0"), val = bool(false)]; - tensor attn_output_131_cast_fp16 = matmul(transpose_x = attn_output_131_transpose_x_0, transpose_y = attn_output_131_transpose_y_0, x = attn_weights_83_cast_fp16, y = x_219_cast_fp16)[name = string("attn_output_131_cast_fp16")]; - tensor var_9977_perm_0 = const()[name = string("op_9977_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_9981 = const()[name = string("op_9981"), val = tensor([1, 1, 1024])]; - tensor var_9977_cast_fp16 = transpose(perm = var_9977_perm_0, x = attn_output_131_cast_fp16)[name = string("transpose_76")]; - tensor attn_output_135_cast_fp16 = reshape(shape = var_9981, x = var_9977_cast_fp16)[name = string("attn_output_135_cast_fp16")]; - tensor var_9986 = const()[name = string("op_9986"), val = tensor([0, 2, 1])]; - string var_10002_pad_type_0 = const()[name = string("op_10002_pad_type_0"), val = string("valid")]; - int32 var_10002_groups_0 = const()[name = string("op_10002_groups_0"), val = int32(1)]; - tensor var_10002_strides_0 = const()[name = string("op_10002_strides_0"), val = tensor([1])]; - tensor var_10002_pad_0 = const()[name = string("op_10002_pad_0"), val = tensor([0, 0])]; - tensor var_10002_dilations_0 = const()[name = string("op_10002_dilations_0"), val = tensor([1])]; - tensor squeeze_13_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299842432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(300727232))))[name = string("squeeze_13_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_9987_cast_fp16 = transpose(perm = var_9986, x = attn_output_135_cast_fp16)[name = string("transpose_75")]; - tensor var_10002_cast_fp16 = conv(dilations = var_10002_dilations_0, groups = var_10002_groups_0, pad = var_10002_pad_0, pad_type = var_10002_pad_type_0, strides = var_10002_strides_0, weight = squeeze_13_cast_fp16_to_fp32_to_fp16_palettized, x = var_9987_cast_fp16)[name = string("op_10002_cast_fp16")]; - tensor var_10006 = const()[name = string("op_10006"), val = tensor([0, 2, 1])]; - int32 var_10017 = const()[name = string("op_10017"), val = int32(-1)]; - fp16 const_520_promoted_to_fp16 = const()[name = string("const_520_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_217_cast_fp16 = transpose(perm = var_10006, x = var_10002_cast_fp16)[name = string("transpose_74")]; - tensor var_10019_cast_fp16 = mul(x = hidden_states_217_cast_fp16, y = const_520_promoted_to_fp16)[name = string("op_10019_cast_fp16")]; - bool input_271_interleave_0 = const()[name = string("input_271_interleave_0"), val = bool(false)]; - tensor input_271_cast_fp16 = concat(axis = var_10017, interleave = input_271_interleave_0, values = (hidden_states_217_cast_fp16, var_10019_cast_fp16))[name = string("input_271_cast_fp16")]; - tensor normed_325_axes_0 = const()[name = string("normed_325_axes_0"), val = tensor([-1])]; - fp16 var_10014_to_fp16 = const()[name = string("op_10014_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_325_cast_fp16 = layer_norm(axes = normed_325_axes_0, epsilon = var_10014_to_fp16, x = input_271_cast_fp16)[name = string("normed_325_cast_fp16")]; - tensor normed_327_begin_0 = const()[name = string("normed_327_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_327_end_0 = const()[name = string("normed_327_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_327_end_mask_0 = const()[name = string("normed_327_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_327_cast_fp16 = slice_by_index(begin = normed_327_begin_0, end = normed_327_end_0, end_mask = normed_327_end_mask_0, x = normed_325_cast_fp16)[name = string("normed_327_cast_fp16")]; - tensor var_10033_to_fp16 = const()[name = string("op_10033_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(300745728)))]; - tensor attn_output_139_cast_fp16 = mul(x = normed_327_cast_fp16, y = var_10033_to_fp16)[name = string("attn_output_139_cast_fp16")]; - tensor hidden_states_219_cast_fp16 = add(x = hidden_states_209_cast_fp16, y = attn_output_139_cast_fp16)[name = string("hidden_states_219_cast_fp16")]; - int32 var_10046 = const()[name = string("op_10046"), val = int32(-1)]; - fp16 const_524_promoted_to_fp16 = const()[name = string("const_524_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_10048_cast_fp16 = mul(x = hidden_states_219_cast_fp16, y = const_524_promoted_to_fp16)[name = string("op_10048_cast_fp16")]; - bool input_273_interleave_0 = const()[name = string("input_273_interleave_0"), val = bool(false)]; - tensor input_273_cast_fp16 = concat(axis = var_10046, interleave = input_273_interleave_0, values = (hidden_states_219_cast_fp16, var_10048_cast_fp16))[name = string("input_273_cast_fp16")]; - tensor normed_329_axes_0 = const()[name = string("normed_329_axes_0"), val = tensor([-1])]; - fp16 var_10043_to_fp16 = const()[name = string("op_10043_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_329_cast_fp16 = layer_norm(axes = normed_329_axes_0, epsilon = var_10043_to_fp16, x = input_273_cast_fp16)[name = string("normed_329_cast_fp16")]; - tensor normed_331_begin_0 = const()[name = string("normed_331_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_331_end_0 = const()[name = string("normed_331_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_331_end_mask_0 = const()[name = string("normed_331_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_331_cast_fp16 = slice_by_index(begin = normed_331_begin_0, end = normed_331_end_0, end_mask = normed_331_end_mask_0, x = normed_329_cast_fp16)[name = string("normed_331_cast_fp16")]; - tensor var_10062_to_fp16 = const()[name = string("op_10062_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(300748096)))]; - tensor x_221_cast_fp16 = mul(x = normed_331_cast_fp16, y = var_10062_to_fp16)[name = string("x_221_cast_fp16")]; - tensor var_10074 = const()[name = string("op_10074"), val = tensor([0, 2, 1])]; - tensor input_275_axes_0 = const()[name = string("input_275_axes_0"), val = tensor([2])]; - tensor var_10075_cast_fp16 = transpose(perm = var_10074, x = x_221_cast_fp16)[name = string("transpose_73")]; - tensor input_275_cast_fp16 = expand_dims(axes = input_275_axes_0, x = var_10075_cast_fp16)[name = string("input_275_cast_fp16")]; - string x_223_pad_type_0 = const()[name = string("x_223_pad_type_0"), val = string("valid")]; - tensor x_223_strides_0 = const()[name = string("x_223_strides_0"), val = tensor([1, 1])]; - tensor x_223_pad_0 = const()[name = string("x_223_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_223_dilations_0 = const()[name = string("x_223_dilations_0"), val = tensor([1, 1])]; - int32 x_223_groups_0 = const()[name = string("x_223_groups_0"), val = int32(1)]; - tensor model_model_layers_13_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(300750464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306722496))))[name = string("model_model_layers_13_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_223_cast_fp16 = conv(dilations = x_223_dilations_0, groups = x_223_groups_0, pad = x_223_pad_0, pad_type = x_223_pad_type_0, strides = x_223_strides_0, weight = model_model_layers_13_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_275_cast_fp16)[name = string("x_223_cast_fp16")]; - string b_27_pad_type_0 = const()[name = string("b_27_pad_type_0"), val = string("valid")]; - tensor b_27_strides_0 = const()[name = string("b_27_strides_0"), val = tensor([1, 1])]; - tensor b_27_pad_0 = const()[name = string("b_27_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_27_dilations_0 = const()[name = string("b_27_dilations_0"), val = tensor([1, 1])]; - int32 b_27_groups_0 = const()[name = string("b_27_groups_0"), val = int32(1)]; - tensor model_model_layers_13_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(306833152))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(312805184))))[name = string("model_model_layers_13_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_27_cast_fp16 = conv(dilations = b_27_dilations_0, groups = b_27_groups_0, pad = b_27_pad_0, pad_type = b_27_pad_type_0, strides = b_27_strides_0, weight = model_model_layers_13_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_275_cast_fp16)[name = string("b_27_cast_fp16")]; - string var_10100_mode_0 = const()[name = string("op_10100_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_10100_cast_fp16 = gelu(mode = var_10100_mode_0, x = x_223_cast_fp16)[name = string("op_10100_cast_fp16")]; - tensor input_277_cast_fp16 = mul(x = var_10100_cast_fp16, y = b_27_cast_fp16)[name = string("input_277_cast_fp16")]; - string e_27_pad_type_0 = const()[name = string("e_27_pad_type_0"), val = string("valid")]; - tensor e_27_strides_0 = const()[name = string("e_27_strides_0"), val = tensor([1, 1])]; - tensor e_27_pad_0 = const()[name = string("e_27_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_27_dilations_0 = const()[name = string("e_27_dilations_0"), val = tensor([1, 1])]; - int32 e_27_groups_0 = const()[name = string("e_27_groups_0"), val = int32(1)]; - tensor model_model_layers_13_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(312915840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318887872))))[name = string("model_model_layers_13_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_27_cast_fp16 = conv(dilations = e_27_dilations_0, groups = e_27_groups_0, pad = e_27_pad_0, pad_type = e_27_pad_type_0, strides = e_27_strides_0, weight = model_model_layers_13_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_277_cast_fp16)[name = string("e_27_cast_fp16")]; - tensor var_10108_axes_0 = const()[name = string("op_10108_axes_0"), val = tensor([2])]; - tensor var_10108_cast_fp16 = squeeze(axes = var_10108_axes_0, x = e_27_cast_fp16)[name = string("op_10108_cast_fp16")]; - tensor var_10109 = const()[name = string("op_10109"), val = tensor([0, 2, 1])]; - int32 var_10120 = const()[name = string("op_10120"), val = int32(-1)]; - fp16 const_528_promoted_to_fp16 = const()[name = string("const_528_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_221_cast_fp16 = transpose(perm = var_10109, x = var_10108_cast_fp16)[name = string("transpose_72")]; - tensor var_10122_cast_fp16 = mul(x = hidden_states_221_cast_fp16, y = const_528_promoted_to_fp16)[name = string("op_10122_cast_fp16")]; - bool input_279_interleave_0 = const()[name = string("input_279_interleave_0"), val = bool(false)]; - tensor input_279_cast_fp16 = concat(axis = var_10120, interleave = input_279_interleave_0, values = (hidden_states_221_cast_fp16, var_10122_cast_fp16))[name = string("input_279_cast_fp16")]; - tensor normed_333_axes_0 = const()[name = string("normed_333_axes_0"), val = tensor([-1])]; - fp16 var_10117_to_fp16 = const()[name = string("op_10117_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_333_cast_fp16 = layer_norm(axes = normed_333_axes_0, epsilon = var_10117_to_fp16, x = input_279_cast_fp16)[name = string("normed_333_cast_fp16")]; - tensor normed_335_begin_0 = const()[name = string("normed_335_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_335_end_0 = const()[name = string("normed_335_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_335_end_mask_0 = const()[name = string("normed_335_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_335_cast_fp16 = slice_by_index(begin = normed_335_begin_0, end = normed_335_end_0, end_mask = normed_335_end_mask_0, x = normed_333_cast_fp16)[name = string("normed_335_cast_fp16")]; - tensor var_10136_to_fp16 = const()[name = string("op_10136_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318906368)))]; - tensor hidden_states_223_cast_fp16 = mul(x = normed_335_cast_fp16, y = var_10136_to_fp16)[name = string("hidden_states_223_cast_fp16")]; - tensor hidden_states_225_cast_fp16 = add(x = hidden_states_219_cast_fp16, y = hidden_states_223_cast_fp16)[name = string("hidden_states_225_cast_fp16")]; - int32 var_10187 = const()[name = string("op_10187"), val = int32(-1)]; - fp16 const_532_promoted_to_fp16 = const()[name = string("const_532_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_10189_cast_fp16 = mul(x = hidden_states_225_cast_fp16, y = const_532_promoted_to_fp16)[name = string("op_10189_cast_fp16")]; - bool input_281_interleave_0 = const()[name = string("input_281_interleave_0"), val = bool(false)]; - tensor input_281_cast_fp16 = concat(axis = var_10187, interleave = input_281_interleave_0, values = (hidden_states_225_cast_fp16, var_10189_cast_fp16))[name = string("input_281_cast_fp16")]; - tensor normed_337_axes_0 = const()[name = string("normed_337_axes_0"), val = tensor([-1])]; - fp16 var_10184_to_fp16 = const()[name = string("op_10184_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_337_cast_fp16 = layer_norm(axes = normed_337_axes_0, epsilon = var_10184_to_fp16, x = input_281_cast_fp16)[name = string("normed_337_cast_fp16")]; - tensor normed_339_begin_0 = const()[name = string("normed_339_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_339_end_0 = const()[name = string("normed_339_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_339_end_mask_0 = const()[name = string("normed_339_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_339_cast_fp16 = slice_by_index(begin = normed_339_begin_0, end = normed_339_end_0, end_mask = normed_339_end_mask_0, x = normed_337_cast_fp16)[name = string("normed_339_cast_fp16")]; - tensor var_10203_to_fp16 = const()[name = string("op_10203_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318908736)))]; - tensor hidden_states_227_cast_fp16 = mul(x = normed_339_cast_fp16, y = var_10203_to_fp16)[name = string("hidden_states_227_cast_fp16")]; - tensor var_10208 = const()[name = string("op_10208"), val = tensor([0, 2, 1])]; - tensor var_10211_axes_0 = const()[name = string("op_10211_axes_0"), val = tensor([2])]; - tensor var_10209_cast_fp16 = transpose(perm = var_10208, x = hidden_states_227_cast_fp16)[name = string("transpose_71")]; - tensor var_10211_cast_fp16 = expand_dims(axes = var_10211_axes_0, x = var_10209_cast_fp16)[name = string("op_10211_cast_fp16")]; - string var_10227_pad_type_0 = const()[name = string("op_10227_pad_type_0"), val = string("valid")]; - tensor var_10227_strides_0 = const()[name = string("op_10227_strides_0"), val = tensor([1, 1])]; - tensor var_10227_pad_0 = const()[name = string("op_10227_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_10227_dilations_0 = const()[name = string("op_10227_dilations_0"), val = tensor([1, 1])]; - int32 var_10227_groups_0 = const()[name = string("op_10227_groups_0"), val = int32(1)]; - tensor var_10227 = conv(dilations = var_10227_dilations_0, groups = var_10227_groups_0, pad = var_10227_pad_0, pad_type = var_10227_pad_type_0, strides = var_10227_strides_0, weight = model_model_layers_14_self_attn_q_proj_weight_palettized, x = var_10211_cast_fp16)[name = string("op_10227")]; - tensor var_10232 = const()[name = string("op_10232"), val = tensor([1, 4, 1, 256])]; - tensor var_10233 = reshape(shape = var_10232, x = var_10227)[name = string("op_10233")]; - string var_10249_pad_type_0 = const()[name = string("op_10249_pad_type_0"), val = string("valid")]; - tensor var_10249_strides_0 = const()[name = string("op_10249_strides_0"), val = tensor([1, 1])]; - tensor var_10249_pad_0 = const()[name = string("op_10249_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_10249_dilations_0 = const()[name = string("op_10249_dilations_0"), val = tensor([1, 1])]; - int32 var_10249_groups_0 = const()[name = string("op_10249_groups_0"), val = int32(1)]; - tensor var_10249 = conv(dilations = var_10249_dilations_0, groups = var_10249_groups_0, pad = var_10249_pad_0, pad_type = var_10249_pad_type_0, strides = var_10249_strides_0, weight = model_model_layers_14_self_attn_k_proj_weight_palettized, x = var_10211_cast_fp16)[name = string("op_10249")]; - tensor var_10254 = const()[name = string("op_10254"), val = tensor([1, 1, 1, 256])]; - tensor var_10255 = reshape(shape = var_10254, x = var_10249)[name = string("op_10255")]; - string var_10271_pad_type_0 = const()[name = string("op_10271_pad_type_0"), val = string("valid")]; - tensor var_10271_strides_0 = const()[name = string("op_10271_strides_0"), val = tensor([1, 1])]; - tensor var_10271_pad_0 = const()[name = string("op_10271_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_10271_dilations_0 = const()[name = string("op_10271_dilations_0"), val = tensor([1, 1])]; - int32 var_10271_groups_0 = const()[name = string("op_10271_groups_0"), val = int32(1)]; - tensor var_10271 = conv(dilations = var_10271_dilations_0, groups = var_10271_groups_0, pad = var_10271_pad_0, pad_type = var_10271_pad_type_0, strides = var_10271_strides_0, weight = model_model_layers_14_self_attn_v_proj_weight_palettized, x = var_10211_cast_fp16)[name = string("op_10271")]; - tensor var_10276 = const()[name = string("op_10276"), val = tensor([1, 1, 1, 256])]; - tensor var_10277 = reshape(shape = var_10276, x = var_10271)[name = string("op_10277")]; - int32 var_10292 = const()[name = string("op_10292"), val = int32(-1)]; - fp16 const_536_promoted = const()[name = string("const_536_promoted"), val = fp16(-0x1p+0)]; - tensor var_10294 = mul(x = var_10233, y = const_536_promoted)[name = string("op_10294")]; - bool input_285_interleave_0 = const()[name = string("input_285_interleave_0"), val = bool(false)]; - tensor input_285 = concat(axis = var_10292, interleave = input_285_interleave_0, values = (var_10233, var_10294))[name = string("input_285")]; - tensor normed_341_axes_0 = const()[name = string("normed_341_axes_0"), val = tensor([-1])]; - fp16 var_10289_to_fp16 = const()[name = string("op_10289_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_341_cast_fp16 = layer_norm(axes = normed_341_axes_0, epsilon = var_10289_to_fp16, x = input_285)[name = string("normed_341_cast_fp16")]; - tensor normed_343_begin_0 = const()[name = string("normed_343_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_343_end_0 = const()[name = string("normed_343_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_343_end_mask_0 = const()[name = string("normed_343_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_343 = slice_by_index(begin = normed_343_begin_0, end = normed_343_end_0, end_mask = normed_343_end_mask_0, x = normed_341_cast_fp16)[name = string("normed_343")]; - tensor var_10308_to_fp16 = const()[name = string("op_10308_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318911104)))]; - tensor q_29_cast_fp16 = mul(x = normed_343, y = var_10308_to_fp16)[name = string("q_29_cast_fp16")]; - int32 var_10319 = const()[name = string("op_10319"), val = int32(-1)]; - fp16 const_540_promoted = const()[name = string("const_540_promoted"), val = fp16(-0x1p+0)]; - tensor var_10321 = mul(x = var_10255, y = const_540_promoted)[name = string("op_10321")]; - bool input_287_interleave_0 = const()[name = string("input_287_interleave_0"), val = bool(false)]; - tensor input_287 = concat(axis = var_10319, interleave = input_287_interleave_0, values = (var_10255, var_10321))[name = string("input_287")]; - tensor normed_345_axes_0 = const()[name = string("normed_345_axes_0"), val = tensor([-1])]; - fp16 var_10316_to_fp16 = const()[name = string("op_10316_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_345_cast_fp16 = layer_norm(axes = normed_345_axes_0, epsilon = var_10316_to_fp16, x = input_287)[name = string("normed_345_cast_fp16")]; - tensor normed_347_begin_0 = const()[name = string("normed_347_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_347_end_0 = const()[name = string("normed_347_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_347_end_mask_0 = const()[name = string("normed_347_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_347 = slice_by_index(begin = normed_347_begin_0, end = normed_347_end_0, end_mask = normed_347_end_mask_0, x = normed_345_cast_fp16)[name = string("normed_347")]; - tensor var_10335_to_fp16 = const()[name = string("op_10335_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318911680)))]; - tensor k_29_cast_fp16 = mul(x = normed_347, y = var_10335_to_fp16)[name = string("k_29_cast_fp16")]; - tensor var_10337_cast_fp16 = mul(x = q_29_cast_fp16, y = cos_1_cast_fp16)[name = string("op_10337_cast_fp16")]; - tensor x1_57_begin_0 = const()[name = string("x1_57_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_57_end_0 = const()[name = string("x1_57_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_57_end_mask_0 = const()[name = string("x1_57_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_57_cast_fp16 = slice_by_index(begin = x1_57_begin_0, end = x1_57_end_0, end_mask = x1_57_end_mask_0, x = q_29_cast_fp16)[name = string("x1_57_cast_fp16")]; - tensor x2_57_begin_0 = const()[name = string("x2_57_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_57_end_0 = const()[name = string("x2_57_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_57_end_mask_0 = const()[name = string("x2_57_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_57_cast_fp16 = slice_by_index(begin = x2_57_begin_0, end = x2_57_end_0, end_mask = x2_57_end_mask_0, x = q_29_cast_fp16)[name = string("x2_57_cast_fp16")]; - fp16 const_546_promoted_to_fp16 = const()[name = string("const_546_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_10358_cast_fp16 = mul(x = x2_57_cast_fp16, y = const_546_promoted_to_fp16)[name = string("op_10358_cast_fp16")]; - int32 var_10360 = const()[name = string("op_10360"), val = int32(-1)]; - bool var_10361_interleave_0 = const()[name = string("op_10361_interleave_0"), val = bool(false)]; - tensor var_10361_cast_fp16 = concat(axis = var_10360, interleave = var_10361_interleave_0, values = (var_10358_cast_fp16, x1_57_cast_fp16))[name = string("op_10361_cast_fp16")]; - tensor var_10362_cast_fp16 = mul(x = var_10361_cast_fp16, y = sin_1_cast_fp16)[name = string("op_10362_cast_fp16")]; - tensor query_states_57_cast_fp16 = add(x = var_10337_cast_fp16, y = var_10362_cast_fp16)[name = string("query_states_57_cast_fp16")]; - tensor var_10365_cast_fp16 = mul(x = k_29_cast_fp16, y = cos_1_cast_fp16)[name = string("op_10365_cast_fp16")]; - tensor x1_59_begin_0 = const()[name = string("x1_59_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_59_end_0 = const()[name = string("x1_59_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_59_end_mask_0 = const()[name = string("x1_59_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_59_cast_fp16 = slice_by_index(begin = x1_59_begin_0, end = x1_59_end_0, end_mask = x1_59_end_mask_0, x = k_29_cast_fp16)[name = string("x1_59_cast_fp16")]; - tensor x2_59_begin_0 = const()[name = string("x2_59_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_59_end_0 = const()[name = string("x2_59_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_59_end_mask_0 = const()[name = string("x2_59_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_59_cast_fp16 = slice_by_index(begin = x2_59_begin_0, end = x2_59_end_0, end_mask = x2_59_end_mask_0, x = k_29_cast_fp16)[name = string("x2_59_cast_fp16")]; - fp16 const_549_promoted_to_fp16 = const()[name = string("const_549_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_10386_cast_fp16 = mul(x = x2_59_cast_fp16, y = const_549_promoted_to_fp16)[name = string("op_10386_cast_fp16")]; - int32 var_10388 = const()[name = string("op_10388"), val = int32(-1)]; - bool var_10389_interleave_0 = const()[name = string("op_10389_interleave_0"), val = bool(false)]; - tensor var_10389_cast_fp16 = concat(axis = var_10388, interleave = var_10389_interleave_0, values = (var_10386_cast_fp16, x1_59_cast_fp16))[name = string("op_10389_cast_fp16")]; - tensor var_10390_cast_fp16 = mul(x = var_10389_cast_fp16, y = sin_1_cast_fp16)[name = string("op_10390_cast_fp16")]; - tensor key_states_57_cast_fp16 = add(x = var_10365_cast_fp16, y = var_10390_cast_fp16)[name = string("key_states_57_cast_fp16")]; - tensor expand_dims_168 = const()[name = string("expand_dims_168"), val = tensor([12])]; - tensor expand_dims_169 = const()[name = string("expand_dims_169"), val = tensor([0])]; - tensor expand_dims_171 = const()[name = string("expand_dims_171"), val = tensor([0])]; - tensor expand_dims_172 = const()[name = string("expand_dims_172"), val = tensor([13])]; - int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)]; - bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)]; - tensor concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (expand_dims_168, expand_dims_169, current_pos, expand_dims_171))[name = string("concat_114")]; - tensor concat_115_values1_0 = const()[name = string("concat_115_values1_0"), val = tensor([0])]; - tensor concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor([0])]; - int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)]; - bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)]; - tensor concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (expand_dims_172, concat_115_values1_0, var_1909, concat_115_values3_0))[name = string("concat_115")]; - tensor model_model_kv_cache_local_internal_tensor_assign_25_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_25_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_25_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_25_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_25_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_25_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_25_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_114, begin_mask = model_model_kv_cache_local_internal_tensor_assign_25_begin_mask_0, end = concat_115, end_mask = model_model_kv_cache_local_internal_tensor_assign_25_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_25_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_25_stride_0, update = key_states_57_cast_fp16, x = coreml_update_state_79)[name = string("model_model_kv_cache_local_internal_tensor_assign_25_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_25_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_28_write_state")]; - tensor coreml_update_state_80 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_28")]; - tensor expand_dims_174 = const()[name = string("expand_dims_174"), val = tensor([34])]; - tensor expand_dims_175 = const()[name = string("expand_dims_175"), val = tensor([0])]; - tensor expand_dims_177 = const()[name = string("expand_dims_177"), val = tensor([0])]; - tensor expand_dims_178 = const()[name = string("expand_dims_178"), val = tensor([35])]; - int32 concat_118_axis_0 = const()[name = string("concat_118_axis_0"), val = int32(0)]; - bool concat_118_interleave_0 = const()[name = string("concat_118_interleave_0"), val = bool(false)]; - tensor concat_118 = concat(axis = concat_118_axis_0, interleave = concat_118_interleave_0, values = (expand_dims_174, expand_dims_175, current_pos, expand_dims_177))[name = string("concat_118")]; - tensor concat_119_values1_0 = const()[name = string("concat_119_values1_0"), val = tensor([0])]; - tensor concat_119_values3_0 = const()[name = string("concat_119_values3_0"), val = tensor([0])]; - int32 concat_119_axis_0 = const()[name = string("concat_119_axis_0"), val = int32(0)]; - bool concat_119_interleave_0 = const()[name = string("concat_119_interleave_0"), val = bool(false)]; - tensor concat_119 = concat(axis = concat_119_axis_0, interleave = concat_119_interleave_0, values = (expand_dims_178, concat_119_values1_0, var_1909, concat_119_values3_0))[name = string("concat_119")]; - tensor model_model_kv_cache_local_internal_tensor_assign_26_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_26_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_26_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_26_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_26_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_26_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_26_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_118, begin_mask = model_model_kv_cache_local_internal_tensor_assign_26_begin_mask_0, end = concat_119, end_mask = model_model_kv_cache_local_internal_tensor_assign_26_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_26_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_26_stride_0, update = var_10277, x = coreml_update_state_80)[name = string("model_model_kv_cache_local_internal_tensor_assign_26_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_26_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_29_write_state")]; - tensor coreml_update_state_81 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_29")]; - tensor var_10445_begin_0 = const()[name = string("op_10445_begin_0"), val = tensor([12, 0, 0, 0])]; - tensor var_10445_end_0 = const()[name = string("op_10445_end_0"), val = tensor([13, 1, 512, 256])]; - tensor var_10445_end_mask_0 = const()[name = string("op_10445_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_10445_cast_fp16 = slice_by_index(begin = var_10445_begin_0, end = var_10445_end_0, end_mask = var_10445_end_mask_0, x = coreml_update_state_81)[name = string("op_10445_cast_fp16")]; - tensor var_10452_begin_0 = const()[name = string("op_10452_begin_0"), val = tensor([34, 0, 0, 0])]; - tensor var_10452_end_0 = const()[name = string("op_10452_end_0"), val = tensor([35, 1, 512, 256])]; - tensor var_10452_end_mask_0 = const()[name = string("op_10452_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_10452_cast_fp16 = slice_by_index(begin = var_10452_begin_0, end = var_10452_end_0, end_mask = var_10452_end_mask_0, x = coreml_update_state_81)[name = string("op_10452_cast_fp16")]; - tensor var_10489 = const()[name = string("op_10489"), val = tensor([1, 4, 1, 1])]; - tensor x_229_cast_fp16 = tile(reps = var_10489, x = var_10445_cast_fp16)[name = string("x_229_cast_fp16")]; - tensor var_10509 = const()[name = string("op_10509"), val = tensor([1, 4, 1, 1])]; - tensor x_235_cast_fp16 = tile(reps = var_10509, x = var_10452_cast_fp16)[name = string("x_235_cast_fp16")]; - bool var_10536_transpose_x_1 = const()[name = string("op_10536_transpose_x_1"), val = bool(false)]; - bool var_10536_transpose_y_1 = const()[name = string("op_10536_transpose_y_1"), val = bool(true)]; - tensor var_10536 = matmul(transpose_x = var_10536_transpose_x_1, transpose_y = var_10536_transpose_y_1, x = query_states_57_cast_fp16, y = x_229_cast_fp16)[name = string("op_10536")]; - fp16 var_10537_to_fp16 = const()[name = string("op_10537_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_85_cast_fp16 = mul(x = var_10536, y = var_10537_to_fp16)[name = string("attn_weights_85_cast_fp16")]; - tensor attn_weights_87_cast_fp16 = add(x = attn_weights_85_cast_fp16, y = var_2083)[name = string("attn_weights_87_cast_fp16")]; - int32 var_10572 = const()[name = string("op_10572"), val = int32(-1)]; - tensor attn_weights_89_cast_fp16 = softmax(axis = var_10572, x = attn_weights_87_cast_fp16)[name = string("attn_weights_89_cast_fp16")]; - bool attn_output_141_transpose_x_0 = const()[name = string("attn_output_141_transpose_x_0"), val = bool(false)]; - bool attn_output_141_transpose_y_0 = const()[name = string("attn_output_141_transpose_y_0"), val = bool(false)]; - tensor attn_output_141_cast_fp16 = matmul(transpose_x = attn_output_141_transpose_x_0, transpose_y = attn_output_141_transpose_y_0, x = attn_weights_89_cast_fp16, y = x_235_cast_fp16)[name = string("attn_output_141_cast_fp16")]; - tensor var_10583_perm_0 = const()[name = string("op_10583_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_10587 = const()[name = string("op_10587"), val = tensor([1, 1, 1024])]; - tensor var_10583_cast_fp16 = transpose(perm = var_10583_perm_0, x = attn_output_141_cast_fp16)[name = string("transpose_70")]; - tensor attn_output_145_cast_fp16 = reshape(shape = var_10587, x = var_10583_cast_fp16)[name = string("attn_output_145_cast_fp16")]; - tensor var_10592 = const()[name = string("op_10592"), val = tensor([0, 2, 1])]; - string var_10608_pad_type_0 = const()[name = string("op_10608_pad_type_0"), val = string("valid")]; - int32 var_10608_groups_0 = const()[name = string("op_10608_groups_0"), val = int32(1)]; - tensor var_10608_strides_0 = const()[name = string("op_10608_strides_0"), val = tensor([1])]; - tensor var_10608_pad_0 = const()[name = string("op_10608_pad_0"), val = tensor([0, 0])]; - tensor var_10608_dilations_0 = const()[name = string("op_10608_dilations_0"), val = tensor([1])]; - tensor squeeze_14_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318912256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319797056))))[name = string("squeeze_14_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_10593_cast_fp16 = transpose(perm = var_10592, x = attn_output_145_cast_fp16)[name = string("transpose_69")]; - tensor var_10608_cast_fp16 = conv(dilations = var_10608_dilations_0, groups = var_10608_groups_0, pad = var_10608_pad_0, pad_type = var_10608_pad_type_0, strides = var_10608_strides_0, weight = squeeze_14_cast_fp16_to_fp32_to_fp16_palettized, x = var_10593_cast_fp16)[name = string("op_10608_cast_fp16")]; - tensor var_10612 = const()[name = string("op_10612"), val = tensor([0, 2, 1])]; - int32 var_10623 = const()[name = string("op_10623"), val = int32(-1)]; - fp16 const_558_promoted_to_fp16 = const()[name = string("const_558_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_233_cast_fp16 = transpose(perm = var_10612, x = var_10608_cast_fp16)[name = string("transpose_68")]; - tensor var_10625_cast_fp16 = mul(x = hidden_states_233_cast_fp16, y = const_558_promoted_to_fp16)[name = string("op_10625_cast_fp16")]; - bool input_291_interleave_0 = const()[name = string("input_291_interleave_0"), val = bool(false)]; - tensor input_291_cast_fp16 = concat(axis = var_10623, interleave = input_291_interleave_0, values = (hidden_states_233_cast_fp16, var_10625_cast_fp16))[name = string("input_291_cast_fp16")]; - tensor normed_349_axes_0 = const()[name = string("normed_349_axes_0"), val = tensor([-1])]; - fp16 var_10620_to_fp16 = const()[name = string("op_10620_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_349_cast_fp16 = layer_norm(axes = normed_349_axes_0, epsilon = var_10620_to_fp16, x = input_291_cast_fp16)[name = string("normed_349_cast_fp16")]; - tensor normed_351_begin_0 = const()[name = string("normed_351_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_351_end_0 = const()[name = string("normed_351_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_351_end_mask_0 = const()[name = string("normed_351_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_351_cast_fp16 = slice_by_index(begin = normed_351_begin_0, end = normed_351_end_0, end_mask = normed_351_end_mask_0, x = normed_349_cast_fp16)[name = string("normed_351_cast_fp16")]; - tensor var_10639_to_fp16 = const()[name = string("op_10639_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319815552)))]; - tensor attn_output_149_cast_fp16 = mul(x = normed_351_cast_fp16, y = var_10639_to_fp16)[name = string("attn_output_149_cast_fp16")]; - tensor hidden_states_235_cast_fp16 = add(x = hidden_states_225_cast_fp16, y = attn_output_149_cast_fp16)[name = string("hidden_states_235_cast_fp16")]; - int32 var_10652 = const()[name = string("op_10652"), val = int32(-1)]; - fp16 const_562_promoted_to_fp16 = const()[name = string("const_562_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_10654_cast_fp16 = mul(x = hidden_states_235_cast_fp16, y = const_562_promoted_to_fp16)[name = string("op_10654_cast_fp16")]; - bool input_293_interleave_0 = const()[name = string("input_293_interleave_0"), val = bool(false)]; - tensor input_293_cast_fp16 = concat(axis = var_10652, interleave = input_293_interleave_0, values = (hidden_states_235_cast_fp16, var_10654_cast_fp16))[name = string("input_293_cast_fp16")]; - tensor normed_353_axes_0 = const()[name = string("normed_353_axes_0"), val = tensor([-1])]; - fp16 var_10649_to_fp16 = const()[name = string("op_10649_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_353_cast_fp16 = layer_norm(axes = normed_353_axes_0, epsilon = var_10649_to_fp16, x = input_293_cast_fp16)[name = string("normed_353_cast_fp16")]; - tensor normed_355_begin_0 = const()[name = string("normed_355_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_355_end_0 = const()[name = string("normed_355_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_355_end_mask_0 = const()[name = string("normed_355_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_355_cast_fp16 = slice_by_index(begin = normed_355_begin_0, end = normed_355_end_0, end_mask = normed_355_end_mask_0, x = normed_353_cast_fp16)[name = string("normed_355_cast_fp16")]; - tensor var_10668_to_fp16 = const()[name = string("op_10668_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319817920)))]; - tensor x_237_cast_fp16 = mul(x = normed_355_cast_fp16, y = var_10668_to_fp16)[name = string("x_237_cast_fp16")]; - tensor var_10680 = const()[name = string("op_10680"), val = tensor([0, 2, 1])]; - tensor input_295_axes_0 = const()[name = string("input_295_axes_0"), val = tensor([2])]; - tensor var_10681_cast_fp16 = transpose(perm = var_10680, x = x_237_cast_fp16)[name = string("transpose_67")]; - tensor input_295_cast_fp16 = expand_dims(axes = input_295_axes_0, x = var_10681_cast_fp16)[name = string("input_295_cast_fp16")]; - string x_239_pad_type_0 = const()[name = string("x_239_pad_type_0"), val = string("valid")]; - tensor x_239_strides_0 = const()[name = string("x_239_strides_0"), val = tensor([1, 1])]; - tensor x_239_pad_0 = const()[name = string("x_239_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_239_dilations_0 = const()[name = string("x_239_dilations_0"), val = tensor([1, 1])]; - int32 x_239_groups_0 = const()[name = string("x_239_groups_0"), val = int32(1)]; - tensor model_model_layers_14_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319820288))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325792320))))[name = string("model_model_layers_14_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_239_cast_fp16 = conv(dilations = x_239_dilations_0, groups = x_239_groups_0, pad = x_239_pad_0, pad_type = x_239_pad_type_0, strides = x_239_strides_0, weight = model_model_layers_14_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_295_cast_fp16)[name = string("x_239_cast_fp16")]; - string b_29_pad_type_0 = const()[name = string("b_29_pad_type_0"), val = string("valid")]; - tensor b_29_strides_0 = const()[name = string("b_29_strides_0"), val = tensor([1, 1])]; - tensor b_29_pad_0 = const()[name = string("b_29_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_29_dilations_0 = const()[name = string("b_29_dilations_0"), val = tensor([1, 1])]; - int32 b_29_groups_0 = const()[name = string("b_29_groups_0"), val = int32(1)]; - tensor model_model_layers_14_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325902976))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(331875008))))[name = string("model_model_layers_14_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_29_cast_fp16 = conv(dilations = b_29_dilations_0, groups = b_29_groups_0, pad = b_29_pad_0, pad_type = b_29_pad_type_0, strides = b_29_strides_0, weight = model_model_layers_14_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_295_cast_fp16)[name = string("b_29_cast_fp16")]; - string var_10706_mode_0 = const()[name = string("op_10706_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_10706_cast_fp16 = gelu(mode = var_10706_mode_0, x = x_239_cast_fp16)[name = string("op_10706_cast_fp16")]; - tensor input_297_cast_fp16 = mul(x = var_10706_cast_fp16, y = b_29_cast_fp16)[name = string("input_297_cast_fp16")]; - string e_29_pad_type_0 = const()[name = string("e_29_pad_type_0"), val = string("valid")]; - tensor e_29_strides_0 = const()[name = string("e_29_strides_0"), val = tensor([1, 1])]; - tensor e_29_pad_0 = const()[name = string("e_29_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_29_dilations_0 = const()[name = string("e_29_dilations_0"), val = tensor([1, 1])]; - int32 e_29_groups_0 = const()[name = string("e_29_groups_0"), val = int32(1)]; - tensor model_model_layers_14_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(331985664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337957696))))[name = string("model_model_layers_14_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_29_cast_fp16 = conv(dilations = e_29_dilations_0, groups = e_29_groups_0, pad = e_29_pad_0, pad_type = e_29_pad_type_0, strides = e_29_strides_0, weight = model_model_layers_14_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_297_cast_fp16)[name = string("e_29_cast_fp16")]; - tensor var_10714_axes_0 = const()[name = string("op_10714_axes_0"), val = tensor([2])]; - tensor var_10714_cast_fp16 = squeeze(axes = var_10714_axes_0, x = e_29_cast_fp16)[name = string("op_10714_cast_fp16")]; - tensor var_10715 = const()[name = string("op_10715"), val = tensor([0, 2, 1])]; - int32 var_10726 = const()[name = string("op_10726"), val = int32(-1)]; - fp16 const_566_promoted_to_fp16 = const()[name = string("const_566_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_237_cast_fp16 = transpose(perm = var_10715, x = var_10714_cast_fp16)[name = string("transpose_66")]; - tensor var_10728_cast_fp16 = mul(x = hidden_states_237_cast_fp16, y = const_566_promoted_to_fp16)[name = string("op_10728_cast_fp16")]; - bool input_299_interleave_0 = const()[name = string("input_299_interleave_0"), val = bool(false)]; - tensor input_299_cast_fp16 = concat(axis = var_10726, interleave = input_299_interleave_0, values = (hidden_states_237_cast_fp16, var_10728_cast_fp16))[name = string("input_299_cast_fp16")]; - tensor normed_357_axes_0 = const()[name = string("normed_357_axes_0"), val = tensor([-1])]; - fp16 var_10723_to_fp16 = const()[name = string("op_10723_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_357_cast_fp16 = layer_norm(axes = normed_357_axes_0, epsilon = var_10723_to_fp16, x = input_299_cast_fp16)[name = string("normed_357_cast_fp16")]; - tensor normed_359_begin_0 = const()[name = string("normed_359_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_359_end_0 = const()[name = string("normed_359_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_359_end_mask_0 = const()[name = string("normed_359_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_359_cast_fp16 = slice_by_index(begin = normed_359_begin_0, end = normed_359_end_0, end_mask = normed_359_end_mask_0, x = normed_357_cast_fp16)[name = string("normed_359_cast_fp16")]; - tensor var_10742_to_fp16 = const()[name = string("op_10742_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337976192)))]; - tensor hidden_states_239_cast_fp16 = mul(x = normed_359_cast_fp16, y = var_10742_to_fp16)[name = string("hidden_states_239_cast_fp16")]; - tensor hidden_states_241_cast_fp16 = add(x = hidden_states_235_cast_fp16, y = hidden_states_239_cast_fp16)[name = string("hidden_states_241_cast_fp16")]; - int32 var_10793 = const()[name = string("op_10793"), val = int32(-1)]; - fp16 const_570_promoted_to_fp16 = const()[name = string("const_570_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_10795_cast_fp16 = mul(x = hidden_states_241_cast_fp16, y = const_570_promoted_to_fp16)[name = string("op_10795_cast_fp16")]; - bool input_301_interleave_0 = const()[name = string("input_301_interleave_0"), val = bool(false)]; - tensor input_301_cast_fp16 = concat(axis = var_10793, interleave = input_301_interleave_0, values = (hidden_states_241_cast_fp16, var_10795_cast_fp16))[name = string("input_301_cast_fp16")]; - tensor normed_361_axes_0 = const()[name = string("normed_361_axes_0"), val = tensor([-1])]; - fp16 var_10790_to_fp16 = const()[name = string("op_10790_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_361_cast_fp16 = layer_norm(axes = normed_361_axes_0, epsilon = var_10790_to_fp16, x = input_301_cast_fp16)[name = string("normed_361_cast_fp16")]; - tensor normed_363_begin_0 = const()[name = string("normed_363_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_363_end_0 = const()[name = string("normed_363_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_363_end_mask_0 = const()[name = string("normed_363_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_363_cast_fp16 = slice_by_index(begin = normed_363_begin_0, end = normed_363_end_0, end_mask = normed_363_end_mask_0, x = normed_361_cast_fp16)[name = string("normed_363_cast_fp16")]; - tensor var_10809_to_fp16 = const()[name = string("op_10809_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337978560)))]; - tensor hidden_states_243_cast_fp16 = mul(x = normed_363_cast_fp16, y = var_10809_to_fp16)[name = string("hidden_states_243_cast_fp16")]; - tensor var_10814 = const()[name = string("op_10814"), val = tensor([0, 2, 1])]; - tensor var_10817_axes_0 = const()[name = string("op_10817_axes_0"), val = tensor([2])]; - tensor var_10815_cast_fp16 = transpose(perm = var_10814, x = hidden_states_243_cast_fp16)[name = string("transpose_65")]; - tensor var_10817_cast_fp16 = expand_dims(axes = var_10817_axes_0, x = var_10815_cast_fp16)[name = string("op_10817_cast_fp16")]; - string var_10833_pad_type_0 = const()[name = string("op_10833_pad_type_0"), val = string("valid")]; - tensor var_10833_strides_0 = const()[name = string("op_10833_strides_0"), val = tensor([1, 1])]; - tensor var_10833_pad_0 = const()[name = string("op_10833_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_10833_dilations_0 = const()[name = string("op_10833_dilations_0"), val = tensor([1, 1])]; - int32 var_10833_groups_0 = const()[name = string("op_10833_groups_0"), val = int32(1)]; - tensor var_10833 = conv(dilations = var_10833_dilations_0, groups = var_10833_groups_0, pad = var_10833_pad_0, pad_type = var_10833_pad_type_0, strides = var_10833_strides_0, weight = model_model_layers_15_self_attn_q_proj_weight_palettized, x = var_10817_cast_fp16)[name = string("op_10833")]; - tensor var_10838 = const()[name = string("op_10838"), val = tensor([1, 4, 1, 256])]; - tensor var_10839 = reshape(shape = var_10838, x = var_10833)[name = string("op_10839")]; - string var_10855_pad_type_0 = const()[name = string("op_10855_pad_type_0"), val = string("valid")]; - tensor var_10855_strides_0 = const()[name = string("op_10855_strides_0"), val = tensor([1, 1])]; - tensor var_10855_pad_0 = const()[name = string("op_10855_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_10855_dilations_0 = const()[name = string("op_10855_dilations_0"), val = tensor([1, 1])]; - int32 var_10855_groups_0 = const()[name = string("op_10855_groups_0"), val = int32(1)]; - tensor var_10855 = conv(dilations = var_10855_dilations_0, groups = var_10855_groups_0, pad = var_10855_pad_0, pad_type = var_10855_pad_type_0, strides = var_10855_strides_0, weight = model_model_layers_15_self_attn_k_proj_weight_palettized, x = var_10817_cast_fp16)[name = string("op_10855")]; - tensor var_10860 = const()[name = string("op_10860"), val = tensor([1, 1, 1, 256])]; - tensor var_10861 = reshape(shape = var_10860, x = var_10855)[name = string("op_10861")]; - string var_10877_pad_type_0 = const()[name = string("op_10877_pad_type_0"), val = string("valid")]; - tensor var_10877_strides_0 = const()[name = string("op_10877_strides_0"), val = tensor([1, 1])]; - tensor var_10877_pad_0 = const()[name = string("op_10877_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_10877_dilations_0 = const()[name = string("op_10877_dilations_0"), val = tensor([1, 1])]; - int32 var_10877_groups_0 = const()[name = string("op_10877_groups_0"), val = int32(1)]; - tensor var_10877 = conv(dilations = var_10877_dilations_0, groups = var_10877_groups_0, pad = var_10877_pad_0, pad_type = var_10877_pad_type_0, strides = var_10877_strides_0, weight = model_model_layers_15_self_attn_v_proj_weight_palettized, x = var_10817_cast_fp16)[name = string("op_10877")]; - tensor var_10882 = const()[name = string("op_10882"), val = tensor([1, 1, 1, 256])]; - tensor var_10883 = reshape(shape = var_10882, x = var_10877)[name = string("op_10883")]; - int32 var_10898 = const()[name = string("op_10898"), val = int32(-1)]; - fp16 const_574_promoted = const()[name = string("const_574_promoted"), val = fp16(-0x1p+0)]; - tensor var_10900 = mul(x = var_10839, y = const_574_promoted)[name = string("op_10900")]; - bool input_305_interleave_0 = const()[name = string("input_305_interleave_0"), val = bool(false)]; - tensor input_305 = concat(axis = var_10898, interleave = input_305_interleave_0, values = (var_10839, var_10900))[name = string("input_305")]; - tensor normed_365_axes_0 = const()[name = string("normed_365_axes_0"), val = tensor([-1])]; - fp16 var_10895_to_fp16 = const()[name = string("op_10895_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_365_cast_fp16 = layer_norm(axes = normed_365_axes_0, epsilon = var_10895_to_fp16, x = input_305)[name = string("normed_365_cast_fp16")]; - tensor normed_367_begin_0 = const()[name = string("normed_367_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_367_end_0 = const()[name = string("normed_367_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_367_end_mask_0 = const()[name = string("normed_367_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_367 = slice_by_index(begin = normed_367_begin_0, end = normed_367_end_0, end_mask = normed_367_end_mask_0, x = normed_365_cast_fp16)[name = string("normed_367")]; - tensor var_10914_to_fp16 = const()[name = string("op_10914_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337980928)))]; - tensor q_31_cast_fp16 = mul(x = normed_367, y = var_10914_to_fp16)[name = string("q_31_cast_fp16")]; - int32 var_10925 = const()[name = string("op_10925"), val = int32(-1)]; - fp16 const_578_promoted = const()[name = string("const_578_promoted"), val = fp16(-0x1p+0)]; - tensor var_10927 = mul(x = var_10861, y = const_578_promoted)[name = string("op_10927")]; - bool input_307_interleave_0 = const()[name = string("input_307_interleave_0"), val = bool(false)]; - tensor input_307 = concat(axis = var_10925, interleave = input_307_interleave_0, values = (var_10861, var_10927))[name = string("input_307")]; - tensor normed_369_axes_0 = const()[name = string("normed_369_axes_0"), val = tensor([-1])]; - fp16 var_10922_to_fp16 = const()[name = string("op_10922_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_369_cast_fp16 = layer_norm(axes = normed_369_axes_0, epsilon = var_10922_to_fp16, x = input_307)[name = string("normed_369_cast_fp16")]; - tensor normed_371_begin_0 = const()[name = string("normed_371_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_371_end_0 = const()[name = string("normed_371_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_371_end_mask_0 = const()[name = string("normed_371_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_371 = slice_by_index(begin = normed_371_begin_0, end = normed_371_end_0, end_mask = normed_371_end_mask_0, x = normed_369_cast_fp16)[name = string("normed_371")]; - tensor var_10941_to_fp16 = const()[name = string("op_10941_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337981504)))]; - tensor k_31_cast_fp16 = mul(x = normed_371, y = var_10941_to_fp16)[name = string("k_31_cast_fp16")]; - tensor var_10943_cast_fp16 = mul(x = q_31_cast_fp16, y = cos_1_cast_fp16)[name = string("op_10943_cast_fp16")]; - tensor x1_61_begin_0 = const()[name = string("x1_61_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_61_end_0 = const()[name = string("x1_61_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_61_end_mask_0 = const()[name = string("x1_61_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_61_cast_fp16 = slice_by_index(begin = x1_61_begin_0, end = x1_61_end_0, end_mask = x1_61_end_mask_0, x = q_31_cast_fp16)[name = string("x1_61_cast_fp16")]; - tensor x2_61_begin_0 = const()[name = string("x2_61_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_61_end_0 = const()[name = string("x2_61_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_61_end_mask_0 = const()[name = string("x2_61_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_61_cast_fp16 = slice_by_index(begin = x2_61_begin_0, end = x2_61_end_0, end_mask = x2_61_end_mask_0, x = q_31_cast_fp16)[name = string("x2_61_cast_fp16")]; - fp16 const_584_promoted_to_fp16 = const()[name = string("const_584_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_10964_cast_fp16 = mul(x = x2_61_cast_fp16, y = const_584_promoted_to_fp16)[name = string("op_10964_cast_fp16")]; - int32 var_10966 = const()[name = string("op_10966"), val = int32(-1)]; - bool var_10967_interleave_0 = const()[name = string("op_10967_interleave_0"), val = bool(false)]; - tensor var_10967_cast_fp16 = concat(axis = var_10966, interleave = var_10967_interleave_0, values = (var_10964_cast_fp16, x1_61_cast_fp16))[name = string("op_10967_cast_fp16")]; - tensor var_10968_cast_fp16 = mul(x = var_10967_cast_fp16, y = sin_1_cast_fp16)[name = string("op_10968_cast_fp16")]; - tensor query_states_61_cast_fp16 = add(x = var_10943_cast_fp16, y = var_10968_cast_fp16)[name = string("query_states_61_cast_fp16")]; - tensor var_10971_cast_fp16 = mul(x = k_31_cast_fp16, y = cos_1_cast_fp16)[name = string("op_10971_cast_fp16")]; - tensor x1_63_begin_0 = const()[name = string("x1_63_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_63_end_0 = const()[name = string("x1_63_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_63_end_mask_0 = const()[name = string("x1_63_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_63_cast_fp16 = slice_by_index(begin = x1_63_begin_0, end = x1_63_end_0, end_mask = x1_63_end_mask_0, x = k_31_cast_fp16)[name = string("x1_63_cast_fp16")]; - tensor x2_63_begin_0 = const()[name = string("x2_63_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_63_end_0 = const()[name = string("x2_63_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_63_end_mask_0 = const()[name = string("x2_63_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_63_cast_fp16 = slice_by_index(begin = x2_63_begin_0, end = x2_63_end_0, end_mask = x2_63_end_mask_0, x = k_31_cast_fp16)[name = string("x2_63_cast_fp16")]; - fp16 const_587_promoted_to_fp16 = const()[name = string("const_587_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_10992_cast_fp16 = mul(x = x2_63_cast_fp16, y = const_587_promoted_to_fp16)[name = string("op_10992_cast_fp16")]; - int32 var_10994 = const()[name = string("op_10994"), val = int32(-1)]; - bool var_10995_interleave_0 = const()[name = string("op_10995_interleave_0"), val = bool(false)]; - tensor var_10995_cast_fp16 = concat(axis = var_10994, interleave = var_10995_interleave_0, values = (var_10992_cast_fp16, x1_63_cast_fp16))[name = string("op_10995_cast_fp16")]; - tensor var_10996_cast_fp16 = mul(x = var_10995_cast_fp16, y = sin_1_cast_fp16)[name = string("op_10996_cast_fp16")]; - tensor key_states_61_cast_fp16 = add(x = var_10971_cast_fp16, y = var_10996_cast_fp16)[name = string("key_states_61_cast_fp16")]; - tensor expand_dims_180 = const()[name = string("expand_dims_180"), val = tensor([13])]; - tensor expand_dims_181 = const()[name = string("expand_dims_181"), val = tensor([0])]; - tensor expand_dims_183 = const()[name = string("expand_dims_183"), val = tensor([0])]; - tensor expand_dims_184 = const()[name = string("expand_dims_184"), val = tensor([14])]; - int32 concat_122_axis_0 = const()[name = string("concat_122_axis_0"), val = int32(0)]; - bool concat_122_interleave_0 = const()[name = string("concat_122_interleave_0"), val = bool(false)]; - tensor concat_122 = concat(axis = concat_122_axis_0, interleave = concat_122_interleave_0, values = (expand_dims_180, expand_dims_181, current_pos, expand_dims_183))[name = string("concat_122")]; - tensor concat_123_values1_0 = const()[name = string("concat_123_values1_0"), val = tensor([0])]; - tensor concat_123_values3_0 = const()[name = string("concat_123_values3_0"), val = tensor([0])]; - int32 concat_123_axis_0 = const()[name = string("concat_123_axis_0"), val = int32(0)]; - bool concat_123_interleave_0 = const()[name = string("concat_123_interleave_0"), val = bool(false)]; - tensor concat_123 = concat(axis = concat_123_axis_0, interleave = concat_123_interleave_0, values = (expand_dims_184, concat_123_values1_0, var_1909, concat_123_values3_0))[name = string("concat_123")]; - tensor model_model_kv_cache_local_internal_tensor_assign_27_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_27_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_27_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_27_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_27_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_27_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_27_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_122, begin_mask = model_model_kv_cache_local_internal_tensor_assign_27_begin_mask_0, end = concat_123, end_mask = model_model_kv_cache_local_internal_tensor_assign_27_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_27_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_27_stride_0, update = key_states_61_cast_fp16, x = coreml_update_state_81)[name = string("model_model_kv_cache_local_internal_tensor_assign_27_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_27_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_30_write_state")]; - tensor coreml_update_state_82 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_30")]; - tensor expand_dims_186 = const()[name = string("expand_dims_186"), val = tensor([35])]; - tensor expand_dims_187 = const()[name = string("expand_dims_187"), val = tensor([0])]; - tensor expand_dims_189 = const()[name = string("expand_dims_189"), val = tensor([0])]; - tensor expand_dims_190 = const()[name = string("expand_dims_190"), val = tensor([36])]; - int32 concat_126_axis_0 = const()[name = string("concat_126_axis_0"), val = int32(0)]; - bool concat_126_interleave_0 = const()[name = string("concat_126_interleave_0"), val = bool(false)]; - tensor concat_126 = concat(axis = concat_126_axis_0, interleave = concat_126_interleave_0, values = (expand_dims_186, expand_dims_187, current_pos, expand_dims_189))[name = string("concat_126")]; - tensor concat_127_values1_0 = const()[name = string("concat_127_values1_0"), val = tensor([0])]; - tensor concat_127_values3_0 = const()[name = string("concat_127_values3_0"), val = tensor([0])]; - int32 concat_127_axis_0 = const()[name = string("concat_127_axis_0"), val = int32(0)]; - bool concat_127_interleave_0 = const()[name = string("concat_127_interleave_0"), val = bool(false)]; - tensor concat_127 = concat(axis = concat_127_axis_0, interleave = concat_127_interleave_0, values = (expand_dims_190, concat_127_values1_0, var_1909, concat_127_values3_0))[name = string("concat_127")]; - tensor model_model_kv_cache_local_internal_tensor_assign_28_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_28_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_28_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_28_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_28_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_28_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_28_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_126, begin_mask = model_model_kv_cache_local_internal_tensor_assign_28_begin_mask_0, end = concat_127, end_mask = model_model_kv_cache_local_internal_tensor_assign_28_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_28_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_28_stride_0, update = var_10883, x = coreml_update_state_82)[name = string("model_model_kv_cache_local_internal_tensor_assign_28_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_28_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_31_write_state")]; - tensor coreml_update_state_83 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_31")]; - tensor var_11051_begin_0 = const()[name = string("op_11051_begin_0"), val = tensor([13, 0, 0, 0])]; - tensor var_11051_end_0 = const()[name = string("op_11051_end_0"), val = tensor([14, 1, 512, 256])]; - tensor var_11051_end_mask_0 = const()[name = string("op_11051_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_11051_cast_fp16 = slice_by_index(begin = var_11051_begin_0, end = var_11051_end_0, end_mask = var_11051_end_mask_0, x = coreml_update_state_83)[name = string("op_11051_cast_fp16")]; - tensor var_11058_begin_0 = const()[name = string("op_11058_begin_0"), val = tensor([35, 0, 0, 0])]; - tensor var_11058_end_0 = const()[name = string("op_11058_end_0"), val = tensor([36, 1, 512, 256])]; - tensor var_11058_end_mask_0 = const()[name = string("op_11058_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_11058_cast_fp16 = slice_by_index(begin = var_11058_begin_0, end = var_11058_end_0, end_mask = var_11058_end_mask_0, x = coreml_update_state_83)[name = string("op_11058_cast_fp16")]; - tensor var_11095 = const()[name = string("op_11095"), val = tensor([1, 4, 1, 1])]; - tensor x_245_cast_fp16 = tile(reps = var_11095, x = var_11051_cast_fp16)[name = string("x_245_cast_fp16")]; - tensor var_11115 = const()[name = string("op_11115"), val = tensor([1, 4, 1, 1])]; - tensor x_251_cast_fp16 = tile(reps = var_11115, x = var_11058_cast_fp16)[name = string("x_251_cast_fp16")]; - bool var_11142_transpose_x_1 = const()[name = string("op_11142_transpose_x_1"), val = bool(false)]; - bool var_11142_transpose_y_1 = const()[name = string("op_11142_transpose_y_1"), val = bool(true)]; - tensor var_11142 = matmul(transpose_x = var_11142_transpose_x_1, transpose_y = var_11142_transpose_y_1, x = query_states_61_cast_fp16, y = x_245_cast_fp16)[name = string("op_11142")]; - fp16 var_11143_to_fp16 = const()[name = string("op_11143_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_91_cast_fp16 = mul(x = var_11142, y = var_11143_to_fp16)[name = string("attn_weights_91_cast_fp16")]; - tensor attn_weights_93_cast_fp16 = add(x = attn_weights_91_cast_fp16, y = var_2083)[name = string("attn_weights_93_cast_fp16")]; - int32 var_11178 = const()[name = string("op_11178"), val = int32(-1)]; - tensor attn_weights_95_cast_fp16 = softmax(axis = var_11178, x = attn_weights_93_cast_fp16)[name = string("attn_weights_95_cast_fp16")]; - bool attn_output_151_transpose_x_0 = const()[name = string("attn_output_151_transpose_x_0"), val = bool(false)]; - bool attn_output_151_transpose_y_0 = const()[name = string("attn_output_151_transpose_y_0"), val = bool(false)]; - tensor attn_output_151_cast_fp16 = matmul(transpose_x = attn_output_151_transpose_x_0, transpose_y = attn_output_151_transpose_y_0, x = attn_weights_95_cast_fp16, y = x_251_cast_fp16)[name = string("attn_output_151_cast_fp16")]; - tensor var_11189_perm_0 = const()[name = string("op_11189_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_11193 = const()[name = string("op_11193"), val = tensor([1, 1, 1024])]; - tensor var_11189_cast_fp16 = transpose(perm = var_11189_perm_0, x = attn_output_151_cast_fp16)[name = string("transpose_64")]; - tensor attn_output_155_cast_fp16 = reshape(shape = var_11193, x = var_11189_cast_fp16)[name = string("attn_output_155_cast_fp16")]; - tensor var_11198 = const()[name = string("op_11198"), val = tensor([0, 2, 1])]; - string var_11214_pad_type_0 = const()[name = string("op_11214_pad_type_0"), val = string("valid")]; - int32 var_11214_groups_0 = const()[name = string("op_11214_groups_0"), val = int32(1)]; - tensor var_11214_strides_0 = const()[name = string("op_11214_strides_0"), val = tensor([1])]; - tensor var_11214_pad_0 = const()[name = string("op_11214_pad_0"), val = tensor([0, 0])]; - tensor var_11214_dilations_0 = const()[name = string("op_11214_dilations_0"), val = tensor([1])]; - tensor squeeze_15_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337982080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338866880))))[name = string("squeeze_15_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_11199_cast_fp16 = transpose(perm = var_11198, x = attn_output_155_cast_fp16)[name = string("transpose_63")]; - tensor var_11214_cast_fp16 = conv(dilations = var_11214_dilations_0, groups = var_11214_groups_0, pad = var_11214_pad_0, pad_type = var_11214_pad_type_0, strides = var_11214_strides_0, weight = squeeze_15_cast_fp16_to_fp32_to_fp16_palettized, x = var_11199_cast_fp16)[name = string("op_11214_cast_fp16")]; - tensor var_11218 = const()[name = string("op_11218"), val = tensor([0, 2, 1])]; - int32 var_11229 = const()[name = string("op_11229"), val = int32(-1)]; - fp16 const_596_promoted_to_fp16 = const()[name = string("const_596_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_249_cast_fp16 = transpose(perm = var_11218, x = var_11214_cast_fp16)[name = string("transpose_62")]; - tensor var_11231_cast_fp16 = mul(x = hidden_states_249_cast_fp16, y = const_596_promoted_to_fp16)[name = string("op_11231_cast_fp16")]; - bool input_311_interleave_0 = const()[name = string("input_311_interleave_0"), val = bool(false)]; - tensor input_311_cast_fp16 = concat(axis = var_11229, interleave = input_311_interleave_0, values = (hidden_states_249_cast_fp16, var_11231_cast_fp16))[name = string("input_311_cast_fp16")]; - tensor normed_373_axes_0 = const()[name = string("normed_373_axes_0"), val = tensor([-1])]; - fp16 var_11226_to_fp16 = const()[name = string("op_11226_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_373_cast_fp16 = layer_norm(axes = normed_373_axes_0, epsilon = var_11226_to_fp16, x = input_311_cast_fp16)[name = string("normed_373_cast_fp16")]; - tensor normed_375_begin_0 = const()[name = string("normed_375_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_375_end_0 = const()[name = string("normed_375_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_375_end_mask_0 = const()[name = string("normed_375_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_375_cast_fp16 = slice_by_index(begin = normed_375_begin_0, end = normed_375_end_0, end_mask = normed_375_end_mask_0, x = normed_373_cast_fp16)[name = string("normed_375_cast_fp16")]; - tensor var_11245_to_fp16 = const()[name = string("op_11245_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338885376)))]; - tensor attn_output_159_cast_fp16 = mul(x = normed_375_cast_fp16, y = var_11245_to_fp16)[name = string("attn_output_159_cast_fp16")]; - tensor hidden_states_251_cast_fp16 = add(x = hidden_states_241_cast_fp16, y = attn_output_159_cast_fp16)[name = string("hidden_states_251_cast_fp16")]; - int32 var_11258 = const()[name = string("op_11258"), val = int32(-1)]; - fp16 const_600_promoted_to_fp16 = const()[name = string("const_600_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_11260_cast_fp16 = mul(x = hidden_states_251_cast_fp16, y = const_600_promoted_to_fp16)[name = string("op_11260_cast_fp16")]; - bool input_313_interleave_0 = const()[name = string("input_313_interleave_0"), val = bool(false)]; - tensor input_313_cast_fp16 = concat(axis = var_11258, interleave = input_313_interleave_0, values = (hidden_states_251_cast_fp16, var_11260_cast_fp16))[name = string("input_313_cast_fp16")]; - tensor normed_377_axes_0 = const()[name = string("normed_377_axes_0"), val = tensor([-1])]; - fp16 var_11255_to_fp16 = const()[name = string("op_11255_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_377_cast_fp16 = layer_norm(axes = normed_377_axes_0, epsilon = var_11255_to_fp16, x = input_313_cast_fp16)[name = string("normed_377_cast_fp16")]; - tensor normed_379_begin_0 = const()[name = string("normed_379_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_379_end_0 = const()[name = string("normed_379_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_379_end_mask_0 = const()[name = string("normed_379_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_379_cast_fp16 = slice_by_index(begin = normed_379_begin_0, end = normed_379_end_0, end_mask = normed_379_end_mask_0, x = normed_377_cast_fp16)[name = string("normed_379_cast_fp16")]; - tensor var_11274_to_fp16 = const()[name = string("op_11274_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338887744)))]; - tensor x_253_cast_fp16 = mul(x = normed_379_cast_fp16, y = var_11274_to_fp16)[name = string("x_253_cast_fp16")]; - tensor var_11286 = const()[name = string("op_11286"), val = tensor([0, 2, 1])]; - tensor input_315_axes_0 = const()[name = string("input_315_axes_0"), val = tensor([2])]; - tensor var_11287_cast_fp16 = transpose(perm = var_11286, x = x_253_cast_fp16)[name = string("transpose_61")]; - tensor input_315_cast_fp16 = expand_dims(axes = input_315_axes_0, x = var_11287_cast_fp16)[name = string("input_315_cast_fp16")]; - string x_255_pad_type_0 = const()[name = string("x_255_pad_type_0"), val = string("valid")]; - tensor x_255_strides_0 = const()[name = string("x_255_strides_0"), val = tensor([1, 1])]; - tensor x_255_pad_0 = const()[name = string("x_255_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_255_dilations_0 = const()[name = string("x_255_dilations_0"), val = tensor([1, 1])]; - int32 x_255_groups_0 = const()[name = string("x_255_groups_0"), val = int32(1)]; - tensor model_model_layers_15_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338890112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344862144))))[name = string("model_model_layers_15_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_255_cast_fp16 = conv(dilations = x_255_dilations_0, groups = x_255_groups_0, pad = x_255_pad_0, pad_type = x_255_pad_type_0, strides = x_255_strides_0, weight = model_model_layers_15_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_315_cast_fp16)[name = string("x_255_cast_fp16")]; - string b_31_pad_type_0 = const()[name = string("b_31_pad_type_0"), val = string("valid")]; - tensor b_31_strides_0 = const()[name = string("b_31_strides_0"), val = tensor([1, 1])]; - tensor b_31_pad_0 = const()[name = string("b_31_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_31_dilations_0 = const()[name = string("b_31_dilations_0"), val = tensor([1, 1])]; - int32 b_31_groups_0 = const()[name = string("b_31_groups_0"), val = int32(1)]; - tensor model_model_layers_15_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(344972800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350944832))))[name = string("model_model_layers_15_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_31_cast_fp16 = conv(dilations = b_31_dilations_0, groups = b_31_groups_0, pad = b_31_pad_0, pad_type = b_31_pad_type_0, strides = b_31_strides_0, weight = model_model_layers_15_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_315_cast_fp16)[name = string("b_31_cast_fp16")]; - string var_11312_mode_0 = const()[name = string("op_11312_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_11312_cast_fp16 = gelu(mode = var_11312_mode_0, x = x_255_cast_fp16)[name = string("op_11312_cast_fp16")]; - tensor input_317_cast_fp16 = mul(x = var_11312_cast_fp16, y = b_31_cast_fp16)[name = string("input_317_cast_fp16")]; - string e_31_pad_type_0 = const()[name = string("e_31_pad_type_0"), val = string("valid")]; - tensor e_31_strides_0 = const()[name = string("e_31_strides_0"), val = tensor([1, 1])]; - tensor e_31_pad_0 = const()[name = string("e_31_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_31_dilations_0 = const()[name = string("e_31_dilations_0"), val = tensor([1, 1])]; - int32 e_31_groups_0 = const()[name = string("e_31_groups_0"), val = int32(1)]; - tensor model_model_layers_15_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(351055488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357027520))))[name = string("model_model_layers_15_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_31_cast_fp16 = conv(dilations = e_31_dilations_0, groups = e_31_groups_0, pad = e_31_pad_0, pad_type = e_31_pad_type_0, strides = e_31_strides_0, weight = model_model_layers_15_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_317_cast_fp16)[name = string("e_31_cast_fp16")]; - tensor var_11320_axes_0 = const()[name = string("op_11320_axes_0"), val = tensor([2])]; - tensor var_11320_cast_fp16 = squeeze(axes = var_11320_axes_0, x = e_31_cast_fp16)[name = string("op_11320_cast_fp16")]; - tensor var_11321 = const()[name = string("op_11321"), val = tensor([0, 2, 1])]; - int32 var_11332 = const()[name = string("op_11332"), val = int32(-1)]; - fp16 const_604_promoted_to_fp16 = const()[name = string("const_604_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_253_cast_fp16 = transpose(perm = var_11321, x = var_11320_cast_fp16)[name = string("transpose_60")]; - tensor var_11334_cast_fp16 = mul(x = hidden_states_253_cast_fp16, y = const_604_promoted_to_fp16)[name = string("op_11334_cast_fp16")]; - bool input_319_interleave_0 = const()[name = string("input_319_interleave_0"), val = bool(false)]; - tensor input_319_cast_fp16 = concat(axis = var_11332, interleave = input_319_interleave_0, values = (hidden_states_253_cast_fp16, var_11334_cast_fp16))[name = string("input_319_cast_fp16")]; - tensor normed_381_axes_0 = const()[name = string("normed_381_axes_0"), val = tensor([-1])]; - fp16 var_11329_to_fp16 = const()[name = string("op_11329_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_381_cast_fp16 = layer_norm(axes = normed_381_axes_0, epsilon = var_11329_to_fp16, x = input_319_cast_fp16)[name = string("normed_381_cast_fp16")]; - tensor normed_383_begin_0 = const()[name = string("normed_383_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_383_end_0 = const()[name = string("normed_383_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_383_end_mask_0 = const()[name = string("normed_383_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_383_cast_fp16 = slice_by_index(begin = normed_383_begin_0, end = normed_383_end_0, end_mask = normed_383_end_mask_0, x = normed_381_cast_fp16)[name = string("normed_383_cast_fp16")]; - tensor var_11348_to_fp16 = const()[name = string("op_11348_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357046016)))]; - tensor hidden_states_255_cast_fp16 = mul(x = normed_383_cast_fp16, y = var_11348_to_fp16)[name = string("hidden_states_255_cast_fp16")]; - tensor hidden_states_257_cast_fp16 = add(x = hidden_states_251_cast_fp16, y = hidden_states_255_cast_fp16)[name = string("hidden_states_257_cast_fp16")]; - int32 var_11399 = const()[name = string("op_11399"), val = int32(-1)]; - fp16 const_608_promoted_to_fp16 = const()[name = string("const_608_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_11401_cast_fp16 = mul(x = hidden_states_257_cast_fp16, y = const_608_promoted_to_fp16)[name = string("op_11401_cast_fp16")]; - bool input_321_interleave_0 = const()[name = string("input_321_interleave_0"), val = bool(false)]; - tensor input_321_cast_fp16 = concat(axis = var_11399, interleave = input_321_interleave_0, values = (hidden_states_257_cast_fp16, var_11401_cast_fp16))[name = string("input_321_cast_fp16")]; - tensor normed_385_axes_0 = const()[name = string("normed_385_axes_0"), val = tensor([-1])]; - fp16 var_11396_to_fp16 = const()[name = string("op_11396_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_385_cast_fp16 = layer_norm(axes = normed_385_axes_0, epsilon = var_11396_to_fp16, x = input_321_cast_fp16)[name = string("normed_385_cast_fp16")]; - tensor normed_387_begin_0 = const()[name = string("normed_387_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_387_end_0 = const()[name = string("normed_387_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_387_end_mask_0 = const()[name = string("normed_387_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_387_cast_fp16 = slice_by_index(begin = normed_387_begin_0, end = normed_387_end_0, end_mask = normed_387_end_mask_0, x = normed_385_cast_fp16)[name = string("normed_387_cast_fp16")]; - tensor var_11415_to_fp16 = const()[name = string("op_11415_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357048384)))]; - tensor hidden_states_259_cast_fp16 = mul(x = normed_387_cast_fp16, y = var_11415_to_fp16)[name = string("hidden_states_259_cast_fp16")]; - tensor var_11420 = const()[name = string("op_11420"), val = tensor([0, 2, 1])]; - tensor var_11423_axes_0 = const()[name = string("op_11423_axes_0"), val = tensor([2])]; - tensor var_11421_cast_fp16 = transpose(perm = var_11420, x = hidden_states_259_cast_fp16)[name = string("transpose_59")]; - tensor var_11423_cast_fp16 = expand_dims(axes = var_11423_axes_0, x = var_11421_cast_fp16)[name = string("op_11423_cast_fp16")]; - string var_11439_pad_type_0 = const()[name = string("op_11439_pad_type_0"), val = string("valid")]; - tensor var_11439_strides_0 = const()[name = string("op_11439_strides_0"), val = tensor([1, 1])]; - tensor var_11439_pad_0 = const()[name = string("op_11439_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_11439_dilations_0 = const()[name = string("op_11439_dilations_0"), val = tensor([1, 1])]; - int32 var_11439_groups_0 = const()[name = string("op_11439_groups_0"), val = int32(1)]; - tensor var_11439 = conv(dilations = var_11439_dilations_0, groups = var_11439_groups_0, pad = var_11439_pad_0, pad_type = var_11439_pad_type_0, strides = var_11439_strides_0, weight = model_model_layers_16_self_attn_q_proj_weight_palettized, x = var_11423_cast_fp16)[name = string("op_11439")]; - tensor var_11444 = const()[name = string("op_11444"), val = tensor([1, 4, 1, 256])]; - tensor var_11445 = reshape(shape = var_11444, x = var_11439)[name = string("op_11445")]; - string var_11461_pad_type_0 = const()[name = string("op_11461_pad_type_0"), val = string("valid")]; - tensor var_11461_strides_0 = const()[name = string("op_11461_strides_0"), val = tensor([1, 1])]; - tensor var_11461_pad_0 = const()[name = string("op_11461_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_11461_dilations_0 = const()[name = string("op_11461_dilations_0"), val = tensor([1, 1])]; - int32 var_11461_groups_0 = const()[name = string("op_11461_groups_0"), val = int32(1)]; - tensor var_11461 = conv(dilations = var_11461_dilations_0, groups = var_11461_groups_0, pad = var_11461_pad_0, pad_type = var_11461_pad_type_0, strides = var_11461_strides_0, weight = model_model_layers_16_self_attn_k_proj_weight_palettized, x = var_11423_cast_fp16)[name = string("op_11461")]; - tensor var_11466 = const()[name = string("op_11466"), val = tensor([1, 1, 1, 256])]; - tensor var_11467 = reshape(shape = var_11466, x = var_11461)[name = string("op_11467")]; - string var_11483_pad_type_0 = const()[name = string("op_11483_pad_type_0"), val = string("valid")]; - tensor var_11483_strides_0 = const()[name = string("op_11483_strides_0"), val = tensor([1, 1])]; - tensor var_11483_pad_0 = const()[name = string("op_11483_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_11483_dilations_0 = const()[name = string("op_11483_dilations_0"), val = tensor([1, 1])]; - int32 var_11483_groups_0 = const()[name = string("op_11483_groups_0"), val = int32(1)]; - tensor var_11483 = conv(dilations = var_11483_dilations_0, groups = var_11483_groups_0, pad = var_11483_pad_0, pad_type = var_11483_pad_type_0, strides = var_11483_strides_0, weight = model_model_layers_16_self_attn_v_proj_weight_palettized, x = var_11423_cast_fp16)[name = string("op_11483")]; - tensor var_11488 = const()[name = string("op_11488"), val = tensor([1, 1, 1, 256])]; - tensor var_11489 = reshape(shape = var_11488, x = var_11483)[name = string("op_11489")]; - int32 var_11504 = const()[name = string("op_11504"), val = int32(-1)]; - fp16 const_612_promoted = const()[name = string("const_612_promoted"), val = fp16(-0x1p+0)]; - tensor var_11506 = mul(x = var_11445, y = const_612_promoted)[name = string("op_11506")]; - bool input_325_interleave_0 = const()[name = string("input_325_interleave_0"), val = bool(false)]; - tensor input_325 = concat(axis = var_11504, interleave = input_325_interleave_0, values = (var_11445, var_11506))[name = string("input_325")]; - tensor normed_389_axes_0 = const()[name = string("normed_389_axes_0"), val = tensor([-1])]; - fp16 var_11501_to_fp16 = const()[name = string("op_11501_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_389_cast_fp16 = layer_norm(axes = normed_389_axes_0, epsilon = var_11501_to_fp16, x = input_325)[name = string("normed_389_cast_fp16")]; - tensor normed_391_begin_0 = const()[name = string("normed_391_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_391_end_0 = const()[name = string("normed_391_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_391_end_mask_0 = const()[name = string("normed_391_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_391 = slice_by_index(begin = normed_391_begin_0, end = normed_391_end_0, end_mask = normed_391_end_mask_0, x = normed_389_cast_fp16)[name = string("normed_391")]; - tensor var_11520_to_fp16 = const()[name = string("op_11520_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357050752)))]; - tensor q_33_cast_fp16 = mul(x = normed_391, y = var_11520_to_fp16)[name = string("q_33_cast_fp16")]; - int32 var_11531 = const()[name = string("op_11531"), val = int32(-1)]; - fp16 const_616_promoted = const()[name = string("const_616_promoted"), val = fp16(-0x1p+0)]; - tensor var_11533 = mul(x = var_11467, y = const_616_promoted)[name = string("op_11533")]; - bool input_327_interleave_0 = const()[name = string("input_327_interleave_0"), val = bool(false)]; - tensor input_327 = concat(axis = var_11531, interleave = input_327_interleave_0, values = (var_11467, var_11533))[name = string("input_327")]; - tensor normed_393_axes_0 = const()[name = string("normed_393_axes_0"), val = tensor([-1])]; - fp16 var_11528_to_fp16 = const()[name = string("op_11528_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_393_cast_fp16 = layer_norm(axes = normed_393_axes_0, epsilon = var_11528_to_fp16, x = input_327)[name = string("normed_393_cast_fp16")]; - tensor normed_395_begin_0 = const()[name = string("normed_395_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_395_end_0 = const()[name = string("normed_395_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_395_end_mask_0 = const()[name = string("normed_395_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_395 = slice_by_index(begin = normed_395_begin_0, end = normed_395_end_0, end_mask = normed_395_end_mask_0, x = normed_393_cast_fp16)[name = string("normed_395")]; - tensor var_11547_to_fp16 = const()[name = string("op_11547_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357051328)))]; - tensor k_33_cast_fp16 = mul(x = normed_395, y = var_11547_to_fp16)[name = string("k_33_cast_fp16")]; - tensor var_11549_cast_fp16 = mul(x = q_33_cast_fp16, y = cos_1_cast_fp16)[name = string("op_11549_cast_fp16")]; - tensor x1_65_begin_0 = const()[name = string("x1_65_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_65_end_0 = const()[name = string("x1_65_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_65_end_mask_0 = const()[name = string("x1_65_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_65_cast_fp16 = slice_by_index(begin = x1_65_begin_0, end = x1_65_end_0, end_mask = x1_65_end_mask_0, x = q_33_cast_fp16)[name = string("x1_65_cast_fp16")]; - tensor x2_65_begin_0 = const()[name = string("x2_65_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_65_end_0 = const()[name = string("x2_65_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_65_end_mask_0 = const()[name = string("x2_65_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_65_cast_fp16 = slice_by_index(begin = x2_65_begin_0, end = x2_65_end_0, end_mask = x2_65_end_mask_0, x = q_33_cast_fp16)[name = string("x2_65_cast_fp16")]; - fp16 const_622_promoted_to_fp16 = const()[name = string("const_622_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_11570_cast_fp16 = mul(x = x2_65_cast_fp16, y = const_622_promoted_to_fp16)[name = string("op_11570_cast_fp16")]; - int32 var_11572 = const()[name = string("op_11572"), val = int32(-1)]; - bool var_11573_interleave_0 = const()[name = string("op_11573_interleave_0"), val = bool(false)]; - tensor var_11573_cast_fp16 = concat(axis = var_11572, interleave = var_11573_interleave_0, values = (var_11570_cast_fp16, x1_65_cast_fp16))[name = string("op_11573_cast_fp16")]; - tensor var_11574_cast_fp16 = mul(x = var_11573_cast_fp16, y = sin_1_cast_fp16)[name = string("op_11574_cast_fp16")]; - tensor query_states_65_cast_fp16 = add(x = var_11549_cast_fp16, y = var_11574_cast_fp16)[name = string("query_states_65_cast_fp16")]; - tensor var_11577_cast_fp16 = mul(x = k_33_cast_fp16, y = cos_1_cast_fp16)[name = string("op_11577_cast_fp16")]; - tensor x1_67_begin_0 = const()[name = string("x1_67_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_67_end_0 = const()[name = string("x1_67_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_67_end_mask_0 = const()[name = string("x1_67_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_67_cast_fp16 = slice_by_index(begin = x1_67_begin_0, end = x1_67_end_0, end_mask = x1_67_end_mask_0, x = k_33_cast_fp16)[name = string("x1_67_cast_fp16")]; - tensor x2_67_begin_0 = const()[name = string("x2_67_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_67_end_0 = const()[name = string("x2_67_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_67_end_mask_0 = const()[name = string("x2_67_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_67_cast_fp16 = slice_by_index(begin = x2_67_begin_0, end = x2_67_end_0, end_mask = x2_67_end_mask_0, x = k_33_cast_fp16)[name = string("x2_67_cast_fp16")]; - fp16 const_625_promoted_to_fp16 = const()[name = string("const_625_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_11598_cast_fp16 = mul(x = x2_67_cast_fp16, y = const_625_promoted_to_fp16)[name = string("op_11598_cast_fp16")]; - int32 var_11600 = const()[name = string("op_11600"), val = int32(-1)]; - bool var_11601_interleave_0 = const()[name = string("op_11601_interleave_0"), val = bool(false)]; - tensor var_11601_cast_fp16 = concat(axis = var_11600, interleave = var_11601_interleave_0, values = (var_11598_cast_fp16, x1_67_cast_fp16))[name = string("op_11601_cast_fp16")]; - tensor var_11602_cast_fp16 = mul(x = var_11601_cast_fp16, y = sin_1_cast_fp16)[name = string("op_11602_cast_fp16")]; - tensor key_states_65_cast_fp16 = add(x = var_11577_cast_fp16, y = var_11602_cast_fp16)[name = string("key_states_65_cast_fp16")]; - tensor expand_dims_192 = const()[name = string("expand_dims_192"), val = tensor([14])]; - tensor expand_dims_193 = const()[name = string("expand_dims_193"), val = tensor([0])]; - tensor expand_dims_195 = const()[name = string("expand_dims_195"), val = tensor([0])]; - tensor expand_dims_196 = const()[name = string("expand_dims_196"), val = tensor([15])]; - int32 concat_130_axis_0 = const()[name = string("concat_130_axis_0"), val = int32(0)]; - bool concat_130_interleave_0 = const()[name = string("concat_130_interleave_0"), val = bool(false)]; - tensor concat_130 = concat(axis = concat_130_axis_0, interleave = concat_130_interleave_0, values = (expand_dims_192, expand_dims_193, current_pos, expand_dims_195))[name = string("concat_130")]; - tensor concat_131_values1_0 = const()[name = string("concat_131_values1_0"), val = tensor([0])]; - tensor concat_131_values3_0 = const()[name = string("concat_131_values3_0"), val = tensor([0])]; - int32 concat_131_axis_0 = const()[name = string("concat_131_axis_0"), val = int32(0)]; - bool concat_131_interleave_0 = const()[name = string("concat_131_interleave_0"), val = bool(false)]; - tensor concat_131 = concat(axis = concat_131_axis_0, interleave = concat_131_interleave_0, values = (expand_dims_196, concat_131_values1_0, var_1909, concat_131_values3_0))[name = string("concat_131")]; - tensor model_model_kv_cache_local_internal_tensor_assign_29_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_29_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_29_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_29_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_29_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_29_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_29_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_29_cast_fp16 = slice_update(begin = concat_130, begin_mask = model_model_kv_cache_local_internal_tensor_assign_29_begin_mask_0, end = concat_131, end_mask = model_model_kv_cache_local_internal_tensor_assign_29_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_29_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_29_stride_0, update = key_states_65_cast_fp16, x = coreml_update_state_83)[name = string("model_model_kv_cache_local_internal_tensor_assign_29_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_29_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_32_write_state")]; - tensor coreml_update_state_84 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_32")]; - tensor expand_dims_198 = const()[name = string("expand_dims_198"), val = tensor([36])]; - tensor expand_dims_199 = const()[name = string("expand_dims_199"), val = tensor([0])]; - tensor expand_dims_201 = const()[name = string("expand_dims_201"), val = tensor([0])]; - tensor expand_dims_202 = const()[name = string("expand_dims_202"), val = tensor([37])]; - int32 concat_134_axis_0 = const()[name = string("concat_134_axis_0"), val = int32(0)]; - bool concat_134_interleave_0 = const()[name = string("concat_134_interleave_0"), val = bool(false)]; - tensor concat_134 = concat(axis = concat_134_axis_0, interleave = concat_134_interleave_0, values = (expand_dims_198, expand_dims_199, current_pos, expand_dims_201))[name = string("concat_134")]; - tensor concat_135_values1_0 = const()[name = string("concat_135_values1_0"), val = tensor([0])]; - tensor concat_135_values3_0 = const()[name = string("concat_135_values3_0"), val = tensor([0])]; - int32 concat_135_axis_0 = const()[name = string("concat_135_axis_0"), val = int32(0)]; - bool concat_135_interleave_0 = const()[name = string("concat_135_interleave_0"), val = bool(false)]; - tensor concat_135 = concat(axis = concat_135_axis_0, interleave = concat_135_interleave_0, values = (expand_dims_202, concat_135_values1_0, var_1909, concat_135_values3_0))[name = string("concat_135")]; - tensor model_model_kv_cache_local_internal_tensor_assign_30_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_30_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_30_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_30_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_30_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_30_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_30_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_30_cast_fp16 = slice_update(begin = concat_134, begin_mask = model_model_kv_cache_local_internal_tensor_assign_30_begin_mask_0, end = concat_135, end_mask = model_model_kv_cache_local_internal_tensor_assign_30_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_30_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_30_stride_0, update = var_11489, x = coreml_update_state_84)[name = string("model_model_kv_cache_local_internal_tensor_assign_30_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_30_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_33_write_state")]; - tensor coreml_update_state_85 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_33")]; - tensor var_11657_begin_0 = const()[name = string("op_11657_begin_0"), val = tensor([14, 0, 0, 0])]; - tensor var_11657_end_0 = const()[name = string("op_11657_end_0"), val = tensor([15, 1, 512, 256])]; - tensor var_11657_end_mask_0 = const()[name = string("op_11657_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_11657_cast_fp16 = slice_by_index(begin = var_11657_begin_0, end = var_11657_end_0, end_mask = var_11657_end_mask_0, x = coreml_update_state_85)[name = string("op_11657_cast_fp16")]; - tensor var_11664_begin_0 = const()[name = string("op_11664_begin_0"), val = tensor([36, 0, 0, 0])]; - tensor var_11664_end_0 = const()[name = string("op_11664_end_0"), val = tensor([37, 1, 512, 256])]; - tensor var_11664_end_mask_0 = const()[name = string("op_11664_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_11664_cast_fp16 = slice_by_index(begin = var_11664_begin_0, end = var_11664_end_0, end_mask = var_11664_end_mask_0, x = coreml_update_state_85)[name = string("op_11664_cast_fp16")]; - tensor var_11701 = const()[name = string("op_11701"), val = tensor([1, 4, 1, 1])]; - tensor x_261_cast_fp16 = tile(reps = var_11701, x = var_11657_cast_fp16)[name = string("x_261_cast_fp16")]; - tensor var_11721 = const()[name = string("op_11721"), val = tensor([1, 4, 1, 1])]; - tensor x_267_cast_fp16 = tile(reps = var_11721, x = var_11664_cast_fp16)[name = string("x_267_cast_fp16")]; - bool var_11748_transpose_x_1 = const()[name = string("op_11748_transpose_x_1"), val = bool(false)]; - bool var_11748_transpose_y_1 = const()[name = string("op_11748_transpose_y_1"), val = bool(true)]; - tensor var_11748 = matmul(transpose_x = var_11748_transpose_x_1, transpose_y = var_11748_transpose_y_1, x = query_states_65_cast_fp16, y = x_261_cast_fp16)[name = string("op_11748")]; - fp16 var_11749_to_fp16 = const()[name = string("op_11749_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_97_cast_fp16 = mul(x = var_11748, y = var_11749_to_fp16)[name = string("attn_weights_97_cast_fp16")]; - tensor attn_weights_99_cast_fp16 = add(x = attn_weights_97_cast_fp16, y = var_2083)[name = string("attn_weights_99_cast_fp16")]; - int32 var_11784 = const()[name = string("op_11784"), val = int32(-1)]; - tensor attn_weights_101_cast_fp16 = softmax(axis = var_11784, x = attn_weights_99_cast_fp16)[name = string("attn_weights_101_cast_fp16")]; - bool attn_output_161_transpose_x_0 = const()[name = string("attn_output_161_transpose_x_0"), val = bool(false)]; - bool attn_output_161_transpose_y_0 = const()[name = string("attn_output_161_transpose_y_0"), val = bool(false)]; - tensor attn_output_161_cast_fp16 = matmul(transpose_x = attn_output_161_transpose_x_0, transpose_y = attn_output_161_transpose_y_0, x = attn_weights_101_cast_fp16, y = x_267_cast_fp16)[name = string("attn_output_161_cast_fp16")]; - tensor var_11795_perm_0 = const()[name = string("op_11795_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_11799 = const()[name = string("op_11799"), val = tensor([1, 1, 1024])]; - tensor var_11795_cast_fp16 = transpose(perm = var_11795_perm_0, x = attn_output_161_cast_fp16)[name = string("transpose_58")]; - tensor attn_output_165_cast_fp16 = reshape(shape = var_11799, x = var_11795_cast_fp16)[name = string("attn_output_165_cast_fp16")]; - tensor var_11804 = const()[name = string("op_11804"), val = tensor([0, 2, 1])]; - string var_11820_pad_type_0 = const()[name = string("op_11820_pad_type_0"), val = string("valid")]; - int32 var_11820_groups_0 = const()[name = string("op_11820_groups_0"), val = int32(1)]; - tensor var_11820_strides_0 = const()[name = string("op_11820_strides_0"), val = tensor([1])]; - tensor var_11820_pad_0 = const()[name = string("op_11820_pad_0"), val = tensor([0, 0])]; - tensor var_11820_dilations_0 = const()[name = string("op_11820_dilations_0"), val = tensor([1])]; - tensor squeeze_16_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357051904))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357936704))))[name = string("squeeze_16_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_11805_cast_fp16 = transpose(perm = var_11804, x = attn_output_165_cast_fp16)[name = string("transpose_57")]; - tensor var_11820_cast_fp16 = conv(dilations = var_11820_dilations_0, groups = var_11820_groups_0, pad = var_11820_pad_0, pad_type = var_11820_pad_type_0, strides = var_11820_strides_0, weight = squeeze_16_cast_fp16_to_fp32_to_fp16_palettized, x = var_11805_cast_fp16)[name = string("op_11820_cast_fp16")]; - tensor var_11824 = const()[name = string("op_11824"), val = tensor([0, 2, 1])]; - int32 var_11835 = const()[name = string("op_11835"), val = int32(-1)]; - fp16 const_634_promoted_to_fp16 = const()[name = string("const_634_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_265_cast_fp16 = transpose(perm = var_11824, x = var_11820_cast_fp16)[name = string("transpose_56")]; - tensor var_11837_cast_fp16 = mul(x = hidden_states_265_cast_fp16, y = const_634_promoted_to_fp16)[name = string("op_11837_cast_fp16")]; - bool input_331_interleave_0 = const()[name = string("input_331_interleave_0"), val = bool(false)]; - tensor input_331_cast_fp16 = concat(axis = var_11835, interleave = input_331_interleave_0, values = (hidden_states_265_cast_fp16, var_11837_cast_fp16))[name = string("input_331_cast_fp16")]; - tensor normed_397_axes_0 = const()[name = string("normed_397_axes_0"), val = tensor([-1])]; - fp16 var_11832_to_fp16 = const()[name = string("op_11832_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_397_cast_fp16 = layer_norm(axes = normed_397_axes_0, epsilon = var_11832_to_fp16, x = input_331_cast_fp16)[name = string("normed_397_cast_fp16")]; - tensor normed_399_begin_0 = const()[name = string("normed_399_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_399_end_0 = const()[name = string("normed_399_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_399_end_mask_0 = const()[name = string("normed_399_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_399_cast_fp16 = slice_by_index(begin = normed_399_begin_0, end = normed_399_end_0, end_mask = normed_399_end_mask_0, x = normed_397_cast_fp16)[name = string("normed_399_cast_fp16")]; - tensor var_11851_to_fp16 = const()[name = string("op_11851_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357955200)))]; - tensor attn_output_169_cast_fp16 = mul(x = normed_399_cast_fp16, y = var_11851_to_fp16)[name = string("attn_output_169_cast_fp16")]; - tensor hidden_states_267_cast_fp16 = add(x = hidden_states_257_cast_fp16, y = attn_output_169_cast_fp16)[name = string("hidden_states_267_cast_fp16")]; - int32 var_11864 = const()[name = string("op_11864"), val = int32(-1)]; - fp16 const_638_promoted_to_fp16 = const()[name = string("const_638_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_11866_cast_fp16 = mul(x = hidden_states_267_cast_fp16, y = const_638_promoted_to_fp16)[name = string("op_11866_cast_fp16")]; - bool input_333_interleave_0 = const()[name = string("input_333_interleave_0"), val = bool(false)]; - tensor input_333_cast_fp16 = concat(axis = var_11864, interleave = input_333_interleave_0, values = (hidden_states_267_cast_fp16, var_11866_cast_fp16))[name = string("input_333_cast_fp16")]; - tensor normed_401_axes_0 = const()[name = string("normed_401_axes_0"), val = tensor([-1])]; - fp16 var_11861_to_fp16 = const()[name = string("op_11861_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_401_cast_fp16 = layer_norm(axes = normed_401_axes_0, epsilon = var_11861_to_fp16, x = input_333_cast_fp16)[name = string("normed_401_cast_fp16")]; - tensor normed_403_begin_0 = const()[name = string("normed_403_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_403_end_0 = const()[name = string("normed_403_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_403_end_mask_0 = const()[name = string("normed_403_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_403_cast_fp16 = slice_by_index(begin = normed_403_begin_0, end = normed_403_end_0, end_mask = normed_403_end_mask_0, x = normed_401_cast_fp16)[name = string("normed_403_cast_fp16")]; - tensor var_11880_to_fp16 = const()[name = string("op_11880_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357957568)))]; - tensor x_269_cast_fp16 = mul(x = normed_403_cast_fp16, y = var_11880_to_fp16)[name = string("x_269_cast_fp16")]; - tensor var_11892 = const()[name = string("op_11892"), val = tensor([0, 2, 1])]; - tensor input_335_axes_0 = const()[name = string("input_335_axes_0"), val = tensor([2])]; - tensor var_11893_cast_fp16 = transpose(perm = var_11892, x = x_269_cast_fp16)[name = string("transpose_55")]; - tensor input_335_cast_fp16 = expand_dims(axes = input_335_axes_0, x = var_11893_cast_fp16)[name = string("input_335_cast_fp16")]; - string x_271_pad_type_0 = const()[name = string("x_271_pad_type_0"), val = string("valid")]; - tensor x_271_strides_0 = const()[name = string("x_271_strides_0"), val = tensor([1, 1])]; - tensor x_271_pad_0 = const()[name = string("x_271_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_271_dilations_0 = const()[name = string("x_271_dilations_0"), val = tensor([1, 1])]; - int32 x_271_groups_0 = const()[name = string("x_271_groups_0"), val = int32(1)]; - tensor model_model_layers_16_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357959936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(363931968))))[name = string("model_model_layers_16_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_271_cast_fp16 = conv(dilations = x_271_dilations_0, groups = x_271_groups_0, pad = x_271_pad_0, pad_type = x_271_pad_type_0, strides = x_271_strides_0, weight = model_model_layers_16_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_335_cast_fp16)[name = string("x_271_cast_fp16")]; - string b_33_pad_type_0 = const()[name = string("b_33_pad_type_0"), val = string("valid")]; - tensor b_33_strides_0 = const()[name = string("b_33_strides_0"), val = tensor([1, 1])]; - tensor b_33_pad_0 = const()[name = string("b_33_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_33_dilations_0 = const()[name = string("b_33_dilations_0"), val = tensor([1, 1])]; - int32 b_33_groups_0 = const()[name = string("b_33_groups_0"), val = int32(1)]; - tensor model_model_layers_16_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(364042624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(370014656))))[name = string("model_model_layers_16_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_33_cast_fp16 = conv(dilations = b_33_dilations_0, groups = b_33_groups_0, pad = b_33_pad_0, pad_type = b_33_pad_type_0, strides = b_33_strides_0, weight = model_model_layers_16_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_335_cast_fp16)[name = string("b_33_cast_fp16")]; - string var_11918_mode_0 = const()[name = string("op_11918_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_11918_cast_fp16 = gelu(mode = var_11918_mode_0, x = x_271_cast_fp16)[name = string("op_11918_cast_fp16")]; - tensor input_337_cast_fp16 = mul(x = var_11918_cast_fp16, y = b_33_cast_fp16)[name = string("input_337_cast_fp16")]; - string e_33_pad_type_0 = const()[name = string("e_33_pad_type_0"), val = string("valid")]; - tensor e_33_strides_0 = const()[name = string("e_33_strides_0"), val = tensor([1, 1])]; - tensor e_33_pad_0 = const()[name = string("e_33_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_33_dilations_0 = const()[name = string("e_33_dilations_0"), val = tensor([1, 1])]; - int32 e_33_groups_0 = const()[name = string("e_33_groups_0"), val = int32(1)]; - tensor model_model_layers_16_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(370125312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(376097344))))[name = string("model_model_layers_16_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_33_cast_fp16 = conv(dilations = e_33_dilations_0, groups = e_33_groups_0, pad = e_33_pad_0, pad_type = e_33_pad_type_0, strides = e_33_strides_0, weight = model_model_layers_16_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_337_cast_fp16)[name = string("e_33_cast_fp16")]; - tensor var_11926_axes_0 = const()[name = string("op_11926_axes_0"), val = tensor([2])]; - tensor var_11926_cast_fp16 = squeeze(axes = var_11926_axes_0, x = e_33_cast_fp16)[name = string("op_11926_cast_fp16")]; - tensor var_11927 = const()[name = string("op_11927"), val = tensor([0, 2, 1])]; - int32 var_11938 = const()[name = string("op_11938"), val = int32(-1)]; - fp16 const_642_promoted_to_fp16 = const()[name = string("const_642_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_269_cast_fp16 = transpose(perm = var_11927, x = var_11926_cast_fp16)[name = string("transpose_54")]; - tensor var_11940_cast_fp16 = mul(x = hidden_states_269_cast_fp16, y = const_642_promoted_to_fp16)[name = string("op_11940_cast_fp16")]; - bool input_339_interleave_0 = const()[name = string("input_339_interleave_0"), val = bool(false)]; - tensor input_339_cast_fp16 = concat(axis = var_11938, interleave = input_339_interleave_0, values = (hidden_states_269_cast_fp16, var_11940_cast_fp16))[name = string("input_339_cast_fp16")]; - tensor normed_405_axes_0 = const()[name = string("normed_405_axes_0"), val = tensor([-1])]; - fp16 var_11935_to_fp16 = const()[name = string("op_11935_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_405_cast_fp16 = layer_norm(axes = normed_405_axes_0, epsilon = var_11935_to_fp16, x = input_339_cast_fp16)[name = string("normed_405_cast_fp16")]; - tensor normed_407_begin_0 = const()[name = string("normed_407_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_407_end_0 = const()[name = string("normed_407_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_407_end_mask_0 = const()[name = string("normed_407_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_407_cast_fp16 = slice_by_index(begin = normed_407_begin_0, end = normed_407_end_0, end_mask = normed_407_end_mask_0, x = normed_405_cast_fp16)[name = string("normed_407_cast_fp16")]; - tensor var_11954_to_fp16 = const()[name = string("op_11954_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(376115840)))]; - tensor hidden_states_271_cast_fp16 = mul(x = normed_407_cast_fp16, y = var_11954_to_fp16)[name = string("hidden_states_271_cast_fp16")]; - tensor hidden_states_273_cast_fp16 = add(x = hidden_states_267_cast_fp16, y = hidden_states_271_cast_fp16)[name = string("hidden_states_273_cast_fp16")]; - int32 var_12005 = const()[name = string("op_12005"), val = int32(-1)]; - fp16 const_646_promoted_to_fp16 = const()[name = string("const_646_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_12007_cast_fp16 = mul(x = hidden_states_273_cast_fp16, y = const_646_promoted_to_fp16)[name = string("op_12007_cast_fp16")]; - bool input_341_interleave_0 = const()[name = string("input_341_interleave_0"), val = bool(false)]; - tensor input_341_cast_fp16 = concat(axis = var_12005, interleave = input_341_interleave_0, values = (hidden_states_273_cast_fp16, var_12007_cast_fp16))[name = string("input_341_cast_fp16")]; - tensor normed_409_axes_0 = const()[name = string("normed_409_axes_0"), val = tensor([-1])]; - fp16 var_12002_to_fp16 = const()[name = string("op_12002_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_409_cast_fp16 = layer_norm(axes = normed_409_axes_0, epsilon = var_12002_to_fp16, x = input_341_cast_fp16)[name = string("normed_409_cast_fp16")]; - tensor normed_411_begin_0 = const()[name = string("normed_411_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_411_end_0 = const()[name = string("normed_411_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_411_end_mask_0 = const()[name = string("normed_411_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_411_cast_fp16 = slice_by_index(begin = normed_411_begin_0, end = normed_411_end_0, end_mask = normed_411_end_mask_0, x = normed_409_cast_fp16)[name = string("normed_411_cast_fp16")]; - tensor var_12021_to_fp16 = const()[name = string("op_12021_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(376118208)))]; - tensor hidden_states_275_cast_fp16 = mul(x = normed_411_cast_fp16, y = var_12021_to_fp16)[name = string("hidden_states_275_cast_fp16")]; - tensor var_12026 = const()[name = string("op_12026"), val = tensor([0, 2, 1])]; - tensor var_12029_axes_0 = const()[name = string("op_12029_axes_0"), val = tensor([2])]; - tensor var_12027_cast_fp16 = transpose(perm = var_12026, x = hidden_states_275_cast_fp16)[name = string("transpose_53")]; - tensor var_12029_cast_fp16 = expand_dims(axes = var_12029_axes_0, x = var_12027_cast_fp16)[name = string("op_12029_cast_fp16")]; - string var_12045_pad_type_0 = const()[name = string("op_12045_pad_type_0"), val = string("valid")]; - tensor var_12045_strides_0 = const()[name = string("op_12045_strides_0"), val = tensor([1, 1])]; - tensor var_12045_pad_0 = const()[name = string("op_12045_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_12045_dilations_0 = const()[name = string("op_12045_dilations_0"), val = tensor([1, 1])]; - int32 var_12045_groups_0 = const()[name = string("op_12045_groups_0"), val = int32(1)]; - tensor var_12045 = conv(dilations = var_12045_dilations_0, groups = var_12045_groups_0, pad = var_12045_pad_0, pad_type = var_12045_pad_type_0, strides = var_12045_strides_0, weight = model_model_layers_17_self_attn_q_proj_weight_palettized, x = var_12029_cast_fp16)[name = string("op_12045")]; - tensor var_12050 = const()[name = string("op_12050"), val = tensor([1, 4, 1, 256])]; - tensor var_12051 = reshape(shape = var_12050, x = var_12045)[name = string("op_12051")]; - string var_12067_pad_type_0 = const()[name = string("op_12067_pad_type_0"), val = string("valid")]; - tensor var_12067_strides_0 = const()[name = string("op_12067_strides_0"), val = tensor([1, 1])]; - tensor var_12067_pad_0 = const()[name = string("op_12067_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_12067_dilations_0 = const()[name = string("op_12067_dilations_0"), val = tensor([1, 1])]; - int32 var_12067_groups_0 = const()[name = string("op_12067_groups_0"), val = int32(1)]; - tensor var_12067 = conv(dilations = var_12067_dilations_0, groups = var_12067_groups_0, pad = var_12067_pad_0, pad_type = var_12067_pad_type_0, strides = var_12067_strides_0, weight = model_model_layers_17_self_attn_k_proj_weight_palettized, x = var_12029_cast_fp16)[name = string("op_12067")]; - tensor var_12072 = const()[name = string("op_12072"), val = tensor([1, 1, 1, 256])]; - tensor var_12073 = reshape(shape = var_12072, x = var_12067)[name = string("op_12073")]; - string var_12089_pad_type_0 = const()[name = string("op_12089_pad_type_0"), val = string("valid")]; - tensor var_12089_strides_0 = const()[name = string("op_12089_strides_0"), val = tensor([1, 1])]; - tensor var_12089_pad_0 = const()[name = string("op_12089_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_12089_dilations_0 = const()[name = string("op_12089_dilations_0"), val = tensor([1, 1])]; - int32 var_12089_groups_0 = const()[name = string("op_12089_groups_0"), val = int32(1)]; - tensor var_12089 = conv(dilations = var_12089_dilations_0, groups = var_12089_groups_0, pad = var_12089_pad_0, pad_type = var_12089_pad_type_0, strides = var_12089_strides_0, weight = model_model_layers_17_self_attn_v_proj_weight_palettized, x = var_12029_cast_fp16)[name = string("op_12089")]; - tensor var_12094 = const()[name = string("op_12094"), val = tensor([1, 1, 1, 256])]; - tensor var_12095 = reshape(shape = var_12094, x = var_12089)[name = string("op_12095")]; - int32 var_12110 = const()[name = string("op_12110"), val = int32(-1)]; - fp16 const_650_promoted = const()[name = string("const_650_promoted"), val = fp16(-0x1p+0)]; - tensor var_12112 = mul(x = var_12051, y = const_650_promoted)[name = string("op_12112")]; - bool input_345_interleave_0 = const()[name = string("input_345_interleave_0"), val = bool(false)]; - tensor input_345 = concat(axis = var_12110, interleave = input_345_interleave_0, values = (var_12051, var_12112))[name = string("input_345")]; - tensor normed_413_axes_0 = const()[name = string("normed_413_axes_0"), val = tensor([-1])]; - fp16 var_12107_to_fp16 = const()[name = string("op_12107_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_413_cast_fp16 = layer_norm(axes = normed_413_axes_0, epsilon = var_12107_to_fp16, x = input_345)[name = string("normed_413_cast_fp16")]; - tensor normed_415_begin_0 = const()[name = string("normed_415_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_415_end_0 = const()[name = string("normed_415_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_415_end_mask_0 = const()[name = string("normed_415_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_415 = slice_by_index(begin = normed_415_begin_0, end = normed_415_end_0, end_mask = normed_415_end_mask_0, x = normed_413_cast_fp16)[name = string("normed_415")]; - tensor var_12126_to_fp16 = const()[name = string("op_12126_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(376120576)))]; - tensor q_35_cast_fp16 = mul(x = normed_415, y = var_12126_to_fp16)[name = string("q_35_cast_fp16")]; - int32 var_12137 = const()[name = string("op_12137"), val = int32(-1)]; - fp16 const_654_promoted = const()[name = string("const_654_promoted"), val = fp16(-0x1p+0)]; - tensor var_12139 = mul(x = var_12073, y = const_654_promoted)[name = string("op_12139")]; - bool input_347_interleave_0 = const()[name = string("input_347_interleave_0"), val = bool(false)]; - tensor input_347 = concat(axis = var_12137, interleave = input_347_interleave_0, values = (var_12073, var_12139))[name = string("input_347")]; - tensor normed_417_axes_0 = const()[name = string("normed_417_axes_0"), val = tensor([-1])]; - fp16 var_12134_to_fp16 = const()[name = string("op_12134_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_417_cast_fp16 = layer_norm(axes = normed_417_axes_0, epsilon = var_12134_to_fp16, x = input_347)[name = string("normed_417_cast_fp16")]; - tensor normed_419_begin_0 = const()[name = string("normed_419_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_419_end_0 = const()[name = string("normed_419_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_419_end_mask_0 = const()[name = string("normed_419_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_419 = slice_by_index(begin = normed_419_begin_0, end = normed_419_end_0, end_mask = normed_419_end_mask_0, x = normed_417_cast_fp16)[name = string("normed_419")]; - tensor var_12153_to_fp16 = const()[name = string("op_12153_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(376121152)))]; - tensor k_35_cast_fp16 = mul(x = normed_419, y = var_12153_to_fp16)[name = string("k_35_cast_fp16")]; - tensor var_12155_cast_fp16 = mul(x = q_35_cast_fp16, y = cos_21_cast_fp16)[name = string("op_12155_cast_fp16")]; - tensor x1_69_begin_0 = const()[name = string("x1_69_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_69_end_0 = const()[name = string("x1_69_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_69_end_mask_0 = const()[name = string("x1_69_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_69_cast_fp16 = slice_by_index(begin = x1_69_begin_0, end = x1_69_end_0, end_mask = x1_69_end_mask_0, x = q_35_cast_fp16)[name = string("x1_69_cast_fp16")]; - tensor x2_69_begin_0 = const()[name = string("x2_69_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_69_end_0 = const()[name = string("x2_69_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_69_end_mask_0 = const()[name = string("x2_69_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_69_cast_fp16 = slice_by_index(begin = x2_69_begin_0, end = x2_69_end_0, end_mask = x2_69_end_mask_0, x = q_35_cast_fp16)[name = string("x2_69_cast_fp16")]; - fp16 const_660_promoted_to_fp16 = const()[name = string("const_660_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_12176_cast_fp16 = mul(x = x2_69_cast_fp16, y = const_660_promoted_to_fp16)[name = string("op_12176_cast_fp16")]; - int32 var_12178 = const()[name = string("op_12178"), val = int32(-1)]; - bool var_12179_interleave_0 = const()[name = string("op_12179_interleave_0"), val = bool(false)]; - tensor var_12179_cast_fp16 = concat(axis = var_12178, interleave = var_12179_interleave_0, values = (var_12176_cast_fp16, x1_69_cast_fp16))[name = string("op_12179_cast_fp16")]; - tensor var_12180_cast_fp16 = mul(x = var_12179_cast_fp16, y = sin_21_cast_fp16)[name = string("op_12180_cast_fp16")]; - tensor query_states_69_cast_fp16 = add(x = var_12155_cast_fp16, y = var_12180_cast_fp16)[name = string("query_states_69_cast_fp16")]; - tensor var_12183_cast_fp16 = mul(x = k_35_cast_fp16, y = cos_21_cast_fp16)[name = string("op_12183_cast_fp16")]; - tensor x1_71_begin_0 = const()[name = string("x1_71_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_71_end_0 = const()[name = string("x1_71_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_71_end_mask_0 = const()[name = string("x1_71_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_71_cast_fp16 = slice_by_index(begin = x1_71_begin_0, end = x1_71_end_0, end_mask = x1_71_end_mask_0, x = k_35_cast_fp16)[name = string("x1_71_cast_fp16")]; - tensor x2_71_begin_0 = const()[name = string("x2_71_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_71_end_0 = const()[name = string("x2_71_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_71_end_mask_0 = const()[name = string("x2_71_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_71_cast_fp16 = slice_by_index(begin = x2_71_begin_0, end = x2_71_end_0, end_mask = x2_71_end_mask_0, x = k_35_cast_fp16)[name = string("x2_71_cast_fp16")]; - fp16 const_663_promoted_to_fp16 = const()[name = string("const_663_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_12204_cast_fp16 = mul(x = x2_71_cast_fp16, y = const_663_promoted_to_fp16)[name = string("op_12204_cast_fp16")]; - int32 var_12206 = const()[name = string("op_12206"), val = int32(-1)]; - bool var_12207_interleave_0 = const()[name = string("op_12207_interleave_0"), val = bool(false)]; - tensor var_12207_cast_fp16 = concat(axis = var_12206, interleave = var_12207_interleave_0, values = (var_12204_cast_fp16, x1_71_cast_fp16))[name = string("op_12207_cast_fp16")]; - tensor var_12208_cast_fp16 = mul(x = var_12207_cast_fp16, y = sin_21_cast_fp16)[name = string("op_12208_cast_fp16")]; - tensor key_states_69_cast_fp16 = add(x = var_12183_cast_fp16, y = var_12208_cast_fp16)[name = string("key_states_69_cast_fp16")]; - tensor model_model_kv_cache_global_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_global_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_global_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_18, begin_mask = model_model_kv_cache_global_internal_tensor_assign_5_begin_mask_0, end = concat_19, end_mask = model_model_kv_cache_global_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_5_stride_0, update = key_states_69_cast_fp16, x = coreml_update_state_75)[name = string("model_model_kv_cache_global_internal_tensor_assign_5_cast_fp16")]; - write_state(data = model_model_kv_cache_global_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_34_write_state")]; - tensor coreml_update_state_86 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_34")]; - tensor model_model_kv_cache_global_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_global_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_global_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_58, begin_mask = model_model_kv_cache_global_internal_tensor_assign_6_begin_mask_0, end = concat_59, end_mask = model_model_kv_cache_global_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_6_stride_0, update = var_12095, x = coreml_update_state_86)[name = string("model_model_kv_cache_global_internal_tensor_assign_6_cast_fp16")]; - write_state(data = model_model_kv_cache_global_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_35_write_state")]; - tensor coreml_update_state_87 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_35")]; - tensor var_12263_begin_0 = const()[name = string("op_12263_begin_0"), val = tensor([2, 0, 0, 0])]; - tensor var_12263_end_0 = const()[name = string("op_12263_end_0"), val = tensor([3, 1, 4096, 256])]; - tensor var_12263_end_mask_0 = const()[name = string("op_12263_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_12263_cast_fp16 = slice_by_index(begin = var_12263_begin_0, end = var_12263_end_0, end_mask = var_12263_end_mask_0, x = coreml_update_state_87)[name = string("op_12263_cast_fp16")]; - tensor var_12270_begin_0 = const()[name = string("op_12270_begin_0"), val = tensor([6, 0, 0, 0])]; - tensor var_12270_end_0 = const()[name = string("op_12270_end_0"), val = tensor([7, 1, 4096, 256])]; - tensor var_12270_end_mask_0 = const()[name = string("op_12270_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_12270_cast_fp16 = slice_by_index(begin = var_12270_begin_0, end = var_12270_end_0, end_mask = var_12270_end_mask_0, x = coreml_update_state_87)[name = string("op_12270_cast_fp16")]; - tensor var_12307 = const()[name = string("op_12307"), val = tensor([1, 4, 1, 1])]; - tensor x_277_cast_fp16 = tile(reps = var_12307, x = var_12263_cast_fp16)[name = string("x_277_cast_fp16")]; - tensor var_12327 = const()[name = string("op_12327"), val = tensor([1, 4, 1, 1])]; - tensor x_283_cast_fp16 = tile(reps = var_12327, x = var_12270_cast_fp16)[name = string("x_283_cast_fp16")]; - bool var_12354_transpose_x_1 = const()[name = string("op_12354_transpose_x_1"), val = bool(false)]; - bool var_12354_transpose_y_1 = const()[name = string("op_12354_transpose_y_1"), val = bool(true)]; - tensor var_12354 = matmul(transpose_x = var_12354_transpose_x_1, transpose_y = var_12354_transpose_y_1, x = query_states_69_cast_fp16, y = x_277_cast_fp16)[name = string("op_12354")]; - fp16 var_12355_to_fp16 = const()[name = string("op_12355_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_103_cast_fp16 = mul(x = var_12354, y = var_12355_to_fp16)[name = string("attn_weights_103_cast_fp16")]; - tensor attn_weights_105_cast_fp16 = add(x = attn_weights_103_cast_fp16, y = causal_mask)[name = string("attn_weights_105_cast_fp16")]; - int32 var_12390 = const()[name = string("op_12390"), val = int32(-1)]; - tensor attn_weights_107_cast_fp16 = softmax(axis = var_12390, x = attn_weights_105_cast_fp16)[name = string("attn_weights_107_cast_fp16")]; - bool attn_output_171_transpose_x_0 = const()[name = string("attn_output_171_transpose_x_0"), val = bool(false)]; - bool attn_output_171_transpose_y_0 = const()[name = string("attn_output_171_transpose_y_0"), val = bool(false)]; - tensor attn_output_171_cast_fp16 = matmul(transpose_x = attn_output_171_transpose_x_0, transpose_y = attn_output_171_transpose_y_0, x = attn_weights_107_cast_fp16, y = x_283_cast_fp16)[name = string("attn_output_171_cast_fp16")]; - tensor var_12401_perm_0 = const()[name = string("op_12401_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_12405 = const()[name = string("op_12405"), val = tensor([1, 1, 1024])]; - tensor var_12401_cast_fp16 = transpose(perm = var_12401_perm_0, x = attn_output_171_cast_fp16)[name = string("transpose_52")]; - tensor attn_output_175_cast_fp16 = reshape(shape = var_12405, x = var_12401_cast_fp16)[name = string("attn_output_175_cast_fp16")]; - tensor var_12410 = const()[name = string("op_12410"), val = tensor([0, 2, 1])]; - string var_12426_pad_type_0 = const()[name = string("op_12426_pad_type_0"), val = string("valid")]; - int32 var_12426_groups_0 = const()[name = string("op_12426_groups_0"), val = int32(1)]; - tensor var_12426_strides_0 = const()[name = string("op_12426_strides_0"), val = tensor([1])]; - tensor var_12426_pad_0 = const()[name = string("op_12426_pad_0"), val = tensor([0, 0])]; - tensor var_12426_dilations_0 = const()[name = string("op_12426_dilations_0"), val = tensor([1])]; - tensor squeeze_17_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(376121728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377006528))))[name = string("squeeze_17_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_12411_cast_fp16 = transpose(perm = var_12410, x = attn_output_175_cast_fp16)[name = string("transpose_51")]; - tensor var_12426_cast_fp16 = conv(dilations = var_12426_dilations_0, groups = var_12426_groups_0, pad = var_12426_pad_0, pad_type = var_12426_pad_type_0, strides = var_12426_strides_0, weight = squeeze_17_cast_fp16_to_fp32_to_fp16_palettized, x = var_12411_cast_fp16)[name = string("op_12426_cast_fp16")]; - tensor var_12430 = const()[name = string("op_12430"), val = tensor([0, 2, 1])]; - int32 var_12441 = const()[name = string("op_12441"), val = int32(-1)]; - fp16 const_672_promoted_to_fp16 = const()[name = string("const_672_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_281_cast_fp16 = transpose(perm = var_12430, x = var_12426_cast_fp16)[name = string("transpose_50")]; - tensor var_12443_cast_fp16 = mul(x = hidden_states_281_cast_fp16, y = const_672_promoted_to_fp16)[name = string("op_12443_cast_fp16")]; - bool input_351_interleave_0 = const()[name = string("input_351_interleave_0"), val = bool(false)]; - tensor input_351_cast_fp16 = concat(axis = var_12441, interleave = input_351_interleave_0, values = (hidden_states_281_cast_fp16, var_12443_cast_fp16))[name = string("input_351_cast_fp16")]; - tensor normed_421_axes_0 = const()[name = string("normed_421_axes_0"), val = tensor([-1])]; - fp16 var_12438_to_fp16 = const()[name = string("op_12438_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_421_cast_fp16 = layer_norm(axes = normed_421_axes_0, epsilon = var_12438_to_fp16, x = input_351_cast_fp16)[name = string("normed_421_cast_fp16")]; - tensor normed_423_begin_0 = const()[name = string("normed_423_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_423_end_0 = const()[name = string("normed_423_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_423_end_mask_0 = const()[name = string("normed_423_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_423_cast_fp16 = slice_by_index(begin = normed_423_begin_0, end = normed_423_end_0, end_mask = normed_423_end_mask_0, x = normed_421_cast_fp16)[name = string("normed_423_cast_fp16")]; - tensor var_12457_to_fp16 = const()[name = string("op_12457_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377025024)))]; - tensor attn_output_179_cast_fp16 = mul(x = normed_423_cast_fp16, y = var_12457_to_fp16)[name = string("attn_output_179_cast_fp16")]; - tensor hidden_states_283_cast_fp16 = add(x = hidden_states_273_cast_fp16, y = attn_output_179_cast_fp16)[name = string("hidden_states_283_cast_fp16")]; - int32 var_12470 = const()[name = string("op_12470"), val = int32(-1)]; - fp16 const_676_promoted_to_fp16 = const()[name = string("const_676_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_12472_cast_fp16 = mul(x = hidden_states_283_cast_fp16, y = const_676_promoted_to_fp16)[name = string("op_12472_cast_fp16")]; - bool input_353_interleave_0 = const()[name = string("input_353_interleave_0"), val = bool(false)]; - tensor input_353_cast_fp16 = concat(axis = var_12470, interleave = input_353_interleave_0, values = (hidden_states_283_cast_fp16, var_12472_cast_fp16))[name = string("input_353_cast_fp16")]; - tensor normed_425_axes_0 = const()[name = string("normed_425_axes_0"), val = tensor([-1])]; - fp16 var_12467_to_fp16 = const()[name = string("op_12467_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_425_cast_fp16 = layer_norm(axes = normed_425_axes_0, epsilon = var_12467_to_fp16, x = input_353_cast_fp16)[name = string("normed_425_cast_fp16")]; - tensor normed_427_begin_0 = const()[name = string("normed_427_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_427_end_0 = const()[name = string("normed_427_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_427_end_mask_0 = const()[name = string("normed_427_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_427_cast_fp16 = slice_by_index(begin = normed_427_begin_0, end = normed_427_end_0, end_mask = normed_427_end_mask_0, x = normed_425_cast_fp16)[name = string("normed_427_cast_fp16")]; - tensor var_12486_to_fp16 = const()[name = string("op_12486_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377027392)))]; - tensor x_285_cast_fp16 = mul(x = normed_427_cast_fp16, y = var_12486_to_fp16)[name = string("x_285_cast_fp16")]; - tensor var_12498 = const()[name = string("op_12498"), val = tensor([0, 2, 1])]; - tensor input_355_axes_0 = const()[name = string("input_355_axes_0"), val = tensor([2])]; - tensor var_12499_cast_fp16 = transpose(perm = var_12498, x = x_285_cast_fp16)[name = string("transpose_49")]; - tensor input_355_cast_fp16 = expand_dims(axes = input_355_axes_0, x = var_12499_cast_fp16)[name = string("input_355_cast_fp16")]; - string x_287_pad_type_0 = const()[name = string("x_287_pad_type_0"), val = string("valid")]; - tensor x_287_strides_0 = const()[name = string("x_287_strides_0"), val = tensor([1, 1])]; - tensor x_287_pad_0 = const()[name = string("x_287_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_287_dilations_0 = const()[name = string("x_287_dilations_0"), val = tensor([1, 1])]; - int32 x_287_groups_0 = const()[name = string("x_287_groups_0"), val = int32(1)]; - tensor model_model_layers_17_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377029760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383001792))))[name = string("model_model_layers_17_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_287_cast_fp16 = conv(dilations = x_287_dilations_0, groups = x_287_groups_0, pad = x_287_pad_0, pad_type = x_287_pad_type_0, strides = x_287_strides_0, weight = model_model_layers_17_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_355_cast_fp16)[name = string("x_287_cast_fp16")]; - string b_35_pad_type_0 = const()[name = string("b_35_pad_type_0"), val = string("valid")]; - tensor b_35_strides_0 = const()[name = string("b_35_strides_0"), val = tensor([1, 1])]; - tensor b_35_pad_0 = const()[name = string("b_35_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_35_dilations_0 = const()[name = string("b_35_dilations_0"), val = tensor([1, 1])]; - int32 b_35_groups_0 = const()[name = string("b_35_groups_0"), val = int32(1)]; - tensor model_model_layers_17_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383112448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(389084480))))[name = string("model_model_layers_17_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_35_cast_fp16 = conv(dilations = b_35_dilations_0, groups = b_35_groups_0, pad = b_35_pad_0, pad_type = b_35_pad_type_0, strides = b_35_strides_0, weight = model_model_layers_17_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_355_cast_fp16)[name = string("b_35_cast_fp16")]; - string var_12524_mode_0 = const()[name = string("op_12524_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_12524_cast_fp16 = gelu(mode = var_12524_mode_0, x = x_287_cast_fp16)[name = string("op_12524_cast_fp16")]; - tensor input_357_cast_fp16 = mul(x = var_12524_cast_fp16, y = b_35_cast_fp16)[name = string("input_357_cast_fp16")]; - string e_35_pad_type_0 = const()[name = string("e_35_pad_type_0"), val = string("valid")]; - tensor e_35_strides_0 = const()[name = string("e_35_strides_0"), val = tensor([1, 1])]; - tensor e_35_pad_0 = const()[name = string("e_35_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_35_dilations_0 = const()[name = string("e_35_dilations_0"), val = tensor([1, 1])]; - int32 e_35_groups_0 = const()[name = string("e_35_groups_0"), val = int32(1)]; - tensor model_model_layers_17_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(389195136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395167168))))[name = string("model_model_layers_17_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_35_cast_fp16 = conv(dilations = e_35_dilations_0, groups = e_35_groups_0, pad = e_35_pad_0, pad_type = e_35_pad_type_0, strides = e_35_strides_0, weight = model_model_layers_17_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_357_cast_fp16)[name = string("e_35_cast_fp16")]; - tensor var_12532_axes_0 = const()[name = string("op_12532_axes_0"), val = tensor([2])]; - tensor var_12532_cast_fp16 = squeeze(axes = var_12532_axes_0, x = e_35_cast_fp16)[name = string("op_12532_cast_fp16")]; - tensor var_12533 = const()[name = string("op_12533"), val = tensor([0, 2, 1])]; - int32 var_12544 = const()[name = string("op_12544"), val = int32(-1)]; - fp16 const_680_promoted_to_fp16 = const()[name = string("const_680_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_285_cast_fp16 = transpose(perm = var_12533, x = var_12532_cast_fp16)[name = string("transpose_48")]; - tensor var_12546_cast_fp16 = mul(x = hidden_states_285_cast_fp16, y = const_680_promoted_to_fp16)[name = string("op_12546_cast_fp16")]; - bool input_359_interleave_0 = const()[name = string("input_359_interleave_0"), val = bool(false)]; - tensor input_359_cast_fp16 = concat(axis = var_12544, interleave = input_359_interleave_0, values = (hidden_states_285_cast_fp16, var_12546_cast_fp16))[name = string("input_359_cast_fp16")]; - tensor normed_429_axes_0 = const()[name = string("normed_429_axes_0"), val = tensor([-1])]; - fp16 var_12541_to_fp16 = const()[name = string("op_12541_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_429_cast_fp16 = layer_norm(axes = normed_429_axes_0, epsilon = var_12541_to_fp16, x = input_359_cast_fp16)[name = string("normed_429_cast_fp16")]; - tensor normed_431_begin_0 = const()[name = string("normed_431_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_431_end_0 = const()[name = string("normed_431_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_431_end_mask_0 = const()[name = string("normed_431_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_431_cast_fp16 = slice_by_index(begin = normed_431_begin_0, end = normed_431_end_0, end_mask = normed_431_end_mask_0, x = normed_429_cast_fp16)[name = string("normed_431_cast_fp16")]; - tensor var_12560_to_fp16 = const()[name = string("op_12560_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395185664)))]; - tensor hidden_states_287_cast_fp16 = mul(x = normed_431_cast_fp16, y = var_12560_to_fp16)[name = string("hidden_states_287_cast_fp16")]; - tensor hidden_states_289_cast_fp16 = add(x = hidden_states_283_cast_fp16, y = hidden_states_287_cast_fp16)[name = string("hidden_states_289_cast_fp16")]; - int32 var_12611 = const()[name = string("op_12611"), val = int32(-1)]; - fp16 const_684_promoted_to_fp16 = const()[name = string("const_684_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_12613_cast_fp16 = mul(x = hidden_states_289_cast_fp16, y = const_684_promoted_to_fp16)[name = string("op_12613_cast_fp16")]; - bool input_361_interleave_0 = const()[name = string("input_361_interleave_0"), val = bool(false)]; - tensor input_361_cast_fp16 = concat(axis = var_12611, interleave = input_361_interleave_0, values = (hidden_states_289_cast_fp16, var_12613_cast_fp16))[name = string("input_361_cast_fp16")]; - tensor normed_433_axes_0 = const()[name = string("normed_433_axes_0"), val = tensor([-1])]; - fp16 var_12608_to_fp16 = const()[name = string("op_12608_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_433_cast_fp16 = layer_norm(axes = normed_433_axes_0, epsilon = var_12608_to_fp16, x = input_361_cast_fp16)[name = string("normed_433_cast_fp16")]; - tensor normed_435_begin_0 = const()[name = string("normed_435_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_435_end_0 = const()[name = string("normed_435_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_435_end_mask_0 = const()[name = string("normed_435_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_435_cast_fp16 = slice_by_index(begin = normed_435_begin_0, end = normed_435_end_0, end_mask = normed_435_end_mask_0, x = normed_433_cast_fp16)[name = string("normed_435_cast_fp16")]; - tensor var_12627_to_fp16 = const()[name = string("op_12627_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395188032)))]; - tensor hidden_states_291_cast_fp16 = mul(x = normed_435_cast_fp16, y = var_12627_to_fp16)[name = string("hidden_states_291_cast_fp16")]; - tensor var_12632 = const()[name = string("op_12632"), val = tensor([0, 2, 1])]; - tensor var_12635_axes_0 = const()[name = string("op_12635_axes_0"), val = tensor([2])]; - tensor var_12633_cast_fp16 = transpose(perm = var_12632, x = hidden_states_291_cast_fp16)[name = string("transpose_47")]; - tensor var_12635_cast_fp16 = expand_dims(axes = var_12635_axes_0, x = var_12633_cast_fp16)[name = string("op_12635_cast_fp16")]; - string var_12651_pad_type_0 = const()[name = string("op_12651_pad_type_0"), val = string("valid")]; - tensor var_12651_strides_0 = const()[name = string("op_12651_strides_0"), val = tensor([1, 1])]; - tensor var_12651_pad_0 = const()[name = string("op_12651_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_12651_dilations_0 = const()[name = string("op_12651_dilations_0"), val = tensor([1, 1])]; - int32 var_12651_groups_0 = const()[name = string("op_12651_groups_0"), val = int32(1)]; - tensor var_12651 = conv(dilations = var_12651_dilations_0, groups = var_12651_groups_0, pad = var_12651_pad_0, pad_type = var_12651_pad_type_0, strides = var_12651_strides_0, weight = model_model_layers_18_self_attn_q_proj_weight_palettized, x = var_12635_cast_fp16)[name = string("op_12651")]; - tensor var_12656 = const()[name = string("op_12656"), val = tensor([1, 4, 1, 256])]; - tensor var_12657 = reshape(shape = var_12656, x = var_12651)[name = string("op_12657")]; - string var_12673_pad_type_0 = const()[name = string("op_12673_pad_type_0"), val = string("valid")]; - tensor var_12673_strides_0 = const()[name = string("op_12673_strides_0"), val = tensor([1, 1])]; - tensor var_12673_pad_0 = const()[name = string("op_12673_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_12673_dilations_0 = const()[name = string("op_12673_dilations_0"), val = tensor([1, 1])]; - int32 var_12673_groups_0 = const()[name = string("op_12673_groups_0"), val = int32(1)]; - tensor var_12673 = conv(dilations = var_12673_dilations_0, groups = var_12673_groups_0, pad = var_12673_pad_0, pad_type = var_12673_pad_type_0, strides = var_12673_strides_0, weight = model_model_layers_18_self_attn_k_proj_weight_palettized, x = var_12635_cast_fp16)[name = string("op_12673")]; - tensor var_12678 = const()[name = string("op_12678"), val = tensor([1, 1, 1, 256])]; - tensor var_12679 = reshape(shape = var_12678, x = var_12673)[name = string("op_12679")]; - string var_12695_pad_type_0 = const()[name = string("op_12695_pad_type_0"), val = string("valid")]; - tensor var_12695_strides_0 = const()[name = string("op_12695_strides_0"), val = tensor([1, 1])]; - tensor var_12695_pad_0 = const()[name = string("op_12695_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_12695_dilations_0 = const()[name = string("op_12695_dilations_0"), val = tensor([1, 1])]; - int32 var_12695_groups_0 = const()[name = string("op_12695_groups_0"), val = int32(1)]; - tensor var_12695 = conv(dilations = var_12695_dilations_0, groups = var_12695_groups_0, pad = var_12695_pad_0, pad_type = var_12695_pad_type_0, strides = var_12695_strides_0, weight = model_model_layers_18_self_attn_v_proj_weight_palettized, x = var_12635_cast_fp16)[name = string("op_12695")]; - tensor var_12700 = const()[name = string("op_12700"), val = tensor([1, 1, 1, 256])]; - tensor var_12701 = reshape(shape = var_12700, x = var_12695)[name = string("op_12701")]; - int32 var_12716 = const()[name = string("op_12716"), val = int32(-1)]; - fp16 const_688_promoted = const()[name = string("const_688_promoted"), val = fp16(-0x1p+0)]; - tensor var_12718 = mul(x = var_12657, y = const_688_promoted)[name = string("op_12718")]; - bool input_365_interleave_0 = const()[name = string("input_365_interleave_0"), val = bool(false)]; - tensor input_365 = concat(axis = var_12716, interleave = input_365_interleave_0, values = (var_12657, var_12718))[name = string("input_365")]; - tensor normed_437_axes_0 = const()[name = string("normed_437_axes_0"), val = tensor([-1])]; - fp16 var_12713_to_fp16 = const()[name = string("op_12713_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_437_cast_fp16 = layer_norm(axes = normed_437_axes_0, epsilon = var_12713_to_fp16, x = input_365)[name = string("normed_437_cast_fp16")]; - tensor normed_439_begin_0 = const()[name = string("normed_439_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_439_end_0 = const()[name = string("normed_439_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_439_end_mask_0 = const()[name = string("normed_439_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_439 = slice_by_index(begin = normed_439_begin_0, end = normed_439_end_0, end_mask = normed_439_end_mask_0, x = normed_437_cast_fp16)[name = string("normed_439")]; - tensor var_12732_to_fp16 = const()[name = string("op_12732_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395190400)))]; - tensor q_37_cast_fp16 = mul(x = normed_439, y = var_12732_to_fp16)[name = string("q_37_cast_fp16")]; - int32 var_12743 = const()[name = string("op_12743"), val = int32(-1)]; - fp16 const_692_promoted = const()[name = string("const_692_promoted"), val = fp16(-0x1p+0)]; - tensor var_12745 = mul(x = var_12679, y = const_692_promoted)[name = string("op_12745")]; - bool input_367_interleave_0 = const()[name = string("input_367_interleave_0"), val = bool(false)]; - tensor input_367 = concat(axis = var_12743, interleave = input_367_interleave_0, values = (var_12679, var_12745))[name = string("input_367")]; - tensor normed_441_axes_0 = const()[name = string("normed_441_axes_0"), val = tensor([-1])]; - fp16 var_12740_to_fp16 = const()[name = string("op_12740_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_441_cast_fp16 = layer_norm(axes = normed_441_axes_0, epsilon = var_12740_to_fp16, x = input_367)[name = string("normed_441_cast_fp16")]; - tensor normed_443_begin_0 = const()[name = string("normed_443_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_443_end_0 = const()[name = string("normed_443_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_443_end_mask_0 = const()[name = string("normed_443_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_443 = slice_by_index(begin = normed_443_begin_0, end = normed_443_end_0, end_mask = normed_443_end_mask_0, x = normed_441_cast_fp16)[name = string("normed_443")]; - tensor var_12759_to_fp16 = const()[name = string("op_12759_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395190976)))]; - tensor k_37_cast_fp16 = mul(x = normed_443, y = var_12759_to_fp16)[name = string("k_37_cast_fp16")]; - tensor var_12761_cast_fp16 = mul(x = q_37_cast_fp16, y = cos_1_cast_fp16)[name = string("op_12761_cast_fp16")]; - tensor x1_73_begin_0 = const()[name = string("x1_73_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_73_end_0 = const()[name = string("x1_73_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_73_end_mask_0 = const()[name = string("x1_73_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_73_cast_fp16 = slice_by_index(begin = x1_73_begin_0, end = x1_73_end_0, end_mask = x1_73_end_mask_0, x = q_37_cast_fp16)[name = string("x1_73_cast_fp16")]; - tensor x2_73_begin_0 = const()[name = string("x2_73_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_73_end_0 = const()[name = string("x2_73_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_73_end_mask_0 = const()[name = string("x2_73_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_73_cast_fp16 = slice_by_index(begin = x2_73_begin_0, end = x2_73_end_0, end_mask = x2_73_end_mask_0, x = q_37_cast_fp16)[name = string("x2_73_cast_fp16")]; - fp16 const_698_promoted_to_fp16 = const()[name = string("const_698_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_12782_cast_fp16 = mul(x = x2_73_cast_fp16, y = const_698_promoted_to_fp16)[name = string("op_12782_cast_fp16")]; - int32 var_12784 = const()[name = string("op_12784"), val = int32(-1)]; - bool var_12785_interleave_0 = const()[name = string("op_12785_interleave_0"), val = bool(false)]; - tensor var_12785_cast_fp16 = concat(axis = var_12784, interleave = var_12785_interleave_0, values = (var_12782_cast_fp16, x1_73_cast_fp16))[name = string("op_12785_cast_fp16")]; - tensor var_12786_cast_fp16 = mul(x = var_12785_cast_fp16, y = sin_1_cast_fp16)[name = string("op_12786_cast_fp16")]; - tensor query_states_73_cast_fp16 = add(x = var_12761_cast_fp16, y = var_12786_cast_fp16)[name = string("query_states_73_cast_fp16")]; - tensor var_12789_cast_fp16 = mul(x = k_37_cast_fp16, y = cos_1_cast_fp16)[name = string("op_12789_cast_fp16")]; - tensor x1_75_begin_0 = const()[name = string("x1_75_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_75_end_0 = const()[name = string("x1_75_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_75_end_mask_0 = const()[name = string("x1_75_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_75_cast_fp16 = slice_by_index(begin = x1_75_begin_0, end = x1_75_end_0, end_mask = x1_75_end_mask_0, x = k_37_cast_fp16)[name = string("x1_75_cast_fp16")]; - tensor x2_75_begin_0 = const()[name = string("x2_75_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_75_end_0 = const()[name = string("x2_75_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_75_end_mask_0 = const()[name = string("x2_75_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_75_cast_fp16 = slice_by_index(begin = x2_75_begin_0, end = x2_75_end_0, end_mask = x2_75_end_mask_0, x = k_37_cast_fp16)[name = string("x2_75_cast_fp16")]; - fp16 const_701_promoted_to_fp16 = const()[name = string("const_701_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_12810_cast_fp16 = mul(x = x2_75_cast_fp16, y = const_701_promoted_to_fp16)[name = string("op_12810_cast_fp16")]; - int32 var_12812 = const()[name = string("op_12812"), val = int32(-1)]; - bool var_12813_interleave_0 = const()[name = string("op_12813_interleave_0"), val = bool(false)]; - tensor var_12813_cast_fp16 = concat(axis = var_12812, interleave = var_12813_interleave_0, values = (var_12810_cast_fp16, x1_75_cast_fp16))[name = string("op_12813_cast_fp16")]; - tensor var_12814_cast_fp16 = mul(x = var_12813_cast_fp16, y = sin_1_cast_fp16)[name = string("op_12814_cast_fp16")]; - tensor key_states_73_cast_fp16 = add(x = var_12789_cast_fp16, y = var_12814_cast_fp16)[name = string("key_states_73_cast_fp16")]; - tensor expand_dims_216 = const()[name = string("expand_dims_216"), val = tensor([15])]; - tensor expand_dims_217 = const()[name = string("expand_dims_217"), val = tensor([0])]; - tensor expand_dims_219 = const()[name = string("expand_dims_219"), val = tensor([0])]; - tensor expand_dims_220 = const()[name = string("expand_dims_220"), val = tensor([16])]; - int32 concat_146_axis_0 = const()[name = string("concat_146_axis_0"), val = int32(0)]; - bool concat_146_interleave_0 = const()[name = string("concat_146_interleave_0"), val = bool(false)]; - tensor concat_146 = concat(axis = concat_146_axis_0, interleave = concat_146_interleave_0, values = (expand_dims_216, expand_dims_217, current_pos, expand_dims_219))[name = string("concat_146")]; - tensor concat_147_values1_0 = const()[name = string("concat_147_values1_0"), val = tensor([0])]; - tensor concat_147_values3_0 = const()[name = string("concat_147_values3_0"), val = tensor([0])]; - int32 concat_147_axis_0 = const()[name = string("concat_147_axis_0"), val = int32(0)]; - bool concat_147_interleave_0 = const()[name = string("concat_147_interleave_0"), val = bool(false)]; - tensor concat_147 = concat(axis = concat_147_axis_0, interleave = concat_147_interleave_0, values = (expand_dims_220, concat_147_values1_0, var_1909, concat_147_values3_0))[name = string("concat_147")]; - tensor model_model_kv_cache_local_internal_tensor_assign_31_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_31_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_31_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_31_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_31_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_31_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_31_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_31_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_31_cast_fp16 = slice_update(begin = concat_146, begin_mask = model_model_kv_cache_local_internal_tensor_assign_31_begin_mask_0, end = concat_147, end_mask = model_model_kv_cache_local_internal_tensor_assign_31_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_31_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_31_stride_0, update = key_states_73_cast_fp16, x = coreml_update_state_85)[name = string("model_model_kv_cache_local_internal_tensor_assign_31_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_31_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_36_write_state")]; - tensor coreml_update_state_88 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_36")]; - tensor expand_dims_222 = const()[name = string("expand_dims_222"), val = tensor([37])]; - tensor expand_dims_223 = const()[name = string("expand_dims_223"), val = tensor([0])]; - tensor expand_dims_225 = const()[name = string("expand_dims_225"), val = tensor([0])]; - tensor expand_dims_226 = const()[name = string("expand_dims_226"), val = tensor([38])]; - int32 concat_150_axis_0 = const()[name = string("concat_150_axis_0"), val = int32(0)]; - bool concat_150_interleave_0 = const()[name = string("concat_150_interleave_0"), val = bool(false)]; - tensor concat_150 = concat(axis = concat_150_axis_0, interleave = concat_150_interleave_0, values = (expand_dims_222, expand_dims_223, current_pos, expand_dims_225))[name = string("concat_150")]; - tensor concat_151_values1_0 = const()[name = string("concat_151_values1_0"), val = tensor([0])]; - tensor concat_151_values3_0 = const()[name = string("concat_151_values3_0"), val = tensor([0])]; - int32 concat_151_axis_0 = const()[name = string("concat_151_axis_0"), val = int32(0)]; - bool concat_151_interleave_0 = const()[name = string("concat_151_interleave_0"), val = bool(false)]; - tensor concat_151 = concat(axis = concat_151_axis_0, interleave = concat_151_interleave_0, values = (expand_dims_226, concat_151_values1_0, var_1909, concat_151_values3_0))[name = string("concat_151")]; - tensor model_model_kv_cache_local_internal_tensor_assign_32_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_32_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_32_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_32_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_32_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_32_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_32_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_32_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_32_cast_fp16 = slice_update(begin = concat_150, begin_mask = model_model_kv_cache_local_internal_tensor_assign_32_begin_mask_0, end = concat_151, end_mask = model_model_kv_cache_local_internal_tensor_assign_32_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_32_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_32_stride_0, update = var_12701, x = coreml_update_state_88)[name = string("model_model_kv_cache_local_internal_tensor_assign_32_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_32_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_37_write_state")]; - tensor coreml_update_state_89 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_37")]; - tensor var_12869_begin_0 = const()[name = string("op_12869_begin_0"), val = tensor([15, 0, 0, 0])]; - tensor var_12869_end_0 = const()[name = string("op_12869_end_0"), val = tensor([16, 1, 512, 256])]; - tensor var_12869_end_mask_0 = const()[name = string("op_12869_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_12869_cast_fp16 = slice_by_index(begin = var_12869_begin_0, end = var_12869_end_0, end_mask = var_12869_end_mask_0, x = coreml_update_state_89)[name = string("op_12869_cast_fp16")]; - tensor var_12876_begin_0 = const()[name = string("op_12876_begin_0"), val = tensor([37, 0, 0, 0])]; - tensor var_12876_end_0 = const()[name = string("op_12876_end_0"), val = tensor([38, 1, 512, 256])]; - tensor var_12876_end_mask_0 = const()[name = string("op_12876_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_12876_cast_fp16 = slice_by_index(begin = var_12876_begin_0, end = var_12876_end_0, end_mask = var_12876_end_mask_0, x = coreml_update_state_89)[name = string("op_12876_cast_fp16")]; - tensor var_12913 = const()[name = string("op_12913"), val = tensor([1, 4, 1, 1])]; - tensor x_293_cast_fp16 = tile(reps = var_12913, x = var_12869_cast_fp16)[name = string("x_293_cast_fp16")]; - tensor var_12933 = const()[name = string("op_12933"), val = tensor([1, 4, 1, 1])]; - tensor x_299_cast_fp16 = tile(reps = var_12933, x = var_12876_cast_fp16)[name = string("x_299_cast_fp16")]; - bool var_12960_transpose_x_1 = const()[name = string("op_12960_transpose_x_1"), val = bool(false)]; - bool var_12960_transpose_y_1 = const()[name = string("op_12960_transpose_y_1"), val = bool(true)]; - tensor var_12960 = matmul(transpose_x = var_12960_transpose_x_1, transpose_y = var_12960_transpose_y_1, x = query_states_73_cast_fp16, y = x_293_cast_fp16)[name = string("op_12960")]; - fp16 var_12961_to_fp16 = const()[name = string("op_12961_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_109_cast_fp16 = mul(x = var_12960, y = var_12961_to_fp16)[name = string("attn_weights_109_cast_fp16")]; - tensor attn_weights_111_cast_fp16 = add(x = attn_weights_109_cast_fp16, y = var_2083)[name = string("attn_weights_111_cast_fp16")]; - int32 var_12996 = const()[name = string("op_12996"), val = int32(-1)]; - tensor attn_weights_113_cast_fp16 = softmax(axis = var_12996, x = attn_weights_111_cast_fp16)[name = string("attn_weights_113_cast_fp16")]; - bool attn_output_181_transpose_x_0 = const()[name = string("attn_output_181_transpose_x_0"), val = bool(false)]; - bool attn_output_181_transpose_y_0 = const()[name = string("attn_output_181_transpose_y_0"), val = bool(false)]; - tensor attn_output_181_cast_fp16 = matmul(transpose_x = attn_output_181_transpose_x_0, transpose_y = attn_output_181_transpose_y_0, x = attn_weights_113_cast_fp16, y = x_299_cast_fp16)[name = string("attn_output_181_cast_fp16")]; - tensor var_13007_perm_0 = const()[name = string("op_13007_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_13011 = const()[name = string("op_13011"), val = tensor([1, 1, 1024])]; - tensor var_13007_cast_fp16 = transpose(perm = var_13007_perm_0, x = attn_output_181_cast_fp16)[name = string("transpose_46")]; - tensor attn_output_185_cast_fp16 = reshape(shape = var_13011, x = var_13007_cast_fp16)[name = string("attn_output_185_cast_fp16")]; - tensor var_13016 = const()[name = string("op_13016"), val = tensor([0, 2, 1])]; - string var_13032_pad_type_0 = const()[name = string("op_13032_pad_type_0"), val = string("valid")]; - int32 var_13032_groups_0 = const()[name = string("op_13032_groups_0"), val = int32(1)]; - tensor var_13032_strides_0 = const()[name = string("op_13032_strides_0"), val = tensor([1])]; - tensor var_13032_pad_0 = const()[name = string("op_13032_pad_0"), val = tensor([0, 0])]; - tensor var_13032_dilations_0 = const()[name = string("op_13032_dilations_0"), val = tensor([1])]; - tensor squeeze_18_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395191552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396076352))))[name = string("squeeze_18_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_13017_cast_fp16 = transpose(perm = var_13016, x = attn_output_185_cast_fp16)[name = string("transpose_45")]; - tensor var_13032_cast_fp16 = conv(dilations = var_13032_dilations_0, groups = var_13032_groups_0, pad = var_13032_pad_0, pad_type = var_13032_pad_type_0, strides = var_13032_strides_0, weight = squeeze_18_cast_fp16_to_fp32_to_fp16_palettized, x = var_13017_cast_fp16)[name = string("op_13032_cast_fp16")]; - tensor var_13036 = const()[name = string("op_13036"), val = tensor([0, 2, 1])]; - int32 var_13047 = const()[name = string("op_13047"), val = int32(-1)]; - fp16 const_710_promoted_to_fp16 = const()[name = string("const_710_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_297_cast_fp16 = transpose(perm = var_13036, x = var_13032_cast_fp16)[name = string("transpose_44")]; - tensor var_13049_cast_fp16 = mul(x = hidden_states_297_cast_fp16, y = const_710_promoted_to_fp16)[name = string("op_13049_cast_fp16")]; - bool input_371_interleave_0 = const()[name = string("input_371_interleave_0"), val = bool(false)]; - tensor input_371_cast_fp16 = concat(axis = var_13047, interleave = input_371_interleave_0, values = (hidden_states_297_cast_fp16, var_13049_cast_fp16))[name = string("input_371_cast_fp16")]; - tensor normed_445_axes_0 = const()[name = string("normed_445_axes_0"), val = tensor([-1])]; - fp16 var_13044_to_fp16 = const()[name = string("op_13044_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_445_cast_fp16 = layer_norm(axes = normed_445_axes_0, epsilon = var_13044_to_fp16, x = input_371_cast_fp16)[name = string("normed_445_cast_fp16")]; - tensor normed_447_begin_0 = const()[name = string("normed_447_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_447_end_0 = const()[name = string("normed_447_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_447_end_mask_0 = const()[name = string("normed_447_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_447_cast_fp16 = slice_by_index(begin = normed_447_begin_0, end = normed_447_end_0, end_mask = normed_447_end_mask_0, x = normed_445_cast_fp16)[name = string("normed_447_cast_fp16")]; - tensor var_13063_to_fp16 = const()[name = string("op_13063_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396094848)))]; - tensor attn_output_189_cast_fp16 = mul(x = normed_447_cast_fp16, y = var_13063_to_fp16)[name = string("attn_output_189_cast_fp16")]; - tensor hidden_states_299_cast_fp16 = add(x = hidden_states_289_cast_fp16, y = attn_output_189_cast_fp16)[name = string("hidden_states_299_cast_fp16")]; - int32 var_13076 = const()[name = string("op_13076"), val = int32(-1)]; - fp16 const_714_promoted_to_fp16 = const()[name = string("const_714_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_13078_cast_fp16 = mul(x = hidden_states_299_cast_fp16, y = const_714_promoted_to_fp16)[name = string("op_13078_cast_fp16")]; - bool input_373_interleave_0 = const()[name = string("input_373_interleave_0"), val = bool(false)]; - tensor input_373_cast_fp16 = concat(axis = var_13076, interleave = input_373_interleave_0, values = (hidden_states_299_cast_fp16, var_13078_cast_fp16))[name = string("input_373_cast_fp16")]; - tensor normed_449_axes_0 = const()[name = string("normed_449_axes_0"), val = tensor([-1])]; - fp16 var_13073_to_fp16 = const()[name = string("op_13073_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_449_cast_fp16 = layer_norm(axes = normed_449_axes_0, epsilon = var_13073_to_fp16, x = input_373_cast_fp16)[name = string("normed_449_cast_fp16")]; - tensor normed_451_begin_0 = const()[name = string("normed_451_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_451_end_0 = const()[name = string("normed_451_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_451_end_mask_0 = const()[name = string("normed_451_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_451_cast_fp16 = slice_by_index(begin = normed_451_begin_0, end = normed_451_end_0, end_mask = normed_451_end_mask_0, x = normed_449_cast_fp16)[name = string("normed_451_cast_fp16")]; - tensor var_13092_to_fp16 = const()[name = string("op_13092_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396097216)))]; - tensor x_301_cast_fp16 = mul(x = normed_451_cast_fp16, y = var_13092_to_fp16)[name = string("x_301_cast_fp16")]; - tensor var_13104 = const()[name = string("op_13104"), val = tensor([0, 2, 1])]; - tensor input_375_axes_0 = const()[name = string("input_375_axes_0"), val = tensor([2])]; - tensor var_13105_cast_fp16 = transpose(perm = var_13104, x = x_301_cast_fp16)[name = string("transpose_43")]; - tensor input_375_cast_fp16 = expand_dims(axes = input_375_axes_0, x = var_13105_cast_fp16)[name = string("input_375_cast_fp16")]; - string x_303_pad_type_0 = const()[name = string("x_303_pad_type_0"), val = string("valid")]; - tensor x_303_strides_0 = const()[name = string("x_303_strides_0"), val = tensor([1, 1])]; - tensor x_303_pad_0 = const()[name = string("x_303_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_303_dilations_0 = const()[name = string("x_303_dilations_0"), val = tensor([1, 1])]; - int32 x_303_groups_0 = const()[name = string("x_303_groups_0"), val = int32(1)]; - tensor model_model_layers_18_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396099584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(402071616))))[name = string("model_model_layers_18_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_303_cast_fp16 = conv(dilations = x_303_dilations_0, groups = x_303_groups_0, pad = x_303_pad_0, pad_type = x_303_pad_type_0, strides = x_303_strides_0, weight = model_model_layers_18_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_375_cast_fp16)[name = string("x_303_cast_fp16")]; - string b_37_pad_type_0 = const()[name = string("b_37_pad_type_0"), val = string("valid")]; - tensor b_37_strides_0 = const()[name = string("b_37_strides_0"), val = tensor([1, 1])]; - tensor b_37_pad_0 = const()[name = string("b_37_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_37_dilations_0 = const()[name = string("b_37_dilations_0"), val = tensor([1, 1])]; - int32 b_37_groups_0 = const()[name = string("b_37_groups_0"), val = int32(1)]; - tensor model_model_layers_18_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(402182272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408154304))))[name = string("model_model_layers_18_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_37_cast_fp16 = conv(dilations = b_37_dilations_0, groups = b_37_groups_0, pad = b_37_pad_0, pad_type = b_37_pad_type_0, strides = b_37_strides_0, weight = model_model_layers_18_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_375_cast_fp16)[name = string("b_37_cast_fp16")]; - string var_13130_mode_0 = const()[name = string("op_13130_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_13130_cast_fp16 = gelu(mode = var_13130_mode_0, x = x_303_cast_fp16)[name = string("op_13130_cast_fp16")]; - tensor input_377_cast_fp16 = mul(x = var_13130_cast_fp16, y = b_37_cast_fp16)[name = string("input_377_cast_fp16")]; - string e_37_pad_type_0 = const()[name = string("e_37_pad_type_0"), val = string("valid")]; - tensor e_37_strides_0 = const()[name = string("e_37_strides_0"), val = tensor([1, 1])]; - tensor e_37_pad_0 = const()[name = string("e_37_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_37_dilations_0 = const()[name = string("e_37_dilations_0"), val = tensor([1, 1])]; - int32 e_37_groups_0 = const()[name = string("e_37_groups_0"), val = int32(1)]; - tensor model_model_layers_18_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408264960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414236992))))[name = string("model_model_layers_18_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_37_cast_fp16 = conv(dilations = e_37_dilations_0, groups = e_37_groups_0, pad = e_37_pad_0, pad_type = e_37_pad_type_0, strides = e_37_strides_0, weight = model_model_layers_18_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_377_cast_fp16)[name = string("e_37_cast_fp16")]; - tensor var_13138_axes_0 = const()[name = string("op_13138_axes_0"), val = tensor([2])]; - tensor var_13138_cast_fp16 = squeeze(axes = var_13138_axes_0, x = e_37_cast_fp16)[name = string("op_13138_cast_fp16")]; - tensor var_13139 = const()[name = string("op_13139"), val = tensor([0, 2, 1])]; - int32 var_13150 = const()[name = string("op_13150"), val = int32(-1)]; - fp16 const_718_promoted_to_fp16 = const()[name = string("const_718_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_301_cast_fp16 = transpose(perm = var_13139, x = var_13138_cast_fp16)[name = string("transpose_42")]; - tensor var_13152_cast_fp16 = mul(x = hidden_states_301_cast_fp16, y = const_718_promoted_to_fp16)[name = string("op_13152_cast_fp16")]; - bool input_379_interleave_0 = const()[name = string("input_379_interleave_0"), val = bool(false)]; - tensor input_379_cast_fp16 = concat(axis = var_13150, interleave = input_379_interleave_0, values = (hidden_states_301_cast_fp16, var_13152_cast_fp16))[name = string("input_379_cast_fp16")]; - tensor normed_453_axes_0 = const()[name = string("normed_453_axes_0"), val = tensor([-1])]; - fp16 var_13147_to_fp16 = const()[name = string("op_13147_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_453_cast_fp16 = layer_norm(axes = normed_453_axes_0, epsilon = var_13147_to_fp16, x = input_379_cast_fp16)[name = string("normed_453_cast_fp16")]; - tensor normed_455_begin_0 = const()[name = string("normed_455_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_455_end_0 = const()[name = string("normed_455_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_455_end_mask_0 = const()[name = string("normed_455_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_455_cast_fp16 = slice_by_index(begin = normed_455_begin_0, end = normed_455_end_0, end_mask = normed_455_end_mask_0, x = normed_453_cast_fp16)[name = string("normed_455_cast_fp16")]; - tensor var_13166_to_fp16 = const()[name = string("op_13166_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414255488)))]; - tensor hidden_states_303_cast_fp16 = mul(x = normed_455_cast_fp16, y = var_13166_to_fp16)[name = string("hidden_states_303_cast_fp16")]; - tensor hidden_states_305_cast_fp16 = add(x = hidden_states_299_cast_fp16, y = hidden_states_303_cast_fp16)[name = string("hidden_states_305_cast_fp16")]; - int32 var_13217 = const()[name = string("op_13217"), val = int32(-1)]; - fp16 const_722_promoted_to_fp16 = const()[name = string("const_722_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_13219_cast_fp16 = mul(x = hidden_states_305_cast_fp16, y = const_722_promoted_to_fp16)[name = string("op_13219_cast_fp16")]; - bool input_381_interleave_0 = const()[name = string("input_381_interleave_0"), val = bool(false)]; - tensor input_381_cast_fp16 = concat(axis = var_13217, interleave = input_381_interleave_0, values = (hidden_states_305_cast_fp16, var_13219_cast_fp16))[name = string("input_381_cast_fp16")]; - tensor normed_457_axes_0 = const()[name = string("normed_457_axes_0"), val = tensor([-1])]; - fp16 var_13214_to_fp16 = const()[name = string("op_13214_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_457_cast_fp16 = layer_norm(axes = normed_457_axes_0, epsilon = var_13214_to_fp16, x = input_381_cast_fp16)[name = string("normed_457_cast_fp16")]; - tensor normed_459_begin_0 = const()[name = string("normed_459_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_459_end_0 = const()[name = string("normed_459_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_459_end_mask_0 = const()[name = string("normed_459_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_459_cast_fp16 = slice_by_index(begin = normed_459_begin_0, end = normed_459_end_0, end_mask = normed_459_end_mask_0, x = normed_457_cast_fp16)[name = string("normed_459_cast_fp16")]; - tensor var_13233_to_fp16 = const()[name = string("op_13233_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414257856)))]; - tensor hidden_states_307_cast_fp16 = mul(x = normed_459_cast_fp16, y = var_13233_to_fp16)[name = string("hidden_states_307_cast_fp16")]; - tensor var_13238 = const()[name = string("op_13238"), val = tensor([0, 2, 1])]; - tensor var_13241_axes_0 = const()[name = string("op_13241_axes_0"), val = tensor([2])]; - tensor var_13239_cast_fp16 = transpose(perm = var_13238, x = hidden_states_307_cast_fp16)[name = string("transpose_41")]; - tensor var_13241_cast_fp16 = expand_dims(axes = var_13241_axes_0, x = var_13239_cast_fp16)[name = string("op_13241_cast_fp16")]; - string var_13257_pad_type_0 = const()[name = string("op_13257_pad_type_0"), val = string("valid")]; - tensor var_13257_strides_0 = const()[name = string("op_13257_strides_0"), val = tensor([1, 1])]; - tensor var_13257_pad_0 = const()[name = string("op_13257_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_13257_dilations_0 = const()[name = string("op_13257_dilations_0"), val = tensor([1, 1])]; - int32 var_13257_groups_0 = const()[name = string("op_13257_groups_0"), val = int32(1)]; - tensor var_13257 = conv(dilations = var_13257_dilations_0, groups = var_13257_groups_0, pad = var_13257_pad_0, pad_type = var_13257_pad_type_0, strides = var_13257_strides_0, weight = model_model_layers_19_self_attn_q_proj_weight_palettized, x = var_13241_cast_fp16)[name = string("op_13257")]; - tensor var_13262 = const()[name = string("op_13262"), val = tensor([1, 4, 1, 256])]; - tensor var_13263 = reshape(shape = var_13262, x = var_13257)[name = string("op_13263")]; - string var_13279_pad_type_0 = const()[name = string("op_13279_pad_type_0"), val = string("valid")]; - tensor var_13279_strides_0 = const()[name = string("op_13279_strides_0"), val = tensor([1, 1])]; - tensor var_13279_pad_0 = const()[name = string("op_13279_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_13279_dilations_0 = const()[name = string("op_13279_dilations_0"), val = tensor([1, 1])]; - int32 var_13279_groups_0 = const()[name = string("op_13279_groups_0"), val = int32(1)]; - tensor var_13279 = conv(dilations = var_13279_dilations_0, groups = var_13279_groups_0, pad = var_13279_pad_0, pad_type = var_13279_pad_type_0, strides = var_13279_strides_0, weight = model_model_layers_19_self_attn_k_proj_weight_palettized, x = var_13241_cast_fp16)[name = string("op_13279")]; - tensor var_13284 = const()[name = string("op_13284"), val = tensor([1, 1, 1, 256])]; - tensor var_13285 = reshape(shape = var_13284, x = var_13279)[name = string("op_13285")]; - string var_13301_pad_type_0 = const()[name = string("op_13301_pad_type_0"), val = string("valid")]; - tensor var_13301_strides_0 = const()[name = string("op_13301_strides_0"), val = tensor([1, 1])]; - tensor var_13301_pad_0 = const()[name = string("op_13301_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_13301_dilations_0 = const()[name = string("op_13301_dilations_0"), val = tensor([1, 1])]; - int32 var_13301_groups_0 = const()[name = string("op_13301_groups_0"), val = int32(1)]; - tensor var_13301 = conv(dilations = var_13301_dilations_0, groups = var_13301_groups_0, pad = var_13301_pad_0, pad_type = var_13301_pad_type_0, strides = var_13301_strides_0, weight = model_model_layers_19_self_attn_v_proj_weight_palettized, x = var_13241_cast_fp16)[name = string("op_13301")]; - tensor var_13306 = const()[name = string("op_13306"), val = tensor([1, 1, 1, 256])]; - tensor var_13307 = reshape(shape = var_13306, x = var_13301)[name = string("op_13307")]; - int32 var_13322 = const()[name = string("op_13322"), val = int32(-1)]; - fp16 const_726_promoted = const()[name = string("const_726_promoted"), val = fp16(-0x1p+0)]; - tensor var_13324 = mul(x = var_13263, y = const_726_promoted)[name = string("op_13324")]; - bool input_385_interleave_0 = const()[name = string("input_385_interleave_0"), val = bool(false)]; - tensor input_385 = concat(axis = var_13322, interleave = input_385_interleave_0, values = (var_13263, var_13324))[name = string("input_385")]; - tensor normed_461_axes_0 = const()[name = string("normed_461_axes_0"), val = tensor([-1])]; - fp16 var_13319_to_fp16 = const()[name = string("op_13319_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_461_cast_fp16 = layer_norm(axes = normed_461_axes_0, epsilon = var_13319_to_fp16, x = input_385)[name = string("normed_461_cast_fp16")]; - tensor normed_463_begin_0 = const()[name = string("normed_463_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_463_end_0 = const()[name = string("normed_463_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_463_end_mask_0 = const()[name = string("normed_463_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_463 = slice_by_index(begin = normed_463_begin_0, end = normed_463_end_0, end_mask = normed_463_end_mask_0, x = normed_461_cast_fp16)[name = string("normed_463")]; - tensor var_13338_to_fp16 = const()[name = string("op_13338_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414260224)))]; - tensor q_39_cast_fp16 = mul(x = normed_463, y = var_13338_to_fp16)[name = string("q_39_cast_fp16")]; - int32 var_13349 = const()[name = string("op_13349"), val = int32(-1)]; - fp16 const_730_promoted = const()[name = string("const_730_promoted"), val = fp16(-0x1p+0)]; - tensor var_13351 = mul(x = var_13285, y = const_730_promoted)[name = string("op_13351")]; - bool input_387_interleave_0 = const()[name = string("input_387_interleave_0"), val = bool(false)]; - tensor input_387 = concat(axis = var_13349, interleave = input_387_interleave_0, values = (var_13285, var_13351))[name = string("input_387")]; - tensor normed_465_axes_0 = const()[name = string("normed_465_axes_0"), val = tensor([-1])]; - fp16 var_13346_to_fp16 = const()[name = string("op_13346_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_465_cast_fp16 = layer_norm(axes = normed_465_axes_0, epsilon = var_13346_to_fp16, x = input_387)[name = string("normed_465_cast_fp16")]; - tensor normed_467_begin_0 = const()[name = string("normed_467_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_467_end_0 = const()[name = string("normed_467_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_467_end_mask_0 = const()[name = string("normed_467_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_467 = slice_by_index(begin = normed_467_begin_0, end = normed_467_end_0, end_mask = normed_467_end_mask_0, x = normed_465_cast_fp16)[name = string("normed_467")]; - tensor var_13365_to_fp16 = const()[name = string("op_13365_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414260800)))]; - tensor k_39_cast_fp16 = mul(x = normed_467, y = var_13365_to_fp16)[name = string("k_39_cast_fp16")]; - tensor var_13367_cast_fp16 = mul(x = q_39_cast_fp16, y = cos_1_cast_fp16)[name = string("op_13367_cast_fp16")]; - tensor x1_77_begin_0 = const()[name = string("x1_77_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_77_end_0 = const()[name = string("x1_77_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_77_end_mask_0 = const()[name = string("x1_77_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_77_cast_fp16 = slice_by_index(begin = x1_77_begin_0, end = x1_77_end_0, end_mask = x1_77_end_mask_0, x = q_39_cast_fp16)[name = string("x1_77_cast_fp16")]; - tensor x2_77_begin_0 = const()[name = string("x2_77_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_77_end_0 = const()[name = string("x2_77_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_77_end_mask_0 = const()[name = string("x2_77_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_77_cast_fp16 = slice_by_index(begin = x2_77_begin_0, end = x2_77_end_0, end_mask = x2_77_end_mask_0, x = q_39_cast_fp16)[name = string("x2_77_cast_fp16")]; - fp16 const_736_promoted_to_fp16 = const()[name = string("const_736_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_13388_cast_fp16 = mul(x = x2_77_cast_fp16, y = const_736_promoted_to_fp16)[name = string("op_13388_cast_fp16")]; - int32 var_13390 = const()[name = string("op_13390"), val = int32(-1)]; - bool var_13391_interleave_0 = const()[name = string("op_13391_interleave_0"), val = bool(false)]; - tensor var_13391_cast_fp16 = concat(axis = var_13390, interleave = var_13391_interleave_0, values = (var_13388_cast_fp16, x1_77_cast_fp16))[name = string("op_13391_cast_fp16")]; - tensor var_13392_cast_fp16 = mul(x = var_13391_cast_fp16, y = sin_1_cast_fp16)[name = string("op_13392_cast_fp16")]; - tensor query_states_77_cast_fp16 = add(x = var_13367_cast_fp16, y = var_13392_cast_fp16)[name = string("query_states_77_cast_fp16")]; - tensor var_13395_cast_fp16 = mul(x = k_39_cast_fp16, y = cos_1_cast_fp16)[name = string("op_13395_cast_fp16")]; - tensor x1_79_begin_0 = const()[name = string("x1_79_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_79_end_0 = const()[name = string("x1_79_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_79_end_mask_0 = const()[name = string("x1_79_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_79_cast_fp16 = slice_by_index(begin = x1_79_begin_0, end = x1_79_end_0, end_mask = x1_79_end_mask_0, x = k_39_cast_fp16)[name = string("x1_79_cast_fp16")]; - tensor x2_79_begin_0 = const()[name = string("x2_79_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_79_end_0 = const()[name = string("x2_79_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_79_end_mask_0 = const()[name = string("x2_79_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_79_cast_fp16 = slice_by_index(begin = x2_79_begin_0, end = x2_79_end_0, end_mask = x2_79_end_mask_0, x = k_39_cast_fp16)[name = string("x2_79_cast_fp16")]; - fp16 const_739_promoted_to_fp16 = const()[name = string("const_739_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_13416_cast_fp16 = mul(x = x2_79_cast_fp16, y = const_739_promoted_to_fp16)[name = string("op_13416_cast_fp16")]; - int32 var_13418 = const()[name = string("op_13418"), val = int32(-1)]; - bool var_13419_interleave_0 = const()[name = string("op_13419_interleave_0"), val = bool(false)]; - tensor var_13419_cast_fp16 = concat(axis = var_13418, interleave = var_13419_interleave_0, values = (var_13416_cast_fp16, x1_79_cast_fp16))[name = string("op_13419_cast_fp16")]; - tensor var_13420_cast_fp16 = mul(x = var_13419_cast_fp16, y = sin_1_cast_fp16)[name = string("op_13420_cast_fp16")]; - tensor key_states_77_cast_fp16 = add(x = var_13395_cast_fp16, y = var_13420_cast_fp16)[name = string("key_states_77_cast_fp16")]; - tensor expand_dims_228 = const()[name = string("expand_dims_228"), val = tensor([16])]; - tensor expand_dims_229 = const()[name = string("expand_dims_229"), val = tensor([0])]; - tensor expand_dims_231 = const()[name = string("expand_dims_231"), val = tensor([0])]; - tensor expand_dims_232 = const()[name = string("expand_dims_232"), val = tensor([17])]; - int32 concat_154_axis_0 = const()[name = string("concat_154_axis_0"), val = int32(0)]; - bool concat_154_interleave_0 = const()[name = string("concat_154_interleave_0"), val = bool(false)]; - tensor concat_154 = concat(axis = concat_154_axis_0, interleave = concat_154_interleave_0, values = (expand_dims_228, expand_dims_229, current_pos, expand_dims_231))[name = string("concat_154")]; - tensor concat_155_values1_0 = const()[name = string("concat_155_values1_0"), val = tensor([0])]; - tensor concat_155_values3_0 = const()[name = string("concat_155_values3_0"), val = tensor([0])]; - int32 concat_155_axis_0 = const()[name = string("concat_155_axis_0"), val = int32(0)]; - bool concat_155_interleave_0 = const()[name = string("concat_155_interleave_0"), val = bool(false)]; - tensor concat_155 = concat(axis = concat_155_axis_0, interleave = concat_155_interleave_0, values = (expand_dims_232, concat_155_values1_0, var_1909, concat_155_values3_0))[name = string("concat_155")]; - tensor model_model_kv_cache_local_internal_tensor_assign_33_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_33_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_33_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_33_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_33_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_33_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_33_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_33_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_33_cast_fp16 = slice_update(begin = concat_154, begin_mask = model_model_kv_cache_local_internal_tensor_assign_33_begin_mask_0, end = concat_155, end_mask = model_model_kv_cache_local_internal_tensor_assign_33_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_33_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_33_stride_0, update = key_states_77_cast_fp16, x = coreml_update_state_89)[name = string("model_model_kv_cache_local_internal_tensor_assign_33_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_33_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_38_write_state")]; - tensor coreml_update_state_90 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_38")]; - tensor expand_dims_234 = const()[name = string("expand_dims_234"), val = tensor([38])]; - tensor expand_dims_235 = const()[name = string("expand_dims_235"), val = tensor([0])]; - tensor expand_dims_237 = const()[name = string("expand_dims_237"), val = tensor([0])]; - tensor expand_dims_238 = const()[name = string("expand_dims_238"), val = tensor([39])]; - int32 concat_158_axis_0 = const()[name = string("concat_158_axis_0"), val = int32(0)]; - bool concat_158_interleave_0 = const()[name = string("concat_158_interleave_0"), val = bool(false)]; - tensor concat_158 = concat(axis = concat_158_axis_0, interleave = concat_158_interleave_0, values = (expand_dims_234, expand_dims_235, current_pos, expand_dims_237))[name = string("concat_158")]; - tensor concat_159_values1_0 = const()[name = string("concat_159_values1_0"), val = tensor([0])]; - tensor concat_159_values3_0 = const()[name = string("concat_159_values3_0"), val = tensor([0])]; - int32 concat_159_axis_0 = const()[name = string("concat_159_axis_0"), val = int32(0)]; - bool concat_159_interleave_0 = const()[name = string("concat_159_interleave_0"), val = bool(false)]; - tensor concat_159 = concat(axis = concat_159_axis_0, interleave = concat_159_interleave_0, values = (expand_dims_238, concat_159_values1_0, var_1909, concat_159_values3_0))[name = string("concat_159")]; - tensor model_model_kv_cache_local_internal_tensor_assign_34_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_34_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_34_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_34_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_34_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_34_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_34_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_34_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_34_cast_fp16 = slice_update(begin = concat_158, begin_mask = model_model_kv_cache_local_internal_tensor_assign_34_begin_mask_0, end = concat_159, end_mask = model_model_kv_cache_local_internal_tensor_assign_34_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_34_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_34_stride_0, update = var_13307, x = coreml_update_state_90)[name = string("model_model_kv_cache_local_internal_tensor_assign_34_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_34_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_39_write_state")]; - tensor coreml_update_state_91 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_39")]; - tensor var_13475_begin_0 = const()[name = string("op_13475_begin_0"), val = tensor([16, 0, 0, 0])]; - tensor var_13475_end_0 = const()[name = string("op_13475_end_0"), val = tensor([17, 1, 512, 256])]; - tensor var_13475_end_mask_0 = const()[name = string("op_13475_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_13475_cast_fp16 = slice_by_index(begin = var_13475_begin_0, end = var_13475_end_0, end_mask = var_13475_end_mask_0, x = coreml_update_state_91)[name = string("op_13475_cast_fp16")]; - tensor var_13482_begin_0 = const()[name = string("op_13482_begin_0"), val = tensor([38, 0, 0, 0])]; - tensor var_13482_end_0 = const()[name = string("op_13482_end_0"), val = tensor([39, 1, 512, 256])]; - tensor var_13482_end_mask_0 = const()[name = string("op_13482_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_13482_cast_fp16 = slice_by_index(begin = var_13482_begin_0, end = var_13482_end_0, end_mask = var_13482_end_mask_0, x = coreml_update_state_91)[name = string("op_13482_cast_fp16")]; - tensor var_13519 = const()[name = string("op_13519"), val = tensor([1, 4, 1, 1])]; - tensor x_309_cast_fp16 = tile(reps = var_13519, x = var_13475_cast_fp16)[name = string("x_309_cast_fp16")]; - tensor var_13539 = const()[name = string("op_13539"), val = tensor([1, 4, 1, 1])]; - tensor x_315_cast_fp16 = tile(reps = var_13539, x = var_13482_cast_fp16)[name = string("x_315_cast_fp16")]; - bool var_13566_transpose_x_1 = const()[name = string("op_13566_transpose_x_1"), val = bool(false)]; - bool var_13566_transpose_y_1 = const()[name = string("op_13566_transpose_y_1"), val = bool(true)]; - tensor var_13566 = matmul(transpose_x = var_13566_transpose_x_1, transpose_y = var_13566_transpose_y_1, x = query_states_77_cast_fp16, y = x_309_cast_fp16)[name = string("op_13566")]; - fp16 var_13567_to_fp16 = const()[name = string("op_13567_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_115_cast_fp16 = mul(x = var_13566, y = var_13567_to_fp16)[name = string("attn_weights_115_cast_fp16")]; - tensor attn_weights_117_cast_fp16 = add(x = attn_weights_115_cast_fp16, y = var_2083)[name = string("attn_weights_117_cast_fp16")]; - int32 var_13602 = const()[name = string("op_13602"), val = int32(-1)]; - tensor attn_weights_119_cast_fp16 = softmax(axis = var_13602, x = attn_weights_117_cast_fp16)[name = string("attn_weights_119_cast_fp16")]; - bool attn_output_191_transpose_x_0 = const()[name = string("attn_output_191_transpose_x_0"), val = bool(false)]; - bool attn_output_191_transpose_y_0 = const()[name = string("attn_output_191_transpose_y_0"), val = bool(false)]; - tensor attn_output_191_cast_fp16 = matmul(transpose_x = attn_output_191_transpose_x_0, transpose_y = attn_output_191_transpose_y_0, x = attn_weights_119_cast_fp16, y = x_315_cast_fp16)[name = string("attn_output_191_cast_fp16")]; - tensor var_13613_perm_0 = const()[name = string("op_13613_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_13617 = const()[name = string("op_13617"), val = tensor([1, 1, 1024])]; - tensor var_13613_cast_fp16 = transpose(perm = var_13613_perm_0, x = attn_output_191_cast_fp16)[name = string("transpose_40")]; - tensor attn_output_195_cast_fp16 = reshape(shape = var_13617, x = var_13613_cast_fp16)[name = string("attn_output_195_cast_fp16")]; - tensor var_13622 = const()[name = string("op_13622"), val = tensor([0, 2, 1])]; - string var_13638_pad_type_0 = const()[name = string("op_13638_pad_type_0"), val = string("valid")]; - int32 var_13638_groups_0 = const()[name = string("op_13638_groups_0"), val = int32(1)]; - tensor var_13638_strides_0 = const()[name = string("op_13638_strides_0"), val = tensor([1])]; - tensor var_13638_pad_0 = const()[name = string("op_13638_pad_0"), val = tensor([0, 0])]; - tensor var_13638_dilations_0 = const()[name = string("op_13638_dilations_0"), val = tensor([1])]; - tensor squeeze_19_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414261376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415146176))))[name = string("squeeze_19_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_13623_cast_fp16 = transpose(perm = var_13622, x = attn_output_195_cast_fp16)[name = string("transpose_39")]; - tensor var_13638_cast_fp16 = conv(dilations = var_13638_dilations_0, groups = var_13638_groups_0, pad = var_13638_pad_0, pad_type = var_13638_pad_type_0, strides = var_13638_strides_0, weight = squeeze_19_cast_fp16_to_fp32_to_fp16_palettized, x = var_13623_cast_fp16)[name = string("op_13638_cast_fp16")]; - tensor var_13642 = const()[name = string("op_13642"), val = tensor([0, 2, 1])]; - int32 var_13653 = const()[name = string("op_13653"), val = int32(-1)]; - fp16 const_748_promoted_to_fp16 = const()[name = string("const_748_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_313_cast_fp16 = transpose(perm = var_13642, x = var_13638_cast_fp16)[name = string("transpose_38")]; - tensor var_13655_cast_fp16 = mul(x = hidden_states_313_cast_fp16, y = const_748_promoted_to_fp16)[name = string("op_13655_cast_fp16")]; - bool input_391_interleave_0 = const()[name = string("input_391_interleave_0"), val = bool(false)]; - tensor input_391_cast_fp16 = concat(axis = var_13653, interleave = input_391_interleave_0, values = (hidden_states_313_cast_fp16, var_13655_cast_fp16))[name = string("input_391_cast_fp16")]; - tensor normed_469_axes_0 = const()[name = string("normed_469_axes_0"), val = tensor([-1])]; - fp16 var_13650_to_fp16 = const()[name = string("op_13650_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_469_cast_fp16 = layer_norm(axes = normed_469_axes_0, epsilon = var_13650_to_fp16, x = input_391_cast_fp16)[name = string("normed_469_cast_fp16")]; - tensor normed_471_begin_0 = const()[name = string("normed_471_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_471_end_0 = const()[name = string("normed_471_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_471_end_mask_0 = const()[name = string("normed_471_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_471_cast_fp16 = slice_by_index(begin = normed_471_begin_0, end = normed_471_end_0, end_mask = normed_471_end_mask_0, x = normed_469_cast_fp16)[name = string("normed_471_cast_fp16")]; - tensor var_13669_to_fp16 = const()[name = string("op_13669_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415164672)))]; - tensor attn_output_199_cast_fp16 = mul(x = normed_471_cast_fp16, y = var_13669_to_fp16)[name = string("attn_output_199_cast_fp16")]; - tensor hidden_states_315_cast_fp16 = add(x = hidden_states_305_cast_fp16, y = attn_output_199_cast_fp16)[name = string("hidden_states_315_cast_fp16")]; - int32 var_13682 = const()[name = string("op_13682"), val = int32(-1)]; - fp16 const_752_promoted_to_fp16 = const()[name = string("const_752_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_13684_cast_fp16 = mul(x = hidden_states_315_cast_fp16, y = const_752_promoted_to_fp16)[name = string("op_13684_cast_fp16")]; - bool input_393_interleave_0 = const()[name = string("input_393_interleave_0"), val = bool(false)]; - tensor input_393_cast_fp16 = concat(axis = var_13682, interleave = input_393_interleave_0, values = (hidden_states_315_cast_fp16, var_13684_cast_fp16))[name = string("input_393_cast_fp16")]; - tensor normed_473_axes_0 = const()[name = string("normed_473_axes_0"), val = tensor([-1])]; - fp16 var_13679_to_fp16 = const()[name = string("op_13679_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_473_cast_fp16 = layer_norm(axes = normed_473_axes_0, epsilon = var_13679_to_fp16, x = input_393_cast_fp16)[name = string("normed_473_cast_fp16")]; - tensor normed_475_begin_0 = const()[name = string("normed_475_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_475_end_0 = const()[name = string("normed_475_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_475_end_mask_0 = const()[name = string("normed_475_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_475_cast_fp16 = slice_by_index(begin = normed_475_begin_0, end = normed_475_end_0, end_mask = normed_475_end_mask_0, x = normed_473_cast_fp16)[name = string("normed_475_cast_fp16")]; - tensor var_13698_to_fp16 = const()[name = string("op_13698_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415167040)))]; - tensor x_317_cast_fp16 = mul(x = normed_475_cast_fp16, y = var_13698_to_fp16)[name = string("x_317_cast_fp16")]; - tensor var_13710 = const()[name = string("op_13710"), val = tensor([0, 2, 1])]; - tensor input_395_axes_0 = const()[name = string("input_395_axes_0"), val = tensor([2])]; - tensor var_13711_cast_fp16 = transpose(perm = var_13710, x = x_317_cast_fp16)[name = string("transpose_37")]; - tensor input_395_cast_fp16 = expand_dims(axes = input_395_axes_0, x = var_13711_cast_fp16)[name = string("input_395_cast_fp16")]; - string x_319_pad_type_0 = const()[name = string("x_319_pad_type_0"), val = string("valid")]; - tensor x_319_strides_0 = const()[name = string("x_319_strides_0"), val = tensor([1, 1])]; - tensor x_319_pad_0 = const()[name = string("x_319_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_319_dilations_0 = const()[name = string("x_319_dilations_0"), val = tensor([1, 1])]; - int32 x_319_groups_0 = const()[name = string("x_319_groups_0"), val = int32(1)]; - tensor model_model_layers_19_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415169408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421141440))))[name = string("model_model_layers_19_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_319_cast_fp16 = conv(dilations = x_319_dilations_0, groups = x_319_groups_0, pad = x_319_pad_0, pad_type = x_319_pad_type_0, strides = x_319_strides_0, weight = model_model_layers_19_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_395_cast_fp16)[name = string("x_319_cast_fp16")]; - string b_39_pad_type_0 = const()[name = string("b_39_pad_type_0"), val = string("valid")]; - tensor b_39_strides_0 = const()[name = string("b_39_strides_0"), val = tensor([1, 1])]; - tensor b_39_pad_0 = const()[name = string("b_39_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_39_dilations_0 = const()[name = string("b_39_dilations_0"), val = tensor([1, 1])]; - int32 b_39_groups_0 = const()[name = string("b_39_groups_0"), val = int32(1)]; - tensor model_model_layers_19_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421252096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427224128))))[name = string("model_model_layers_19_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_39_cast_fp16 = conv(dilations = b_39_dilations_0, groups = b_39_groups_0, pad = b_39_pad_0, pad_type = b_39_pad_type_0, strides = b_39_strides_0, weight = model_model_layers_19_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_395_cast_fp16)[name = string("b_39_cast_fp16")]; - string var_13736_mode_0 = const()[name = string("op_13736_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_13736_cast_fp16 = gelu(mode = var_13736_mode_0, x = x_319_cast_fp16)[name = string("op_13736_cast_fp16")]; - tensor input_397_cast_fp16 = mul(x = var_13736_cast_fp16, y = b_39_cast_fp16)[name = string("input_397_cast_fp16")]; - string e_39_pad_type_0 = const()[name = string("e_39_pad_type_0"), val = string("valid")]; - tensor e_39_strides_0 = const()[name = string("e_39_strides_0"), val = tensor([1, 1])]; - tensor e_39_pad_0 = const()[name = string("e_39_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_39_dilations_0 = const()[name = string("e_39_dilations_0"), val = tensor([1, 1])]; - int32 e_39_groups_0 = const()[name = string("e_39_groups_0"), val = int32(1)]; - tensor model_model_layers_19_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427334784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433306816))))[name = string("model_model_layers_19_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_39_cast_fp16 = conv(dilations = e_39_dilations_0, groups = e_39_groups_0, pad = e_39_pad_0, pad_type = e_39_pad_type_0, strides = e_39_strides_0, weight = model_model_layers_19_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_397_cast_fp16)[name = string("e_39_cast_fp16")]; - tensor var_13744_axes_0 = const()[name = string("op_13744_axes_0"), val = tensor([2])]; - tensor var_13744_cast_fp16 = squeeze(axes = var_13744_axes_0, x = e_39_cast_fp16)[name = string("op_13744_cast_fp16")]; - tensor var_13745 = const()[name = string("op_13745"), val = tensor([0, 2, 1])]; - int32 var_13756 = const()[name = string("op_13756"), val = int32(-1)]; - fp16 const_756_promoted_to_fp16 = const()[name = string("const_756_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_317_cast_fp16 = transpose(perm = var_13745, x = var_13744_cast_fp16)[name = string("transpose_36")]; - tensor var_13758_cast_fp16 = mul(x = hidden_states_317_cast_fp16, y = const_756_promoted_to_fp16)[name = string("op_13758_cast_fp16")]; - bool input_399_interleave_0 = const()[name = string("input_399_interleave_0"), val = bool(false)]; - tensor input_399_cast_fp16 = concat(axis = var_13756, interleave = input_399_interleave_0, values = (hidden_states_317_cast_fp16, var_13758_cast_fp16))[name = string("input_399_cast_fp16")]; - tensor normed_477_axes_0 = const()[name = string("normed_477_axes_0"), val = tensor([-1])]; - fp16 var_13753_to_fp16 = const()[name = string("op_13753_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_477_cast_fp16 = layer_norm(axes = normed_477_axes_0, epsilon = var_13753_to_fp16, x = input_399_cast_fp16)[name = string("normed_477_cast_fp16")]; - tensor normed_479_begin_0 = const()[name = string("normed_479_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_479_end_0 = const()[name = string("normed_479_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_479_end_mask_0 = const()[name = string("normed_479_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_479_cast_fp16 = slice_by_index(begin = normed_479_begin_0, end = normed_479_end_0, end_mask = normed_479_end_mask_0, x = normed_477_cast_fp16)[name = string("normed_479_cast_fp16")]; - tensor var_13772_to_fp16 = const()[name = string("op_13772_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433325312)))]; - tensor hidden_states_319_cast_fp16 = mul(x = normed_479_cast_fp16, y = var_13772_to_fp16)[name = string("hidden_states_319_cast_fp16")]; - tensor hidden_states_321_cast_fp16 = add(x = hidden_states_315_cast_fp16, y = hidden_states_319_cast_fp16)[name = string("hidden_states_321_cast_fp16")]; - int32 var_13823 = const()[name = string("op_13823"), val = int32(-1)]; - fp16 const_760_promoted_to_fp16 = const()[name = string("const_760_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_13825_cast_fp16 = mul(x = hidden_states_321_cast_fp16, y = const_760_promoted_to_fp16)[name = string("op_13825_cast_fp16")]; - bool input_401_interleave_0 = const()[name = string("input_401_interleave_0"), val = bool(false)]; - tensor input_401_cast_fp16 = concat(axis = var_13823, interleave = input_401_interleave_0, values = (hidden_states_321_cast_fp16, var_13825_cast_fp16))[name = string("input_401_cast_fp16")]; - tensor normed_481_axes_0 = const()[name = string("normed_481_axes_0"), val = tensor([-1])]; - fp16 var_13820_to_fp16 = const()[name = string("op_13820_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_481_cast_fp16 = layer_norm(axes = normed_481_axes_0, epsilon = var_13820_to_fp16, x = input_401_cast_fp16)[name = string("normed_481_cast_fp16")]; - tensor normed_483_begin_0 = const()[name = string("normed_483_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_483_end_0 = const()[name = string("normed_483_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_483_end_mask_0 = const()[name = string("normed_483_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_483_cast_fp16 = slice_by_index(begin = normed_483_begin_0, end = normed_483_end_0, end_mask = normed_483_end_mask_0, x = normed_481_cast_fp16)[name = string("normed_483_cast_fp16")]; - tensor var_13839_to_fp16 = const()[name = string("op_13839_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433327680)))]; - tensor hidden_states_323_cast_fp16 = mul(x = normed_483_cast_fp16, y = var_13839_to_fp16)[name = string("hidden_states_323_cast_fp16")]; - tensor var_13844 = const()[name = string("op_13844"), val = tensor([0, 2, 1])]; - tensor var_13847_axes_0 = const()[name = string("op_13847_axes_0"), val = tensor([2])]; - tensor var_13845_cast_fp16 = transpose(perm = var_13844, x = hidden_states_323_cast_fp16)[name = string("transpose_35")]; - tensor var_13847_cast_fp16 = expand_dims(axes = var_13847_axes_0, x = var_13845_cast_fp16)[name = string("op_13847_cast_fp16")]; - string var_13863_pad_type_0 = const()[name = string("op_13863_pad_type_0"), val = string("valid")]; - tensor var_13863_strides_0 = const()[name = string("op_13863_strides_0"), val = tensor([1, 1])]; - tensor var_13863_pad_0 = const()[name = string("op_13863_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_13863_dilations_0 = const()[name = string("op_13863_dilations_0"), val = tensor([1, 1])]; - int32 var_13863_groups_0 = const()[name = string("op_13863_groups_0"), val = int32(1)]; - tensor var_13863 = conv(dilations = var_13863_dilations_0, groups = var_13863_groups_0, pad = var_13863_pad_0, pad_type = var_13863_pad_type_0, strides = var_13863_strides_0, weight = model_model_layers_20_self_attn_q_proj_weight_palettized, x = var_13847_cast_fp16)[name = string("op_13863")]; - tensor var_13868 = const()[name = string("op_13868"), val = tensor([1, 4, 1, 256])]; - tensor var_13869 = reshape(shape = var_13868, x = var_13863)[name = string("op_13869")]; - string var_13885_pad_type_0 = const()[name = string("op_13885_pad_type_0"), val = string("valid")]; - tensor var_13885_strides_0 = const()[name = string("op_13885_strides_0"), val = tensor([1, 1])]; - tensor var_13885_pad_0 = const()[name = string("op_13885_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_13885_dilations_0 = const()[name = string("op_13885_dilations_0"), val = tensor([1, 1])]; - int32 var_13885_groups_0 = const()[name = string("op_13885_groups_0"), val = int32(1)]; - tensor var_13885 = conv(dilations = var_13885_dilations_0, groups = var_13885_groups_0, pad = var_13885_pad_0, pad_type = var_13885_pad_type_0, strides = var_13885_strides_0, weight = model_model_layers_20_self_attn_k_proj_weight_palettized, x = var_13847_cast_fp16)[name = string("op_13885")]; - tensor var_13890 = const()[name = string("op_13890"), val = tensor([1, 1, 1, 256])]; - tensor var_13891 = reshape(shape = var_13890, x = var_13885)[name = string("op_13891")]; - string var_13907_pad_type_0 = const()[name = string("op_13907_pad_type_0"), val = string("valid")]; - tensor var_13907_strides_0 = const()[name = string("op_13907_strides_0"), val = tensor([1, 1])]; - tensor var_13907_pad_0 = const()[name = string("op_13907_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_13907_dilations_0 = const()[name = string("op_13907_dilations_0"), val = tensor([1, 1])]; - int32 var_13907_groups_0 = const()[name = string("op_13907_groups_0"), val = int32(1)]; - tensor var_13907 = conv(dilations = var_13907_dilations_0, groups = var_13907_groups_0, pad = var_13907_pad_0, pad_type = var_13907_pad_type_0, strides = var_13907_strides_0, weight = model_model_layers_20_self_attn_v_proj_weight_palettized, x = var_13847_cast_fp16)[name = string("op_13907")]; - tensor var_13912 = const()[name = string("op_13912"), val = tensor([1, 1, 1, 256])]; - tensor var_13913 = reshape(shape = var_13912, x = var_13907)[name = string("op_13913")]; - int32 var_13928 = const()[name = string("op_13928"), val = int32(-1)]; - fp16 const_764_promoted = const()[name = string("const_764_promoted"), val = fp16(-0x1p+0)]; - tensor var_13930 = mul(x = var_13869, y = const_764_promoted)[name = string("op_13930")]; - bool input_405_interleave_0 = const()[name = string("input_405_interleave_0"), val = bool(false)]; - tensor input_405 = concat(axis = var_13928, interleave = input_405_interleave_0, values = (var_13869, var_13930))[name = string("input_405")]; - tensor normed_485_axes_0 = const()[name = string("normed_485_axes_0"), val = tensor([-1])]; - fp16 var_13925_to_fp16 = const()[name = string("op_13925_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_485_cast_fp16 = layer_norm(axes = normed_485_axes_0, epsilon = var_13925_to_fp16, x = input_405)[name = string("normed_485_cast_fp16")]; - tensor normed_487_begin_0 = const()[name = string("normed_487_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_487_end_0 = const()[name = string("normed_487_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_487_end_mask_0 = const()[name = string("normed_487_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_487 = slice_by_index(begin = normed_487_begin_0, end = normed_487_end_0, end_mask = normed_487_end_mask_0, x = normed_485_cast_fp16)[name = string("normed_487")]; - tensor var_13944_to_fp16 = const()[name = string("op_13944_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433330048)))]; - tensor q_41_cast_fp16 = mul(x = normed_487, y = var_13944_to_fp16)[name = string("q_41_cast_fp16")]; - int32 var_13955 = const()[name = string("op_13955"), val = int32(-1)]; - fp16 const_768_promoted = const()[name = string("const_768_promoted"), val = fp16(-0x1p+0)]; - tensor var_13957 = mul(x = var_13891, y = const_768_promoted)[name = string("op_13957")]; - bool input_407_interleave_0 = const()[name = string("input_407_interleave_0"), val = bool(false)]; - tensor input_407 = concat(axis = var_13955, interleave = input_407_interleave_0, values = (var_13891, var_13957))[name = string("input_407")]; - tensor normed_489_axes_0 = const()[name = string("normed_489_axes_0"), val = tensor([-1])]; - fp16 var_13952_to_fp16 = const()[name = string("op_13952_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_489_cast_fp16 = layer_norm(axes = normed_489_axes_0, epsilon = var_13952_to_fp16, x = input_407)[name = string("normed_489_cast_fp16")]; - tensor normed_491_begin_0 = const()[name = string("normed_491_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_491_end_0 = const()[name = string("normed_491_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_491_end_mask_0 = const()[name = string("normed_491_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_491 = slice_by_index(begin = normed_491_begin_0, end = normed_491_end_0, end_mask = normed_491_end_mask_0, x = normed_489_cast_fp16)[name = string("normed_491")]; - tensor var_13971_to_fp16 = const()[name = string("op_13971_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433330624)))]; - tensor k_41_cast_fp16 = mul(x = normed_491, y = var_13971_to_fp16)[name = string("k_41_cast_fp16")]; - tensor var_13973_cast_fp16 = mul(x = q_41_cast_fp16, y = cos_1_cast_fp16)[name = string("op_13973_cast_fp16")]; - tensor x1_81_begin_0 = const()[name = string("x1_81_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_81_end_0 = const()[name = string("x1_81_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_81_end_mask_0 = const()[name = string("x1_81_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_81_cast_fp16 = slice_by_index(begin = x1_81_begin_0, end = x1_81_end_0, end_mask = x1_81_end_mask_0, x = q_41_cast_fp16)[name = string("x1_81_cast_fp16")]; - tensor x2_81_begin_0 = const()[name = string("x2_81_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_81_end_0 = const()[name = string("x2_81_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_81_end_mask_0 = const()[name = string("x2_81_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_81_cast_fp16 = slice_by_index(begin = x2_81_begin_0, end = x2_81_end_0, end_mask = x2_81_end_mask_0, x = q_41_cast_fp16)[name = string("x2_81_cast_fp16")]; - fp16 const_774_promoted_to_fp16 = const()[name = string("const_774_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_13994_cast_fp16 = mul(x = x2_81_cast_fp16, y = const_774_promoted_to_fp16)[name = string("op_13994_cast_fp16")]; - int32 var_13996 = const()[name = string("op_13996"), val = int32(-1)]; - bool var_13997_interleave_0 = const()[name = string("op_13997_interleave_0"), val = bool(false)]; - tensor var_13997_cast_fp16 = concat(axis = var_13996, interleave = var_13997_interleave_0, values = (var_13994_cast_fp16, x1_81_cast_fp16))[name = string("op_13997_cast_fp16")]; - tensor var_13998_cast_fp16 = mul(x = var_13997_cast_fp16, y = sin_1_cast_fp16)[name = string("op_13998_cast_fp16")]; - tensor query_states_81_cast_fp16 = add(x = var_13973_cast_fp16, y = var_13998_cast_fp16)[name = string("query_states_81_cast_fp16")]; - tensor var_14001_cast_fp16 = mul(x = k_41_cast_fp16, y = cos_1_cast_fp16)[name = string("op_14001_cast_fp16")]; - tensor x1_83_begin_0 = const()[name = string("x1_83_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_83_end_0 = const()[name = string("x1_83_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_83_end_mask_0 = const()[name = string("x1_83_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_83_cast_fp16 = slice_by_index(begin = x1_83_begin_0, end = x1_83_end_0, end_mask = x1_83_end_mask_0, x = k_41_cast_fp16)[name = string("x1_83_cast_fp16")]; - tensor x2_83_begin_0 = const()[name = string("x2_83_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_83_end_0 = const()[name = string("x2_83_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_83_end_mask_0 = const()[name = string("x2_83_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_83_cast_fp16 = slice_by_index(begin = x2_83_begin_0, end = x2_83_end_0, end_mask = x2_83_end_mask_0, x = k_41_cast_fp16)[name = string("x2_83_cast_fp16")]; - fp16 const_777_promoted_to_fp16 = const()[name = string("const_777_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_14022_cast_fp16 = mul(x = x2_83_cast_fp16, y = const_777_promoted_to_fp16)[name = string("op_14022_cast_fp16")]; - int32 var_14024 = const()[name = string("op_14024"), val = int32(-1)]; - bool var_14025_interleave_0 = const()[name = string("op_14025_interleave_0"), val = bool(false)]; - tensor var_14025_cast_fp16 = concat(axis = var_14024, interleave = var_14025_interleave_0, values = (var_14022_cast_fp16, x1_83_cast_fp16))[name = string("op_14025_cast_fp16")]; - tensor var_14026_cast_fp16 = mul(x = var_14025_cast_fp16, y = sin_1_cast_fp16)[name = string("op_14026_cast_fp16")]; - tensor key_states_81_cast_fp16 = add(x = var_14001_cast_fp16, y = var_14026_cast_fp16)[name = string("key_states_81_cast_fp16")]; - tensor expand_dims_240 = const()[name = string("expand_dims_240"), val = tensor([17])]; - tensor expand_dims_241 = const()[name = string("expand_dims_241"), val = tensor([0])]; - tensor expand_dims_243 = const()[name = string("expand_dims_243"), val = tensor([0])]; - tensor expand_dims_244 = const()[name = string("expand_dims_244"), val = tensor([18])]; - int32 concat_162_axis_0 = const()[name = string("concat_162_axis_0"), val = int32(0)]; - bool concat_162_interleave_0 = const()[name = string("concat_162_interleave_0"), val = bool(false)]; - tensor concat_162 = concat(axis = concat_162_axis_0, interleave = concat_162_interleave_0, values = (expand_dims_240, expand_dims_241, current_pos, expand_dims_243))[name = string("concat_162")]; - tensor concat_163_values1_0 = const()[name = string("concat_163_values1_0"), val = tensor([0])]; - tensor concat_163_values3_0 = const()[name = string("concat_163_values3_0"), val = tensor([0])]; - int32 concat_163_axis_0 = const()[name = string("concat_163_axis_0"), val = int32(0)]; - bool concat_163_interleave_0 = const()[name = string("concat_163_interleave_0"), val = bool(false)]; - tensor concat_163 = concat(axis = concat_163_axis_0, interleave = concat_163_interleave_0, values = (expand_dims_244, concat_163_values1_0, var_1909, concat_163_values3_0))[name = string("concat_163")]; - tensor model_model_kv_cache_local_internal_tensor_assign_35_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_35_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_35_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_35_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_35_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_35_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_35_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_35_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_35_cast_fp16 = slice_update(begin = concat_162, begin_mask = model_model_kv_cache_local_internal_tensor_assign_35_begin_mask_0, end = concat_163, end_mask = model_model_kv_cache_local_internal_tensor_assign_35_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_35_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_35_stride_0, update = key_states_81_cast_fp16, x = coreml_update_state_91)[name = string("model_model_kv_cache_local_internal_tensor_assign_35_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_35_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_40_write_state")]; - tensor coreml_update_state_92 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_40")]; - tensor expand_dims_246 = const()[name = string("expand_dims_246"), val = tensor([39])]; - tensor expand_dims_247 = const()[name = string("expand_dims_247"), val = tensor([0])]; - tensor expand_dims_249 = const()[name = string("expand_dims_249"), val = tensor([0])]; - tensor expand_dims_250 = const()[name = string("expand_dims_250"), val = tensor([40])]; - int32 concat_166_axis_0 = const()[name = string("concat_166_axis_0"), val = int32(0)]; - bool concat_166_interleave_0 = const()[name = string("concat_166_interleave_0"), val = bool(false)]; - tensor concat_166 = concat(axis = concat_166_axis_0, interleave = concat_166_interleave_0, values = (expand_dims_246, expand_dims_247, current_pos, expand_dims_249))[name = string("concat_166")]; - tensor concat_167_values1_0 = const()[name = string("concat_167_values1_0"), val = tensor([0])]; - tensor concat_167_values3_0 = const()[name = string("concat_167_values3_0"), val = tensor([0])]; - int32 concat_167_axis_0 = const()[name = string("concat_167_axis_0"), val = int32(0)]; - bool concat_167_interleave_0 = const()[name = string("concat_167_interleave_0"), val = bool(false)]; - tensor concat_167 = concat(axis = concat_167_axis_0, interleave = concat_167_interleave_0, values = (expand_dims_250, concat_167_values1_0, var_1909, concat_167_values3_0))[name = string("concat_167")]; - tensor model_model_kv_cache_local_internal_tensor_assign_36_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_36_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_36_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_36_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_36_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_36_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_36_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_36_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_36_cast_fp16 = slice_update(begin = concat_166, begin_mask = model_model_kv_cache_local_internal_tensor_assign_36_begin_mask_0, end = concat_167, end_mask = model_model_kv_cache_local_internal_tensor_assign_36_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_36_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_36_stride_0, update = var_13913, x = coreml_update_state_92)[name = string("model_model_kv_cache_local_internal_tensor_assign_36_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_36_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_41_write_state")]; - tensor coreml_update_state_93 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_41")]; - tensor var_14081_begin_0 = const()[name = string("op_14081_begin_0"), val = tensor([17, 0, 0, 0])]; - tensor var_14081_end_0 = const()[name = string("op_14081_end_0"), val = tensor([18, 1, 512, 256])]; - tensor var_14081_end_mask_0 = const()[name = string("op_14081_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_14081_cast_fp16 = slice_by_index(begin = var_14081_begin_0, end = var_14081_end_0, end_mask = var_14081_end_mask_0, x = coreml_update_state_93)[name = string("op_14081_cast_fp16")]; - tensor var_14088_begin_0 = const()[name = string("op_14088_begin_0"), val = tensor([39, 0, 0, 0])]; - tensor var_14088_end_0 = const()[name = string("op_14088_end_0"), val = tensor([40, 1, 512, 256])]; - tensor var_14088_end_mask_0 = const()[name = string("op_14088_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_14088_cast_fp16 = slice_by_index(begin = var_14088_begin_0, end = var_14088_end_0, end_mask = var_14088_end_mask_0, x = coreml_update_state_93)[name = string("op_14088_cast_fp16")]; - tensor var_14125 = const()[name = string("op_14125"), val = tensor([1, 4, 1, 1])]; - tensor x_325_cast_fp16 = tile(reps = var_14125, x = var_14081_cast_fp16)[name = string("x_325_cast_fp16")]; - tensor var_14145 = const()[name = string("op_14145"), val = tensor([1, 4, 1, 1])]; - tensor x_331_cast_fp16 = tile(reps = var_14145, x = var_14088_cast_fp16)[name = string("x_331_cast_fp16")]; - bool var_14172_transpose_x_1 = const()[name = string("op_14172_transpose_x_1"), val = bool(false)]; - bool var_14172_transpose_y_1 = const()[name = string("op_14172_transpose_y_1"), val = bool(true)]; - tensor var_14172 = matmul(transpose_x = var_14172_transpose_x_1, transpose_y = var_14172_transpose_y_1, x = query_states_81_cast_fp16, y = x_325_cast_fp16)[name = string("op_14172")]; - fp16 var_14173_to_fp16 = const()[name = string("op_14173_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_121_cast_fp16 = mul(x = var_14172, y = var_14173_to_fp16)[name = string("attn_weights_121_cast_fp16")]; - tensor attn_weights_123_cast_fp16 = add(x = attn_weights_121_cast_fp16, y = var_2083)[name = string("attn_weights_123_cast_fp16")]; - int32 var_14208 = const()[name = string("op_14208"), val = int32(-1)]; - tensor attn_weights_125_cast_fp16 = softmax(axis = var_14208, x = attn_weights_123_cast_fp16)[name = string("attn_weights_125_cast_fp16")]; - bool attn_output_201_transpose_x_0 = const()[name = string("attn_output_201_transpose_x_0"), val = bool(false)]; - bool attn_output_201_transpose_y_0 = const()[name = string("attn_output_201_transpose_y_0"), val = bool(false)]; - tensor attn_output_201_cast_fp16 = matmul(transpose_x = attn_output_201_transpose_x_0, transpose_y = attn_output_201_transpose_y_0, x = attn_weights_125_cast_fp16, y = x_331_cast_fp16)[name = string("attn_output_201_cast_fp16")]; - tensor var_14219_perm_0 = const()[name = string("op_14219_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_14223 = const()[name = string("op_14223"), val = tensor([1, 1, 1024])]; - tensor var_14219_cast_fp16 = transpose(perm = var_14219_perm_0, x = attn_output_201_cast_fp16)[name = string("transpose_34")]; - tensor attn_output_205_cast_fp16 = reshape(shape = var_14223, x = var_14219_cast_fp16)[name = string("attn_output_205_cast_fp16")]; - tensor var_14228 = const()[name = string("op_14228"), val = tensor([0, 2, 1])]; - string var_14244_pad_type_0 = const()[name = string("op_14244_pad_type_0"), val = string("valid")]; - int32 var_14244_groups_0 = const()[name = string("op_14244_groups_0"), val = int32(1)]; - tensor var_14244_strides_0 = const()[name = string("op_14244_strides_0"), val = tensor([1])]; - tensor var_14244_pad_0 = const()[name = string("op_14244_pad_0"), val = tensor([0, 0])]; - tensor var_14244_dilations_0 = const()[name = string("op_14244_dilations_0"), val = tensor([1])]; - tensor squeeze_20_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433331200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(434216000))))[name = string("squeeze_20_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_14229_cast_fp16 = transpose(perm = var_14228, x = attn_output_205_cast_fp16)[name = string("transpose_33")]; - tensor var_14244_cast_fp16 = conv(dilations = var_14244_dilations_0, groups = var_14244_groups_0, pad = var_14244_pad_0, pad_type = var_14244_pad_type_0, strides = var_14244_strides_0, weight = squeeze_20_cast_fp16_to_fp32_to_fp16_palettized, x = var_14229_cast_fp16)[name = string("op_14244_cast_fp16")]; - tensor var_14248 = const()[name = string("op_14248"), val = tensor([0, 2, 1])]; - int32 var_14259 = const()[name = string("op_14259"), val = int32(-1)]; - fp16 const_786_promoted_to_fp16 = const()[name = string("const_786_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_329_cast_fp16 = transpose(perm = var_14248, x = var_14244_cast_fp16)[name = string("transpose_32")]; - tensor var_14261_cast_fp16 = mul(x = hidden_states_329_cast_fp16, y = const_786_promoted_to_fp16)[name = string("op_14261_cast_fp16")]; - bool input_411_interleave_0 = const()[name = string("input_411_interleave_0"), val = bool(false)]; - tensor input_411_cast_fp16 = concat(axis = var_14259, interleave = input_411_interleave_0, values = (hidden_states_329_cast_fp16, var_14261_cast_fp16))[name = string("input_411_cast_fp16")]; - tensor normed_493_axes_0 = const()[name = string("normed_493_axes_0"), val = tensor([-1])]; - fp16 var_14256_to_fp16 = const()[name = string("op_14256_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_493_cast_fp16 = layer_norm(axes = normed_493_axes_0, epsilon = var_14256_to_fp16, x = input_411_cast_fp16)[name = string("normed_493_cast_fp16")]; - tensor normed_495_begin_0 = const()[name = string("normed_495_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_495_end_0 = const()[name = string("normed_495_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_495_end_mask_0 = const()[name = string("normed_495_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_495_cast_fp16 = slice_by_index(begin = normed_495_begin_0, end = normed_495_end_0, end_mask = normed_495_end_mask_0, x = normed_493_cast_fp16)[name = string("normed_495_cast_fp16")]; - tensor var_14275_to_fp16 = const()[name = string("op_14275_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(434234496)))]; - tensor attn_output_209_cast_fp16 = mul(x = normed_495_cast_fp16, y = var_14275_to_fp16)[name = string("attn_output_209_cast_fp16")]; - tensor hidden_states_331_cast_fp16 = add(x = hidden_states_321_cast_fp16, y = attn_output_209_cast_fp16)[name = string("hidden_states_331_cast_fp16")]; - int32 var_14288 = const()[name = string("op_14288"), val = int32(-1)]; - fp16 const_790_promoted_to_fp16 = const()[name = string("const_790_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_14290_cast_fp16 = mul(x = hidden_states_331_cast_fp16, y = const_790_promoted_to_fp16)[name = string("op_14290_cast_fp16")]; - bool input_413_interleave_0 = const()[name = string("input_413_interleave_0"), val = bool(false)]; - tensor input_413_cast_fp16 = concat(axis = var_14288, interleave = input_413_interleave_0, values = (hidden_states_331_cast_fp16, var_14290_cast_fp16))[name = string("input_413_cast_fp16")]; - tensor normed_497_axes_0 = const()[name = string("normed_497_axes_0"), val = tensor([-1])]; - fp16 var_14285_to_fp16 = const()[name = string("op_14285_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_497_cast_fp16 = layer_norm(axes = normed_497_axes_0, epsilon = var_14285_to_fp16, x = input_413_cast_fp16)[name = string("normed_497_cast_fp16")]; - tensor normed_499_begin_0 = const()[name = string("normed_499_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_499_end_0 = const()[name = string("normed_499_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_499_end_mask_0 = const()[name = string("normed_499_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_499_cast_fp16 = slice_by_index(begin = normed_499_begin_0, end = normed_499_end_0, end_mask = normed_499_end_mask_0, x = normed_497_cast_fp16)[name = string("normed_499_cast_fp16")]; - tensor var_14304_to_fp16 = const()[name = string("op_14304_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(434236864)))]; - tensor x_333_cast_fp16 = mul(x = normed_499_cast_fp16, y = var_14304_to_fp16)[name = string("x_333_cast_fp16")]; - tensor var_14316 = const()[name = string("op_14316"), val = tensor([0, 2, 1])]; - tensor input_415_axes_0 = const()[name = string("input_415_axes_0"), val = tensor([2])]; - tensor var_14317_cast_fp16 = transpose(perm = var_14316, x = x_333_cast_fp16)[name = string("transpose_31")]; - tensor input_415_cast_fp16 = expand_dims(axes = input_415_axes_0, x = var_14317_cast_fp16)[name = string("input_415_cast_fp16")]; - string x_335_pad_type_0 = const()[name = string("x_335_pad_type_0"), val = string("valid")]; - tensor x_335_strides_0 = const()[name = string("x_335_strides_0"), val = tensor([1, 1])]; - tensor x_335_pad_0 = const()[name = string("x_335_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_335_dilations_0 = const()[name = string("x_335_dilations_0"), val = tensor([1, 1])]; - int32 x_335_groups_0 = const()[name = string("x_335_groups_0"), val = int32(1)]; - tensor model_model_layers_20_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(434239232))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440211264))))[name = string("model_model_layers_20_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_335_cast_fp16 = conv(dilations = x_335_dilations_0, groups = x_335_groups_0, pad = x_335_pad_0, pad_type = x_335_pad_type_0, strides = x_335_strides_0, weight = model_model_layers_20_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_415_cast_fp16)[name = string("x_335_cast_fp16")]; - string b_41_pad_type_0 = const()[name = string("b_41_pad_type_0"), val = string("valid")]; - tensor b_41_strides_0 = const()[name = string("b_41_strides_0"), val = tensor([1, 1])]; - tensor b_41_pad_0 = const()[name = string("b_41_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_41_dilations_0 = const()[name = string("b_41_dilations_0"), val = tensor([1, 1])]; - int32 b_41_groups_0 = const()[name = string("b_41_groups_0"), val = int32(1)]; - tensor model_model_layers_20_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440321920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(446293952))))[name = string("model_model_layers_20_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_41_cast_fp16 = conv(dilations = b_41_dilations_0, groups = b_41_groups_0, pad = b_41_pad_0, pad_type = b_41_pad_type_0, strides = b_41_strides_0, weight = model_model_layers_20_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_415_cast_fp16)[name = string("b_41_cast_fp16")]; - string var_14342_mode_0 = const()[name = string("op_14342_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_14342_cast_fp16 = gelu(mode = var_14342_mode_0, x = x_335_cast_fp16)[name = string("op_14342_cast_fp16")]; - tensor input_417_cast_fp16 = mul(x = var_14342_cast_fp16, y = b_41_cast_fp16)[name = string("input_417_cast_fp16")]; - string e_41_pad_type_0 = const()[name = string("e_41_pad_type_0"), val = string("valid")]; - tensor e_41_strides_0 = const()[name = string("e_41_strides_0"), val = tensor([1, 1])]; - tensor e_41_pad_0 = const()[name = string("e_41_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_41_dilations_0 = const()[name = string("e_41_dilations_0"), val = tensor([1, 1])]; - int32 e_41_groups_0 = const()[name = string("e_41_groups_0"), val = int32(1)]; - tensor model_model_layers_20_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(446404608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(452376640))))[name = string("model_model_layers_20_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_41_cast_fp16 = conv(dilations = e_41_dilations_0, groups = e_41_groups_0, pad = e_41_pad_0, pad_type = e_41_pad_type_0, strides = e_41_strides_0, weight = model_model_layers_20_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_417_cast_fp16)[name = string("e_41_cast_fp16")]; - tensor var_14350_axes_0 = const()[name = string("op_14350_axes_0"), val = tensor([2])]; - tensor var_14350_cast_fp16 = squeeze(axes = var_14350_axes_0, x = e_41_cast_fp16)[name = string("op_14350_cast_fp16")]; - tensor var_14351 = const()[name = string("op_14351"), val = tensor([0, 2, 1])]; - int32 var_14362 = const()[name = string("op_14362"), val = int32(-1)]; - fp16 const_794_promoted_to_fp16 = const()[name = string("const_794_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_333_cast_fp16 = transpose(perm = var_14351, x = var_14350_cast_fp16)[name = string("transpose_30")]; - tensor var_14364_cast_fp16 = mul(x = hidden_states_333_cast_fp16, y = const_794_promoted_to_fp16)[name = string("op_14364_cast_fp16")]; - bool input_419_interleave_0 = const()[name = string("input_419_interleave_0"), val = bool(false)]; - tensor input_419_cast_fp16 = concat(axis = var_14362, interleave = input_419_interleave_0, values = (hidden_states_333_cast_fp16, var_14364_cast_fp16))[name = string("input_419_cast_fp16")]; - tensor normed_501_axes_0 = const()[name = string("normed_501_axes_0"), val = tensor([-1])]; - fp16 var_14359_to_fp16 = const()[name = string("op_14359_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_501_cast_fp16 = layer_norm(axes = normed_501_axes_0, epsilon = var_14359_to_fp16, x = input_419_cast_fp16)[name = string("normed_501_cast_fp16")]; - tensor normed_503_begin_0 = const()[name = string("normed_503_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_503_end_0 = const()[name = string("normed_503_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_503_end_mask_0 = const()[name = string("normed_503_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_503_cast_fp16 = slice_by_index(begin = normed_503_begin_0, end = normed_503_end_0, end_mask = normed_503_end_mask_0, x = normed_501_cast_fp16)[name = string("normed_503_cast_fp16")]; - tensor var_14378_to_fp16 = const()[name = string("op_14378_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(452395136)))]; - tensor hidden_states_335_cast_fp16 = mul(x = normed_503_cast_fp16, y = var_14378_to_fp16)[name = string("hidden_states_335_cast_fp16")]; - tensor hidden_states_337_cast_fp16 = add(x = hidden_states_331_cast_fp16, y = hidden_states_335_cast_fp16)[name = string("hidden_states_337_cast_fp16")]; - int32 var_14429 = const()[name = string("op_14429"), val = int32(-1)]; - fp16 const_798_promoted_to_fp16 = const()[name = string("const_798_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_14431_cast_fp16 = mul(x = hidden_states_337_cast_fp16, y = const_798_promoted_to_fp16)[name = string("op_14431_cast_fp16")]; - bool input_421_interleave_0 = const()[name = string("input_421_interleave_0"), val = bool(false)]; - tensor input_421_cast_fp16 = concat(axis = var_14429, interleave = input_421_interleave_0, values = (hidden_states_337_cast_fp16, var_14431_cast_fp16))[name = string("input_421_cast_fp16")]; - tensor normed_505_axes_0 = const()[name = string("normed_505_axes_0"), val = tensor([-1])]; - fp16 var_14426_to_fp16 = const()[name = string("op_14426_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_505_cast_fp16 = layer_norm(axes = normed_505_axes_0, epsilon = var_14426_to_fp16, x = input_421_cast_fp16)[name = string("normed_505_cast_fp16")]; - tensor normed_507_begin_0 = const()[name = string("normed_507_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_507_end_0 = const()[name = string("normed_507_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_507_end_mask_0 = const()[name = string("normed_507_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_507_cast_fp16 = slice_by_index(begin = normed_507_begin_0, end = normed_507_end_0, end_mask = normed_507_end_mask_0, x = normed_505_cast_fp16)[name = string("normed_507_cast_fp16")]; - tensor var_14445_to_fp16 = const()[name = string("op_14445_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(452397504)))]; - tensor hidden_states_339_cast_fp16 = mul(x = normed_507_cast_fp16, y = var_14445_to_fp16)[name = string("hidden_states_339_cast_fp16")]; - tensor var_14450 = const()[name = string("op_14450"), val = tensor([0, 2, 1])]; - tensor var_14453_axes_0 = const()[name = string("op_14453_axes_0"), val = tensor([2])]; - tensor var_14451_cast_fp16 = transpose(perm = var_14450, x = hidden_states_339_cast_fp16)[name = string("transpose_29")]; - tensor var_14453_cast_fp16 = expand_dims(axes = var_14453_axes_0, x = var_14451_cast_fp16)[name = string("op_14453_cast_fp16")]; - string var_14469_pad_type_0 = const()[name = string("op_14469_pad_type_0"), val = string("valid")]; - tensor var_14469_strides_0 = const()[name = string("op_14469_strides_0"), val = tensor([1, 1])]; - tensor var_14469_pad_0 = const()[name = string("op_14469_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_14469_dilations_0 = const()[name = string("op_14469_dilations_0"), val = tensor([1, 1])]; - int32 var_14469_groups_0 = const()[name = string("op_14469_groups_0"), val = int32(1)]; - tensor var_14469 = conv(dilations = var_14469_dilations_0, groups = var_14469_groups_0, pad = var_14469_pad_0, pad_type = var_14469_pad_type_0, strides = var_14469_strides_0, weight = model_model_layers_21_self_attn_q_proj_weight_palettized, x = var_14453_cast_fp16)[name = string("op_14469")]; - tensor var_14474 = const()[name = string("op_14474"), val = tensor([1, 4, 1, 256])]; - tensor var_14475 = reshape(shape = var_14474, x = var_14469)[name = string("op_14475")]; - string var_14491_pad_type_0 = const()[name = string("op_14491_pad_type_0"), val = string("valid")]; - tensor var_14491_strides_0 = const()[name = string("op_14491_strides_0"), val = tensor([1, 1])]; - tensor var_14491_pad_0 = const()[name = string("op_14491_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_14491_dilations_0 = const()[name = string("op_14491_dilations_0"), val = tensor([1, 1])]; - int32 var_14491_groups_0 = const()[name = string("op_14491_groups_0"), val = int32(1)]; - tensor var_14491 = conv(dilations = var_14491_dilations_0, groups = var_14491_groups_0, pad = var_14491_pad_0, pad_type = var_14491_pad_type_0, strides = var_14491_strides_0, weight = model_model_layers_21_self_attn_k_proj_weight_palettized, x = var_14453_cast_fp16)[name = string("op_14491")]; - tensor var_14496 = const()[name = string("op_14496"), val = tensor([1, 1, 1, 256])]; - tensor var_14497 = reshape(shape = var_14496, x = var_14491)[name = string("op_14497")]; - string var_14513_pad_type_0 = const()[name = string("op_14513_pad_type_0"), val = string("valid")]; - tensor var_14513_strides_0 = const()[name = string("op_14513_strides_0"), val = tensor([1, 1])]; - tensor var_14513_pad_0 = const()[name = string("op_14513_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_14513_dilations_0 = const()[name = string("op_14513_dilations_0"), val = tensor([1, 1])]; - int32 var_14513_groups_0 = const()[name = string("op_14513_groups_0"), val = int32(1)]; - tensor var_14513 = conv(dilations = var_14513_dilations_0, groups = var_14513_groups_0, pad = var_14513_pad_0, pad_type = var_14513_pad_type_0, strides = var_14513_strides_0, weight = model_model_layers_21_self_attn_v_proj_weight_palettized, x = var_14453_cast_fp16)[name = string("op_14513")]; - tensor var_14518 = const()[name = string("op_14518"), val = tensor([1, 1, 1, 256])]; - tensor var_14519 = reshape(shape = var_14518, x = var_14513)[name = string("op_14519")]; - int32 var_14534 = const()[name = string("op_14534"), val = int32(-1)]; - fp16 const_802_promoted = const()[name = string("const_802_promoted"), val = fp16(-0x1p+0)]; - tensor var_14536 = mul(x = var_14475, y = const_802_promoted)[name = string("op_14536")]; - bool input_425_interleave_0 = const()[name = string("input_425_interleave_0"), val = bool(false)]; - tensor input_425 = concat(axis = var_14534, interleave = input_425_interleave_0, values = (var_14475, var_14536))[name = string("input_425")]; - tensor normed_509_axes_0 = const()[name = string("normed_509_axes_0"), val = tensor([-1])]; - fp16 var_14531_to_fp16 = const()[name = string("op_14531_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_509_cast_fp16 = layer_norm(axes = normed_509_axes_0, epsilon = var_14531_to_fp16, x = input_425)[name = string("normed_509_cast_fp16")]; - tensor normed_511_begin_0 = const()[name = string("normed_511_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_511_end_0 = const()[name = string("normed_511_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_511_end_mask_0 = const()[name = string("normed_511_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_511 = slice_by_index(begin = normed_511_begin_0, end = normed_511_end_0, end_mask = normed_511_end_mask_0, x = normed_509_cast_fp16)[name = string("normed_511")]; - tensor var_14550_to_fp16 = const()[name = string("op_14550_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(452399872)))]; - tensor q_43_cast_fp16 = mul(x = normed_511, y = var_14550_to_fp16)[name = string("q_43_cast_fp16")]; - int32 var_14561 = const()[name = string("op_14561"), val = int32(-1)]; - fp16 const_806_promoted = const()[name = string("const_806_promoted"), val = fp16(-0x1p+0)]; - tensor var_14563 = mul(x = var_14497, y = const_806_promoted)[name = string("op_14563")]; - bool input_427_interleave_0 = const()[name = string("input_427_interleave_0"), val = bool(false)]; - tensor input_427 = concat(axis = var_14561, interleave = input_427_interleave_0, values = (var_14497, var_14563))[name = string("input_427")]; - tensor normed_513_axes_0 = const()[name = string("normed_513_axes_0"), val = tensor([-1])]; - fp16 var_14558_to_fp16 = const()[name = string("op_14558_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_513_cast_fp16 = layer_norm(axes = normed_513_axes_0, epsilon = var_14558_to_fp16, x = input_427)[name = string("normed_513_cast_fp16")]; - tensor normed_515_begin_0 = const()[name = string("normed_515_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_515_end_0 = const()[name = string("normed_515_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_515_end_mask_0 = const()[name = string("normed_515_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_515 = slice_by_index(begin = normed_515_begin_0, end = normed_515_end_0, end_mask = normed_515_end_mask_0, x = normed_513_cast_fp16)[name = string("normed_515")]; - tensor var_14577_to_fp16 = const()[name = string("op_14577_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(452400448)))]; - tensor k_43_cast_fp16 = mul(x = normed_515, y = var_14577_to_fp16)[name = string("k_43_cast_fp16")]; - tensor var_14579_cast_fp16 = mul(x = q_43_cast_fp16, y = cos_1_cast_fp16)[name = string("op_14579_cast_fp16")]; - tensor x1_85_begin_0 = const()[name = string("x1_85_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_85_end_0 = const()[name = string("x1_85_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_85_end_mask_0 = const()[name = string("x1_85_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_85_cast_fp16 = slice_by_index(begin = x1_85_begin_0, end = x1_85_end_0, end_mask = x1_85_end_mask_0, x = q_43_cast_fp16)[name = string("x1_85_cast_fp16")]; - tensor x2_85_begin_0 = const()[name = string("x2_85_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_85_end_0 = const()[name = string("x2_85_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_85_end_mask_0 = const()[name = string("x2_85_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_85_cast_fp16 = slice_by_index(begin = x2_85_begin_0, end = x2_85_end_0, end_mask = x2_85_end_mask_0, x = q_43_cast_fp16)[name = string("x2_85_cast_fp16")]; - fp16 const_812_promoted_to_fp16 = const()[name = string("const_812_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_14600_cast_fp16 = mul(x = x2_85_cast_fp16, y = const_812_promoted_to_fp16)[name = string("op_14600_cast_fp16")]; - int32 var_14602 = const()[name = string("op_14602"), val = int32(-1)]; - bool var_14603_interleave_0 = const()[name = string("op_14603_interleave_0"), val = bool(false)]; - tensor var_14603_cast_fp16 = concat(axis = var_14602, interleave = var_14603_interleave_0, values = (var_14600_cast_fp16, x1_85_cast_fp16))[name = string("op_14603_cast_fp16")]; - tensor var_14604_cast_fp16 = mul(x = var_14603_cast_fp16, y = sin_1_cast_fp16)[name = string("op_14604_cast_fp16")]; - tensor query_states_85_cast_fp16 = add(x = var_14579_cast_fp16, y = var_14604_cast_fp16)[name = string("query_states_85_cast_fp16")]; - tensor var_14607_cast_fp16 = mul(x = k_43_cast_fp16, y = cos_1_cast_fp16)[name = string("op_14607_cast_fp16")]; - tensor x1_87_begin_0 = const()[name = string("x1_87_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_87_end_0 = const()[name = string("x1_87_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_87_end_mask_0 = const()[name = string("x1_87_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_87_cast_fp16 = slice_by_index(begin = x1_87_begin_0, end = x1_87_end_0, end_mask = x1_87_end_mask_0, x = k_43_cast_fp16)[name = string("x1_87_cast_fp16")]; - tensor x2_87_begin_0 = const()[name = string("x2_87_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_87_end_0 = const()[name = string("x2_87_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_87_end_mask_0 = const()[name = string("x2_87_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_87_cast_fp16 = slice_by_index(begin = x2_87_begin_0, end = x2_87_end_0, end_mask = x2_87_end_mask_0, x = k_43_cast_fp16)[name = string("x2_87_cast_fp16")]; - fp16 const_815_promoted_to_fp16 = const()[name = string("const_815_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_14628_cast_fp16 = mul(x = x2_87_cast_fp16, y = const_815_promoted_to_fp16)[name = string("op_14628_cast_fp16")]; - int32 var_14630 = const()[name = string("op_14630"), val = int32(-1)]; - bool var_14631_interleave_0 = const()[name = string("op_14631_interleave_0"), val = bool(false)]; - tensor var_14631_cast_fp16 = concat(axis = var_14630, interleave = var_14631_interleave_0, values = (var_14628_cast_fp16, x1_87_cast_fp16))[name = string("op_14631_cast_fp16")]; - tensor var_14632_cast_fp16 = mul(x = var_14631_cast_fp16, y = sin_1_cast_fp16)[name = string("op_14632_cast_fp16")]; - tensor key_states_85_cast_fp16 = add(x = var_14607_cast_fp16, y = var_14632_cast_fp16)[name = string("key_states_85_cast_fp16")]; - tensor expand_dims_252 = const()[name = string("expand_dims_252"), val = tensor([18])]; - tensor expand_dims_253 = const()[name = string("expand_dims_253"), val = tensor([0])]; - tensor expand_dims_255 = const()[name = string("expand_dims_255"), val = tensor([0])]; - tensor expand_dims_256 = const()[name = string("expand_dims_256"), val = tensor([19])]; - int32 concat_170_axis_0 = const()[name = string("concat_170_axis_0"), val = int32(0)]; - bool concat_170_interleave_0 = const()[name = string("concat_170_interleave_0"), val = bool(false)]; - tensor concat_170 = concat(axis = concat_170_axis_0, interleave = concat_170_interleave_0, values = (expand_dims_252, expand_dims_253, current_pos, expand_dims_255))[name = string("concat_170")]; - tensor concat_171_values1_0 = const()[name = string("concat_171_values1_0"), val = tensor([0])]; - tensor concat_171_values3_0 = const()[name = string("concat_171_values3_0"), val = tensor([0])]; - int32 concat_171_axis_0 = const()[name = string("concat_171_axis_0"), val = int32(0)]; - bool concat_171_interleave_0 = const()[name = string("concat_171_interleave_0"), val = bool(false)]; - tensor concat_171 = concat(axis = concat_171_axis_0, interleave = concat_171_interleave_0, values = (expand_dims_256, concat_171_values1_0, var_1909, concat_171_values3_0))[name = string("concat_171")]; - tensor model_model_kv_cache_local_internal_tensor_assign_37_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_37_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_37_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_37_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_37_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_37_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_37_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_37_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_37_cast_fp16 = slice_update(begin = concat_170, begin_mask = model_model_kv_cache_local_internal_tensor_assign_37_begin_mask_0, end = concat_171, end_mask = model_model_kv_cache_local_internal_tensor_assign_37_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_37_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_37_stride_0, update = key_states_85_cast_fp16, x = coreml_update_state_93)[name = string("model_model_kv_cache_local_internal_tensor_assign_37_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_37_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_42_write_state")]; - tensor coreml_update_state_94 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_42")]; - tensor expand_dims_258 = const()[name = string("expand_dims_258"), val = tensor([40])]; - tensor expand_dims_259 = const()[name = string("expand_dims_259"), val = tensor([0])]; - tensor expand_dims_261 = const()[name = string("expand_dims_261"), val = tensor([0])]; - tensor expand_dims_262 = const()[name = string("expand_dims_262"), val = tensor([41])]; - int32 concat_174_axis_0 = const()[name = string("concat_174_axis_0"), val = int32(0)]; - bool concat_174_interleave_0 = const()[name = string("concat_174_interleave_0"), val = bool(false)]; - tensor concat_174 = concat(axis = concat_174_axis_0, interleave = concat_174_interleave_0, values = (expand_dims_258, expand_dims_259, current_pos, expand_dims_261))[name = string("concat_174")]; - tensor concat_175_values1_0 = const()[name = string("concat_175_values1_0"), val = tensor([0])]; - tensor concat_175_values3_0 = const()[name = string("concat_175_values3_0"), val = tensor([0])]; - int32 concat_175_axis_0 = const()[name = string("concat_175_axis_0"), val = int32(0)]; - bool concat_175_interleave_0 = const()[name = string("concat_175_interleave_0"), val = bool(false)]; - tensor concat_175 = concat(axis = concat_175_axis_0, interleave = concat_175_interleave_0, values = (expand_dims_262, concat_175_values1_0, var_1909, concat_175_values3_0))[name = string("concat_175")]; - tensor model_model_kv_cache_local_internal_tensor_assign_38_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_38_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_38_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_38_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_38_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_38_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_38_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_38_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_38_cast_fp16 = slice_update(begin = concat_174, begin_mask = model_model_kv_cache_local_internal_tensor_assign_38_begin_mask_0, end = concat_175, end_mask = model_model_kv_cache_local_internal_tensor_assign_38_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_38_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_38_stride_0, update = var_14519, x = coreml_update_state_94)[name = string("model_model_kv_cache_local_internal_tensor_assign_38_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_38_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_43_write_state")]; - tensor coreml_update_state_95 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_43")]; - tensor var_14687_begin_0 = const()[name = string("op_14687_begin_0"), val = tensor([18, 0, 0, 0])]; - tensor var_14687_end_0 = const()[name = string("op_14687_end_0"), val = tensor([19, 1, 512, 256])]; - tensor var_14687_end_mask_0 = const()[name = string("op_14687_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_14687_cast_fp16 = slice_by_index(begin = var_14687_begin_0, end = var_14687_end_0, end_mask = var_14687_end_mask_0, x = coreml_update_state_95)[name = string("op_14687_cast_fp16")]; - tensor var_14694_begin_0 = const()[name = string("op_14694_begin_0"), val = tensor([40, 0, 0, 0])]; - tensor var_14694_end_0 = const()[name = string("op_14694_end_0"), val = tensor([41, 1, 512, 256])]; - tensor var_14694_end_mask_0 = const()[name = string("op_14694_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_14694_cast_fp16 = slice_by_index(begin = var_14694_begin_0, end = var_14694_end_0, end_mask = var_14694_end_mask_0, x = coreml_update_state_95)[name = string("op_14694_cast_fp16")]; - tensor var_14731 = const()[name = string("op_14731"), val = tensor([1, 4, 1, 1])]; - tensor x_341_cast_fp16 = tile(reps = var_14731, x = var_14687_cast_fp16)[name = string("x_341_cast_fp16")]; - tensor var_14751 = const()[name = string("op_14751"), val = tensor([1, 4, 1, 1])]; - tensor x_347_cast_fp16 = tile(reps = var_14751, x = var_14694_cast_fp16)[name = string("x_347_cast_fp16")]; - bool var_14778_transpose_x_1 = const()[name = string("op_14778_transpose_x_1"), val = bool(false)]; - bool var_14778_transpose_y_1 = const()[name = string("op_14778_transpose_y_1"), val = bool(true)]; - tensor var_14778 = matmul(transpose_x = var_14778_transpose_x_1, transpose_y = var_14778_transpose_y_1, x = query_states_85_cast_fp16, y = x_341_cast_fp16)[name = string("op_14778")]; - fp16 var_14779_to_fp16 = const()[name = string("op_14779_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_127_cast_fp16 = mul(x = var_14778, y = var_14779_to_fp16)[name = string("attn_weights_127_cast_fp16")]; - tensor attn_weights_129_cast_fp16 = add(x = attn_weights_127_cast_fp16, y = var_2083)[name = string("attn_weights_129_cast_fp16")]; - int32 var_14814 = const()[name = string("op_14814"), val = int32(-1)]; - tensor attn_weights_131_cast_fp16 = softmax(axis = var_14814, x = attn_weights_129_cast_fp16)[name = string("attn_weights_131_cast_fp16")]; - bool attn_output_211_transpose_x_0 = const()[name = string("attn_output_211_transpose_x_0"), val = bool(false)]; - bool attn_output_211_transpose_y_0 = const()[name = string("attn_output_211_transpose_y_0"), val = bool(false)]; - tensor attn_output_211_cast_fp16 = matmul(transpose_x = attn_output_211_transpose_x_0, transpose_y = attn_output_211_transpose_y_0, x = attn_weights_131_cast_fp16, y = x_347_cast_fp16)[name = string("attn_output_211_cast_fp16")]; - tensor var_14825_perm_0 = const()[name = string("op_14825_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_14829 = const()[name = string("op_14829"), val = tensor([1, 1, 1024])]; - tensor var_14825_cast_fp16 = transpose(perm = var_14825_perm_0, x = attn_output_211_cast_fp16)[name = string("transpose_28")]; - tensor attn_output_215_cast_fp16 = reshape(shape = var_14829, x = var_14825_cast_fp16)[name = string("attn_output_215_cast_fp16")]; - tensor var_14834 = const()[name = string("op_14834"), val = tensor([0, 2, 1])]; - string var_14850_pad_type_0 = const()[name = string("op_14850_pad_type_0"), val = string("valid")]; - int32 var_14850_groups_0 = const()[name = string("op_14850_groups_0"), val = int32(1)]; - tensor var_14850_strides_0 = const()[name = string("op_14850_strides_0"), val = tensor([1])]; - tensor var_14850_pad_0 = const()[name = string("op_14850_pad_0"), val = tensor([0, 0])]; - tensor var_14850_dilations_0 = const()[name = string("op_14850_dilations_0"), val = tensor([1])]; - tensor squeeze_21_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(452401024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453285824))))[name = string("squeeze_21_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_14835_cast_fp16 = transpose(perm = var_14834, x = attn_output_215_cast_fp16)[name = string("transpose_27")]; - tensor var_14850_cast_fp16 = conv(dilations = var_14850_dilations_0, groups = var_14850_groups_0, pad = var_14850_pad_0, pad_type = var_14850_pad_type_0, strides = var_14850_strides_0, weight = squeeze_21_cast_fp16_to_fp32_to_fp16_palettized, x = var_14835_cast_fp16)[name = string("op_14850_cast_fp16")]; - tensor var_14854 = const()[name = string("op_14854"), val = tensor([0, 2, 1])]; - int32 var_14865 = const()[name = string("op_14865"), val = int32(-1)]; - fp16 const_824_promoted_to_fp16 = const()[name = string("const_824_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_345_cast_fp16 = transpose(perm = var_14854, x = var_14850_cast_fp16)[name = string("transpose_26")]; - tensor var_14867_cast_fp16 = mul(x = hidden_states_345_cast_fp16, y = const_824_promoted_to_fp16)[name = string("op_14867_cast_fp16")]; - bool input_431_interleave_0 = const()[name = string("input_431_interleave_0"), val = bool(false)]; - tensor input_431_cast_fp16 = concat(axis = var_14865, interleave = input_431_interleave_0, values = (hidden_states_345_cast_fp16, var_14867_cast_fp16))[name = string("input_431_cast_fp16")]; - tensor normed_517_axes_0 = const()[name = string("normed_517_axes_0"), val = tensor([-1])]; - fp16 var_14862_to_fp16 = const()[name = string("op_14862_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_517_cast_fp16 = layer_norm(axes = normed_517_axes_0, epsilon = var_14862_to_fp16, x = input_431_cast_fp16)[name = string("normed_517_cast_fp16")]; - tensor normed_519_begin_0 = const()[name = string("normed_519_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_519_end_0 = const()[name = string("normed_519_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_519_end_mask_0 = const()[name = string("normed_519_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_519_cast_fp16 = slice_by_index(begin = normed_519_begin_0, end = normed_519_end_0, end_mask = normed_519_end_mask_0, x = normed_517_cast_fp16)[name = string("normed_519_cast_fp16")]; - tensor var_14881_to_fp16 = const()[name = string("op_14881_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453304320)))]; - tensor attn_output_219_cast_fp16 = mul(x = normed_519_cast_fp16, y = var_14881_to_fp16)[name = string("attn_output_219_cast_fp16")]; - tensor hidden_states_347_cast_fp16 = add(x = hidden_states_337_cast_fp16, y = attn_output_219_cast_fp16)[name = string("hidden_states_347_cast_fp16")]; - int32 var_14894 = const()[name = string("op_14894"), val = int32(-1)]; - fp16 const_828_promoted_to_fp16 = const()[name = string("const_828_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_14896_cast_fp16 = mul(x = hidden_states_347_cast_fp16, y = const_828_promoted_to_fp16)[name = string("op_14896_cast_fp16")]; - bool input_433_interleave_0 = const()[name = string("input_433_interleave_0"), val = bool(false)]; - tensor input_433_cast_fp16 = concat(axis = var_14894, interleave = input_433_interleave_0, values = (hidden_states_347_cast_fp16, var_14896_cast_fp16))[name = string("input_433_cast_fp16")]; - tensor normed_521_axes_0 = const()[name = string("normed_521_axes_0"), val = tensor([-1])]; - fp16 var_14891_to_fp16 = const()[name = string("op_14891_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_521_cast_fp16 = layer_norm(axes = normed_521_axes_0, epsilon = var_14891_to_fp16, x = input_433_cast_fp16)[name = string("normed_521_cast_fp16")]; - tensor normed_523_begin_0 = const()[name = string("normed_523_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_523_end_0 = const()[name = string("normed_523_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_523_end_mask_0 = const()[name = string("normed_523_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_523_cast_fp16 = slice_by_index(begin = normed_523_begin_0, end = normed_523_end_0, end_mask = normed_523_end_mask_0, x = normed_521_cast_fp16)[name = string("normed_523_cast_fp16")]; - tensor var_14910_to_fp16 = const()[name = string("op_14910_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453306688)))]; - tensor x_349_cast_fp16 = mul(x = normed_523_cast_fp16, y = var_14910_to_fp16)[name = string("x_349_cast_fp16")]; - tensor var_14922 = const()[name = string("op_14922"), val = tensor([0, 2, 1])]; - tensor input_435_axes_0 = const()[name = string("input_435_axes_0"), val = tensor([2])]; - tensor var_14923_cast_fp16 = transpose(perm = var_14922, x = x_349_cast_fp16)[name = string("transpose_25")]; - tensor input_435_cast_fp16 = expand_dims(axes = input_435_axes_0, x = var_14923_cast_fp16)[name = string("input_435_cast_fp16")]; - string x_351_pad_type_0 = const()[name = string("x_351_pad_type_0"), val = string("valid")]; - tensor x_351_strides_0 = const()[name = string("x_351_strides_0"), val = tensor([1, 1])]; - tensor x_351_pad_0 = const()[name = string("x_351_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_351_dilations_0 = const()[name = string("x_351_dilations_0"), val = tensor([1, 1])]; - int32 x_351_groups_0 = const()[name = string("x_351_groups_0"), val = int32(1)]; - tensor model_model_layers_21_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453309056))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459281088))))[name = string("model_model_layers_21_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_351_cast_fp16 = conv(dilations = x_351_dilations_0, groups = x_351_groups_0, pad = x_351_pad_0, pad_type = x_351_pad_type_0, strides = x_351_strides_0, weight = model_model_layers_21_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_435_cast_fp16)[name = string("x_351_cast_fp16")]; - string b_43_pad_type_0 = const()[name = string("b_43_pad_type_0"), val = string("valid")]; - tensor b_43_strides_0 = const()[name = string("b_43_strides_0"), val = tensor([1, 1])]; - tensor b_43_pad_0 = const()[name = string("b_43_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_43_dilations_0 = const()[name = string("b_43_dilations_0"), val = tensor([1, 1])]; - int32 b_43_groups_0 = const()[name = string("b_43_groups_0"), val = int32(1)]; - tensor model_model_layers_21_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459391744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(465363776))))[name = string("model_model_layers_21_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_43_cast_fp16 = conv(dilations = b_43_dilations_0, groups = b_43_groups_0, pad = b_43_pad_0, pad_type = b_43_pad_type_0, strides = b_43_strides_0, weight = model_model_layers_21_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_435_cast_fp16)[name = string("b_43_cast_fp16")]; - string var_14948_mode_0 = const()[name = string("op_14948_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_14948_cast_fp16 = gelu(mode = var_14948_mode_0, x = x_351_cast_fp16)[name = string("op_14948_cast_fp16")]; - tensor input_437_cast_fp16 = mul(x = var_14948_cast_fp16, y = b_43_cast_fp16)[name = string("input_437_cast_fp16")]; - string e_43_pad_type_0 = const()[name = string("e_43_pad_type_0"), val = string("valid")]; - tensor e_43_strides_0 = const()[name = string("e_43_strides_0"), val = tensor([1, 1])]; - tensor e_43_pad_0 = const()[name = string("e_43_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_43_dilations_0 = const()[name = string("e_43_dilations_0"), val = tensor([1, 1])]; - int32 e_43_groups_0 = const()[name = string("e_43_groups_0"), val = int32(1)]; - tensor model_model_layers_21_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(465474432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471446464))))[name = string("model_model_layers_21_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_43_cast_fp16 = conv(dilations = e_43_dilations_0, groups = e_43_groups_0, pad = e_43_pad_0, pad_type = e_43_pad_type_0, strides = e_43_strides_0, weight = model_model_layers_21_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_437_cast_fp16)[name = string("e_43_cast_fp16")]; - tensor var_14956_axes_0 = const()[name = string("op_14956_axes_0"), val = tensor([2])]; - tensor var_14956_cast_fp16 = squeeze(axes = var_14956_axes_0, x = e_43_cast_fp16)[name = string("op_14956_cast_fp16")]; - tensor var_14957 = const()[name = string("op_14957"), val = tensor([0, 2, 1])]; - int32 var_14968 = const()[name = string("op_14968"), val = int32(-1)]; - fp16 const_832_promoted_to_fp16 = const()[name = string("const_832_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_349_cast_fp16 = transpose(perm = var_14957, x = var_14956_cast_fp16)[name = string("transpose_24")]; - tensor var_14970_cast_fp16 = mul(x = hidden_states_349_cast_fp16, y = const_832_promoted_to_fp16)[name = string("op_14970_cast_fp16")]; - bool input_439_interleave_0 = const()[name = string("input_439_interleave_0"), val = bool(false)]; - tensor input_439_cast_fp16 = concat(axis = var_14968, interleave = input_439_interleave_0, values = (hidden_states_349_cast_fp16, var_14970_cast_fp16))[name = string("input_439_cast_fp16")]; - tensor normed_525_axes_0 = const()[name = string("normed_525_axes_0"), val = tensor([-1])]; - fp16 var_14965_to_fp16 = const()[name = string("op_14965_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_525_cast_fp16 = layer_norm(axes = normed_525_axes_0, epsilon = var_14965_to_fp16, x = input_439_cast_fp16)[name = string("normed_525_cast_fp16")]; - tensor normed_527_begin_0 = const()[name = string("normed_527_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_527_end_0 = const()[name = string("normed_527_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_527_end_mask_0 = const()[name = string("normed_527_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_527_cast_fp16 = slice_by_index(begin = normed_527_begin_0, end = normed_527_end_0, end_mask = normed_527_end_mask_0, x = normed_525_cast_fp16)[name = string("normed_527_cast_fp16")]; - tensor var_14984_to_fp16 = const()[name = string("op_14984_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471464960)))]; - tensor hidden_states_351_cast_fp16 = mul(x = normed_527_cast_fp16, y = var_14984_to_fp16)[name = string("hidden_states_351_cast_fp16")]; - tensor hidden_states_353_cast_fp16 = add(x = hidden_states_347_cast_fp16, y = hidden_states_351_cast_fp16)[name = string("hidden_states_353_cast_fp16")]; - int32 var_15035 = const()[name = string("op_15035"), val = int32(-1)]; - fp16 const_836_promoted_to_fp16 = const()[name = string("const_836_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_15037_cast_fp16 = mul(x = hidden_states_353_cast_fp16, y = const_836_promoted_to_fp16)[name = string("op_15037_cast_fp16")]; - bool input_441_interleave_0 = const()[name = string("input_441_interleave_0"), val = bool(false)]; - tensor input_441_cast_fp16 = concat(axis = var_15035, interleave = input_441_interleave_0, values = (hidden_states_353_cast_fp16, var_15037_cast_fp16))[name = string("input_441_cast_fp16")]; - tensor normed_529_axes_0 = const()[name = string("normed_529_axes_0"), val = tensor([-1])]; - fp16 var_15032_to_fp16 = const()[name = string("op_15032_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_529_cast_fp16 = layer_norm(axes = normed_529_axes_0, epsilon = var_15032_to_fp16, x = input_441_cast_fp16)[name = string("normed_529_cast_fp16")]; - tensor normed_531_begin_0 = const()[name = string("normed_531_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_531_end_0 = const()[name = string("normed_531_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_531_end_mask_0 = const()[name = string("normed_531_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_531_cast_fp16 = slice_by_index(begin = normed_531_begin_0, end = normed_531_end_0, end_mask = normed_531_end_mask_0, x = normed_529_cast_fp16)[name = string("normed_531_cast_fp16")]; - tensor var_15051_to_fp16 = const()[name = string("op_15051_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471467328)))]; - tensor hidden_states_355_cast_fp16 = mul(x = normed_531_cast_fp16, y = var_15051_to_fp16)[name = string("hidden_states_355_cast_fp16")]; - tensor var_15056 = const()[name = string("op_15056"), val = tensor([0, 2, 1])]; - tensor var_15059_axes_0 = const()[name = string("op_15059_axes_0"), val = tensor([2])]; - tensor var_15057_cast_fp16 = transpose(perm = var_15056, x = hidden_states_355_cast_fp16)[name = string("transpose_23")]; - tensor var_15059_cast_fp16 = expand_dims(axes = var_15059_axes_0, x = var_15057_cast_fp16)[name = string("op_15059_cast_fp16")]; - string var_15075_pad_type_0 = const()[name = string("op_15075_pad_type_0"), val = string("valid")]; - tensor var_15075_strides_0 = const()[name = string("op_15075_strides_0"), val = tensor([1, 1])]; - tensor var_15075_pad_0 = const()[name = string("op_15075_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_15075_dilations_0 = const()[name = string("op_15075_dilations_0"), val = tensor([1, 1])]; - int32 var_15075_groups_0 = const()[name = string("op_15075_groups_0"), val = int32(1)]; - tensor var_15075 = conv(dilations = var_15075_dilations_0, groups = var_15075_groups_0, pad = var_15075_pad_0, pad_type = var_15075_pad_type_0, strides = var_15075_strides_0, weight = model_model_layers_22_self_attn_q_proj_weight_palettized, x = var_15059_cast_fp16)[name = string("op_15075")]; - tensor var_15080 = const()[name = string("op_15080"), val = tensor([1, 4, 1, 256])]; - tensor var_15081 = reshape(shape = var_15080, x = var_15075)[name = string("op_15081")]; - string var_15097_pad_type_0 = const()[name = string("op_15097_pad_type_0"), val = string("valid")]; - tensor var_15097_strides_0 = const()[name = string("op_15097_strides_0"), val = tensor([1, 1])]; - tensor var_15097_pad_0 = const()[name = string("op_15097_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_15097_dilations_0 = const()[name = string("op_15097_dilations_0"), val = tensor([1, 1])]; - int32 var_15097_groups_0 = const()[name = string("op_15097_groups_0"), val = int32(1)]; - tensor var_15097 = conv(dilations = var_15097_dilations_0, groups = var_15097_groups_0, pad = var_15097_pad_0, pad_type = var_15097_pad_type_0, strides = var_15097_strides_0, weight = model_model_layers_22_self_attn_k_proj_weight_palettized, x = var_15059_cast_fp16)[name = string("op_15097")]; - tensor var_15102 = const()[name = string("op_15102"), val = tensor([1, 1, 1, 256])]; - tensor var_15103 = reshape(shape = var_15102, x = var_15097)[name = string("op_15103")]; - string var_15119_pad_type_0 = const()[name = string("op_15119_pad_type_0"), val = string("valid")]; - tensor var_15119_strides_0 = const()[name = string("op_15119_strides_0"), val = tensor([1, 1])]; - tensor var_15119_pad_0 = const()[name = string("op_15119_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_15119_dilations_0 = const()[name = string("op_15119_dilations_0"), val = tensor([1, 1])]; - int32 var_15119_groups_0 = const()[name = string("op_15119_groups_0"), val = int32(1)]; - tensor var_15119 = conv(dilations = var_15119_dilations_0, groups = var_15119_groups_0, pad = var_15119_pad_0, pad_type = var_15119_pad_type_0, strides = var_15119_strides_0, weight = model_model_layers_22_self_attn_v_proj_weight_palettized, x = var_15059_cast_fp16)[name = string("op_15119")]; - tensor var_15124 = const()[name = string("op_15124"), val = tensor([1, 1, 1, 256])]; - tensor var_15125 = reshape(shape = var_15124, x = var_15119)[name = string("op_15125")]; - int32 var_15140 = const()[name = string("op_15140"), val = int32(-1)]; - fp16 const_840_promoted = const()[name = string("const_840_promoted"), val = fp16(-0x1p+0)]; - tensor var_15142 = mul(x = var_15081, y = const_840_promoted)[name = string("op_15142")]; - bool input_445_interleave_0 = const()[name = string("input_445_interleave_0"), val = bool(false)]; - tensor input_445 = concat(axis = var_15140, interleave = input_445_interleave_0, values = (var_15081, var_15142))[name = string("input_445")]; - tensor normed_533_axes_0 = const()[name = string("normed_533_axes_0"), val = tensor([-1])]; - fp16 var_15137_to_fp16 = const()[name = string("op_15137_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_533_cast_fp16 = layer_norm(axes = normed_533_axes_0, epsilon = var_15137_to_fp16, x = input_445)[name = string("normed_533_cast_fp16")]; - tensor normed_535_begin_0 = const()[name = string("normed_535_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_535_end_0 = const()[name = string("normed_535_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_535_end_mask_0 = const()[name = string("normed_535_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_535 = slice_by_index(begin = normed_535_begin_0, end = normed_535_end_0, end_mask = normed_535_end_mask_0, x = normed_533_cast_fp16)[name = string("normed_535")]; - tensor var_15156_to_fp16 = const()[name = string("op_15156_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471469696)))]; - tensor q_45_cast_fp16 = mul(x = normed_535, y = var_15156_to_fp16)[name = string("q_45_cast_fp16")]; - int32 var_15167 = const()[name = string("op_15167"), val = int32(-1)]; - fp16 const_844_promoted = const()[name = string("const_844_promoted"), val = fp16(-0x1p+0)]; - tensor var_15169 = mul(x = var_15103, y = const_844_promoted)[name = string("op_15169")]; - bool input_447_interleave_0 = const()[name = string("input_447_interleave_0"), val = bool(false)]; - tensor input_447 = concat(axis = var_15167, interleave = input_447_interleave_0, values = (var_15103, var_15169))[name = string("input_447")]; - tensor normed_537_axes_0 = const()[name = string("normed_537_axes_0"), val = tensor([-1])]; - fp16 var_15164_to_fp16 = const()[name = string("op_15164_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_537_cast_fp16 = layer_norm(axes = normed_537_axes_0, epsilon = var_15164_to_fp16, x = input_447)[name = string("normed_537_cast_fp16")]; - tensor normed_539_begin_0 = const()[name = string("normed_539_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_539_end_0 = const()[name = string("normed_539_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_539_end_mask_0 = const()[name = string("normed_539_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_539 = slice_by_index(begin = normed_539_begin_0, end = normed_539_end_0, end_mask = normed_539_end_mask_0, x = normed_537_cast_fp16)[name = string("normed_539")]; - tensor var_15183_to_fp16 = const()[name = string("op_15183_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471470272)))]; - tensor k_45_cast_fp16 = mul(x = normed_539, y = var_15183_to_fp16)[name = string("k_45_cast_fp16")]; - tensor var_15185_cast_fp16 = mul(x = q_45_cast_fp16, y = cos_1_cast_fp16)[name = string("op_15185_cast_fp16")]; - tensor x1_89_begin_0 = const()[name = string("x1_89_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_89_end_0 = const()[name = string("x1_89_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_89_end_mask_0 = const()[name = string("x1_89_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_89_cast_fp16 = slice_by_index(begin = x1_89_begin_0, end = x1_89_end_0, end_mask = x1_89_end_mask_0, x = q_45_cast_fp16)[name = string("x1_89_cast_fp16")]; - tensor x2_89_begin_0 = const()[name = string("x2_89_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_89_end_0 = const()[name = string("x2_89_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_89_end_mask_0 = const()[name = string("x2_89_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_89_cast_fp16 = slice_by_index(begin = x2_89_begin_0, end = x2_89_end_0, end_mask = x2_89_end_mask_0, x = q_45_cast_fp16)[name = string("x2_89_cast_fp16")]; - fp16 const_850_promoted_to_fp16 = const()[name = string("const_850_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_15206_cast_fp16 = mul(x = x2_89_cast_fp16, y = const_850_promoted_to_fp16)[name = string("op_15206_cast_fp16")]; - int32 var_15208 = const()[name = string("op_15208"), val = int32(-1)]; - bool var_15209_interleave_0 = const()[name = string("op_15209_interleave_0"), val = bool(false)]; - tensor var_15209_cast_fp16 = concat(axis = var_15208, interleave = var_15209_interleave_0, values = (var_15206_cast_fp16, x1_89_cast_fp16))[name = string("op_15209_cast_fp16")]; - tensor var_15210_cast_fp16 = mul(x = var_15209_cast_fp16, y = sin_1_cast_fp16)[name = string("op_15210_cast_fp16")]; - tensor query_states_89_cast_fp16 = add(x = var_15185_cast_fp16, y = var_15210_cast_fp16)[name = string("query_states_89_cast_fp16")]; - tensor var_15213_cast_fp16 = mul(x = k_45_cast_fp16, y = cos_1_cast_fp16)[name = string("op_15213_cast_fp16")]; - tensor x1_91_begin_0 = const()[name = string("x1_91_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_91_end_0 = const()[name = string("x1_91_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_91_end_mask_0 = const()[name = string("x1_91_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_91_cast_fp16 = slice_by_index(begin = x1_91_begin_0, end = x1_91_end_0, end_mask = x1_91_end_mask_0, x = k_45_cast_fp16)[name = string("x1_91_cast_fp16")]; - tensor x2_91_begin_0 = const()[name = string("x2_91_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_91_end_0 = const()[name = string("x2_91_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_91_end_mask_0 = const()[name = string("x2_91_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_91_cast_fp16 = slice_by_index(begin = x2_91_begin_0, end = x2_91_end_0, end_mask = x2_91_end_mask_0, x = k_45_cast_fp16)[name = string("x2_91_cast_fp16")]; - fp16 const_853_promoted_to_fp16 = const()[name = string("const_853_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_15234_cast_fp16 = mul(x = x2_91_cast_fp16, y = const_853_promoted_to_fp16)[name = string("op_15234_cast_fp16")]; - int32 var_15236 = const()[name = string("op_15236"), val = int32(-1)]; - bool var_15237_interleave_0 = const()[name = string("op_15237_interleave_0"), val = bool(false)]; - tensor var_15237_cast_fp16 = concat(axis = var_15236, interleave = var_15237_interleave_0, values = (var_15234_cast_fp16, x1_91_cast_fp16))[name = string("op_15237_cast_fp16")]; - tensor var_15238_cast_fp16 = mul(x = var_15237_cast_fp16, y = sin_1_cast_fp16)[name = string("op_15238_cast_fp16")]; - tensor key_states_89_cast_fp16 = add(x = var_15213_cast_fp16, y = var_15238_cast_fp16)[name = string("key_states_89_cast_fp16")]; - tensor expand_dims_264 = const()[name = string("expand_dims_264"), val = tensor([19])]; - tensor expand_dims_265 = const()[name = string("expand_dims_265"), val = tensor([0])]; - tensor expand_dims_267 = const()[name = string("expand_dims_267"), val = tensor([0])]; - tensor expand_dims_268 = const()[name = string("expand_dims_268"), val = tensor([20])]; - int32 concat_178_axis_0 = const()[name = string("concat_178_axis_0"), val = int32(0)]; - bool concat_178_interleave_0 = const()[name = string("concat_178_interleave_0"), val = bool(false)]; - tensor concat_178 = concat(axis = concat_178_axis_0, interleave = concat_178_interleave_0, values = (expand_dims_264, expand_dims_265, current_pos, expand_dims_267))[name = string("concat_178")]; - tensor concat_179_values1_0 = const()[name = string("concat_179_values1_0"), val = tensor([0])]; - tensor concat_179_values3_0 = const()[name = string("concat_179_values3_0"), val = tensor([0])]; - int32 concat_179_axis_0 = const()[name = string("concat_179_axis_0"), val = int32(0)]; - bool concat_179_interleave_0 = const()[name = string("concat_179_interleave_0"), val = bool(false)]; - tensor concat_179 = concat(axis = concat_179_axis_0, interleave = concat_179_interleave_0, values = (expand_dims_268, concat_179_values1_0, var_1909, concat_179_values3_0))[name = string("concat_179")]; - tensor model_model_kv_cache_local_internal_tensor_assign_39_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_39_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_39_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_39_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_39_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_39_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_39_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_39_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_39_cast_fp16 = slice_update(begin = concat_178, begin_mask = model_model_kv_cache_local_internal_tensor_assign_39_begin_mask_0, end = concat_179, end_mask = model_model_kv_cache_local_internal_tensor_assign_39_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_39_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_39_stride_0, update = key_states_89_cast_fp16, x = coreml_update_state_95)[name = string("model_model_kv_cache_local_internal_tensor_assign_39_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_39_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_44_write_state")]; - tensor coreml_update_state_96 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_44")]; - tensor expand_dims_270 = const()[name = string("expand_dims_270"), val = tensor([41])]; - tensor expand_dims_271 = const()[name = string("expand_dims_271"), val = tensor([0])]; - tensor expand_dims_273 = const()[name = string("expand_dims_273"), val = tensor([0])]; - tensor expand_dims_274 = const()[name = string("expand_dims_274"), val = tensor([42])]; - int32 concat_182_axis_0 = const()[name = string("concat_182_axis_0"), val = int32(0)]; - bool concat_182_interleave_0 = const()[name = string("concat_182_interleave_0"), val = bool(false)]; - tensor concat_182 = concat(axis = concat_182_axis_0, interleave = concat_182_interleave_0, values = (expand_dims_270, expand_dims_271, current_pos, expand_dims_273))[name = string("concat_182")]; - tensor concat_183_values1_0 = const()[name = string("concat_183_values1_0"), val = tensor([0])]; - tensor concat_183_values3_0 = const()[name = string("concat_183_values3_0"), val = tensor([0])]; - int32 concat_183_axis_0 = const()[name = string("concat_183_axis_0"), val = int32(0)]; - bool concat_183_interleave_0 = const()[name = string("concat_183_interleave_0"), val = bool(false)]; - tensor concat_183 = concat(axis = concat_183_axis_0, interleave = concat_183_interleave_0, values = (expand_dims_274, concat_183_values1_0, var_1909, concat_183_values3_0))[name = string("concat_183")]; - tensor model_model_kv_cache_local_internal_tensor_assign_40_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_40_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_40_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_40_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_40_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_40_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_40_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_40_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_40_cast_fp16 = slice_update(begin = concat_182, begin_mask = model_model_kv_cache_local_internal_tensor_assign_40_begin_mask_0, end = concat_183, end_mask = model_model_kv_cache_local_internal_tensor_assign_40_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_40_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_40_stride_0, update = var_15125, x = coreml_update_state_96)[name = string("model_model_kv_cache_local_internal_tensor_assign_40_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_40_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_45_write_state")]; - tensor coreml_update_state_97 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_45")]; - tensor var_15293_begin_0 = const()[name = string("op_15293_begin_0"), val = tensor([19, 0, 0, 0])]; - tensor var_15293_end_0 = const()[name = string("op_15293_end_0"), val = tensor([20, 1, 512, 256])]; - tensor var_15293_end_mask_0 = const()[name = string("op_15293_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_15293_cast_fp16 = slice_by_index(begin = var_15293_begin_0, end = var_15293_end_0, end_mask = var_15293_end_mask_0, x = coreml_update_state_97)[name = string("op_15293_cast_fp16")]; - tensor var_15300_begin_0 = const()[name = string("op_15300_begin_0"), val = tensor([41, 0, 0, 0])]; - tensor var_15300_end_0 = const()[name = string("op_15300_end_0"), val = tensor([42, 1, 512, 256])]; - tensor var_15300_end_mask_0 = const()[name = string("op_15300_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_15300_cast_fp16 = slice_by_index(begin = var_15300_begin_0, end = var_15300_end_0, end_mask = var_15300_end_mask_0, x = coreml_update_state_97)[name = string("op_15300_cast_fp16")]; - tensor var_15337 = const()[name = string("op_15337"), val = tensor([1, 4, 1, 1])]; - tensor x_357_cast_fp16 = tile(reps = var_15337, x = var_15293_cast_fp16)[name = string("x_357_cast_fp16")]; - tensor var_15357 = const()[name = string("op_15357"), val = tensor([1, 4, 1, 1])]; - tensor x_363_cast_fp16 = tile(reps = var_15357, x = var_15300_cast_fp16)[name = string("x_363_cast_fp16")]; - bool var_15384_transpose_x_1 = const()[name = string("op_15384_transpose_x_1"), val = bool(false)]; - bool var_15384_transpose_y_1 = const()[name = string("op_15384_transpose_y_1"), val = bool(true)]; - tensor var_15384 = matmul(transpose_x = var_15384_transpose_x_1, transpose_y = var_15384_transpose_y_1, x = query_states_89_cast_fp16, y = x_357_cast_fp16)[name = string("op_15384")]; - fp16 var_15385_to_fp16 = const()[name = string("op_15385_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_133_cast_fp16 = mul(x = var_15384, y = var_15385_to_fp16)[name = string("attn_weights_133_cast_fp16")]; - tensor attn_weights_135_cast_fp16 = add(x = attn_weights_133_cast_fp16, y = var_2083)[name = string("attn_weights_135_cast_fp16")]; - int32 var_15420 = const()[name = string("op_15420"), val = int32(-1)]; - tensor attn_weights_137_cast_fp16 = softmax(axis = var_15420, x = attn_weights_135_cast_fp16)[name = string("attn_weights_137_cast_fp16")]; - bool attn_output_221_transpose_x_0 = const()[name = string("attn_output_221_transpose_x_0"), val = bool(false)]; - bool attn_output_221_transpose_y_0 = const()[name = string("attn_output_221_transpose_y_0"), val = bool(false)]; - tensor attn_output_221_cast_fp16 = matmul(transpose_x = attn_output_221_transpose_x_0, transpose_y = attn_output_221_transpose_y_0, x = attn_weights_137_cast_fp16, y = x_363_cast_fp16)[name = string("attn_output_221_cast_fp16")]; - tensor var_15431_perm_0 = const()[name = string("op_15431_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_15435 = const()[name = string("op_15435"), val = tensor([1, 1, 1024])]; - tensor var_15431_cast_fp16 = transpose(perm = var_15431_perm_0, x = attn_output_221_cast_fp16)[name = string("transpose_22")]; - tensor attn_output_225_cast_fp16 = reshape(shape = var_15435, x = var_15431_cast_fp16)[name = string("attn_output_225_cast_fp16")]; - tensor var_15440 = const()[name = string("op_15440"), val = tensor([0, 2, 1])]; - string var_15456_pad_type_0 = const()[name = string("op_15456_pad_type_0"), val = string("valid")]; - int32 var_15456_groups_0 = const()[name = string("op_15456_groups_0"), val = int32(1)]; - tensor var_15456_strides_0 = const()[name = string("op_15456_strides_0"), val = tensor([1])]; - tensor var_15456_pad_0 = const()[name = string("op_15456_pad_0"), val = tensor([0, 0])]; - tensor var_15456_dilations_0 = const()[name = string("op_15456_dilations_0"), val = tensor([1])]; - tensor squeeze_22_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471470848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(472355648))))[name = string("squeeze_22_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_15441_cast_fp16 = transpose(perm = var_15440, x = attn_output_225_cast_fp16)[name = string("transpose_21")]; - tensor var_15456_cast_fp16 = conv(dilations = var_15456_dilations_0, groups = var_15456_groups_0, pad = var_15456_pad_0, pad_type = var_15456_pad_type_0, strides = var_15456_strides_0, weight = squeeze_22_cast_fp16_to_fp32_to_fp16_palettized, x = var_15441_cast_fp16)[name = string("op_15456_cast_fp16")]; - tensor var_15460 = const()[name = string("op_15460"), val = tensor([0, 2, 1])]; - int32 var_15471 = const()[name = string("op_15471"), val = int32(-1)]; - fp16 const_862_promoted_to_fp16 = const()[name = string("const_862_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_361_cast_fp16 = transpose(perm = var_15460, x = var_15456_cast_fp16)[name = string("transpose_20")]; - tensor var_15473_cast_fp16 = mul(x = hidden_states_361_cast_fp16, y = const_862_promoted_to_fp16)[name = string("op_15473_cast_fp16")]; - bool input_451_interleave_0 = const()[name = string("input_451_interleave_0"), val = bool(false)]; - tensor input_451_cast_fp16 = concat(axis = var_15471, interleave = input_451_interleave_0, values = (hidden_states_361_cast_fp16, var_15473_cast_fp16))[name = string("input_451_cast_fp16")]; - tensor normed_541_axes_0 = const()[name = string("normed_541_axes_0"), val = tensor([-1])]; - fp16 var_15468_to_fp16 = const()[name = string("op_15468_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_541_cast_fp16 = layer_norm(axes = normed_541_axes_0, epsilon = var_15468_to_fp16, x = input_451_cast_fp16)[name = string("normed_541_cast_fp16")]; - tensor normed_543_begin_0 = const()[name = string("normed_543_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_543_end_0 = const()[name = string("normed_543_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_543_end_mask_0 = const()[name = string("normed_543_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_543_cast_fp16 = slice_by_index(begin = normed_543_begin_0, end = normed_543_end_0, end_mask = normed_543_end_mask_0, x = normed_541_cast_fp16)[name = string("normed_543_cast_fp16")]; - tensor var_15487_to_fp16 = const()[name = string("op_15487_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(472374144)))]; - tensor attn_output_229_cast_fp16 = mul(x = normed_543_cast_fp16, y = var_15487_to_fp16)[name = string("attn_output_229_cast_fp16")]; - tensor hidden_states_363_cast_fp16 = add(x = hidden_states_353_cast_fp16, y = attn_output_229_cast_fp16)[name = string("hidden_states_363_cast_fp16")]; - int32 var_15500 = const()[name = string("op_15500"), val = int32(-1)]; - fp16 const_866_promoted_to_fp16 = const()[name = string("const_866_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_15502_cast_fp16 = mul(x = hidden_states_363_cast_fp16, y = const_866_promoted_to_fp16)[name = string("op_15502_cast_fp16")]; - bool input_453_interleave_0 = const()[name = string("input_453_interleave_0"), val = bool(false)]; - tensor input_453_cast_fp16 = concat(axis = var_15500, interleave = input_453_interleave_0, values = (hidden_states_363_cast_fp16, var_15502_cast_fp16))[name = string("input_453_cast_fp16")]; - tensor normed_545_axes_0 = const()[name = string("normed_545_axes_0"), val = tensor([-1])]; - fp16 var_15497_to_fp16 = const()[name = string("op_15497_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_545_cast_fp16 = layer_norm(axes = normed_545_axes_0, epsilon = var_15497_to_fp16, x = input_453_cast_fp16)[name = string("normed_545_cast_fp16")]; - tensor normed_547_begin_0 = const()[name = string("normed_547_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_547_end_0 = const()[name = string("normed_547_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_547_end_mask_0 = const()[name = string("normed_547_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_547_cast_fp16 = slice_by_index(begin = normed_547_begin_0, end = normed_547_end_0, end_mask = normed_547_end_mask_0, x = normed_545_cast_fp16)[name = string("normed_547_cast_fp16")]; - tensor var_15516_to_fp16 = const()[name = string("op_15516_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(472376512)))]; - tensor x_365_cast_fp16 = mul(x = normed_547_cast_fp16, y = var_15516_to_fp16)[name = string("x_365_cast_fp16")]; - tensor var_15528 = const()[name = string("op_15528"), val = tensor([0, 2, 1])]; - tensor input_455_axes_0 = const()[name = string("input_455_axes_0"), val = tensor([2])]; - tensor var_15529_cast_fp16 = transpose(perm = var_15528, x = x_365_cast_fp16)[name = string("transpose_19")]; - tensor input_455_cast_fp16 = expand_dims(axes = input_455_axes_0, x = var_15529_cast_fp16)[name = string("input_455_cast_fp16")]; - string x_367_pad_type_0 = const()[name = string("x_367_pad_type_0"), val = string("valid")]; - tensor x_367_strides_0 = const()[name = string("x_367_strides_0"), val = tensor([1, 1])]; - tensor x_367_pad_0 = const()[name = string("x_367_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_367_dilations_0 = const()[name = string("x_367_dilations_0"), val = tensor([1, 1])]; - int32 x_367_groups_0 = const()[name = string("x_367_groups_0"), val = int32(1)]; - tensor model_model_layers_22_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(472378880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(478350912))))[name = string("model_model_layers_22_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_367_cast_fp16 = conv(dilations = x_367_dilations_0, groups = x_367_groups_0, pad = x_367_pad_0, pad_type = x_367_pad_type_0, strides = x_367_strides_0, weight = model_model_layers_22_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_455_cast_fp16)[name = string("x_367_cast_fp16")]; - string b_45_pad_type_0 = const()[name = string("b_45_pad_type_0"), val = string("valid")]; - tensor b_45_strides_0 = const()[name = string("b_45_strides_0"), val = tensor([1, 1])]; - tensor b_45_pad_0 = const()[name = string("b_45_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_45_dilations_0 = const()[name = string("b_45_dilations_0"), val = tensor([1, 1])]; - int32 b_45_groups_0 = const()[name = string("b_45_groups_0"), val = int32(1)]; - tensor model_model_layers_22_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(478461568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(484433600))))[name = string("model_model_layers_22_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_45_cast_fp16 = conv(dilations = b_45_dilations_0, groups = b_45_groups_0, pad = b_45_pad_0, pad_type = b_45_pad_type_0, strides = b_45_strides_0, weight = model_model_layers_22_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_455_cast_fp16)[name = string("b_45_cast_fp16")]; - string var_15554_mode_0 = const()[name = string("op_15554_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_15554_cast_fp16 = gelu(mode = var_15554_mode_0, x = x_367_cast_fp16)[name = string("op_15554_cast_fp16")]; - tensor input_457_cast_fp16 = mul(x = var_15554_cast_fp16, y = b_45_cast_fp16)[name = string("input_457_cast_fp16")]; - string e_45_pad_type_0 = const()[name = string("e_45_pad_type_0"), val = string("valid")]; - tensor e_45_strides_0 = const()[name = string("e_45_strides_0"), val = tensor([1, 1])]; - tensor e_45_pad_0 = const()[name = string("e_45_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_45_dilations_0 = const()[name = string("e_45_dilations_0"), val = tensor([1, 1])]; - int32 e_45_groups_0 = const()[name = string("e_45_groups_0"), val = int32(1)]; - tensor model_model_layers_22_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(484544256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490516288))))[name = string("model_model_layers_22_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_45_cast_fp16 = conv(dilations = e_45_dilations_0, groups = e_45_groups_0, pad = e_45_pad_0, pad_type = e_45_pad_type_0, strides = e_45_strides_0, weight = model_model_layers_22_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_457_cast_fp16)[name = string("e_45_cast_fp16")]; - tensor var_15562_axes_0 = const()[name = string("op_15562_axes_0"), val = tensor([2])]; - tensor var_15562_cast_fp16 = squeeze(axes = var_15562_axes_0, x = e_45_cast_fp16)[name = string("op_15562_cast_fp16")]; - tensor var_15563 = const()[name = string("op_15563"), val = tensor([0, 2, 1])]; - int32 var_15574 = const()[name = string("op_15574"), val = int32(-1)]; - fp16 const_870_promoted_to_fp16 = const()[name = string("const_870_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_365_cast_fp16 = transpose(perm = var_15563, x = var_15562_cast_fp16)[name = string("transpose_18")]; - tensor var_15576_cast_fp16 = mul(x = hidden_states_365_cast_fp16, y = const_870_promoted_to_fp16)[name = string("op_15576_cast_fp16")]; - bool input_459_interleave_0 = const()[name = string("input_459_interleave_0"), val = bool(false)]; - tensor input_459_cast_fp16 = concat(axis = var_15574, interleave = input_459_interleave_0, values = (hidden_states_365_cast_fp16, var_15576_cast_fp16))[name = string("input_459_cast_fp16")]; - tensor normed_549_axes_0 = const()[name = string("normed_549_axes_0"), val = tensor([-1])]; - fp16 var_15571_to_fp16 = const()[name = string("op_15571_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_549_cast_fp16 = layer_norm(axes = normed_549_axes_0, epsilon = var_15571_to_fp16, x = input_459_cast_fp16)[name = string("normed_549_cast_fp16")]; - tensor normed_551_begin_0 = const()[name = string("normed_551_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_551_end_0 = const()[name = string("normed_551_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_551_end_mask_0 = const()[name = string("normed_551_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_551_cast_fp16 = slice_by_index(begin = normed_551_begin_0, end = normed_551_end_0, end_mask = normed_551_end_mask_0, x = normed_549_cast_fp16)[name = string("normed_551_cast_fp16")]; - tensor var_15590_to_fp16 = const()[name = string("op_15590_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490534784)))]; - tensor hidden_states_367_cast_fp16 = mul(x = normed_551_cast_fp16, y = var_15590_to_fp16)[name = string("hidden_states_367_cast_fp16")]; - tensor hidden_states_369_cast_fp16 = add(x = hidden_states_363_cast_fp16, y = hidden_states_367_cast_fp16)[name = string("hidden_states_369_cast_fp16")]; - int32 var_15641 = const()[name = string("op_15641"), val = int32(-1)]; - fp16 const_874_promoted_to_fp16 = const()[name = string("const_874_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_15643_cast_fp16 = mul(x = hidden_states_369_cast_fp16, y = const_874_promoted_to_fp16)[name = string("op_15643_cast_fp16")]; - bool input_461_interleave_0 = const()[name = string("input_461_interleave_0"), val = bool(false)]; - tensor input_461_cast_fp16 = concat(axis = var_15641, interleave = input_461_interleave_0, values = (hidden_states_369_cast_fp16, var_15643_cast_fp16))[name = string("input_461_cast_fp16")]; - tensor normed_553_axes_0 = const()[name = string("normed_553_axes_0"), val = tensor([-1])]; - fp16 var_15638_to_fp16 = const()[name = string("op_15638_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_553_cast_fp16 = layer_norm(axes = normed_553_axes_0, epsilon = var_15638_to_fp16, x = input_461_cast_fp16)[name = string("normed_553_cast_fp16")]; - tensor normed_555_begin_0 = const()[name = string("normed_555_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_555_end_0 = const()[name = string("normed_555_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_555_end_mask_0 = const()[name = string("normed_555_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_555_cast_fp16 = slice_by_index(begin = normed_555_begin_0, end = normed_555_end_0, end_mask = normed_555_end_mask_0, x = normed_553_cast_fp16)[name = string("normed_555_cast_fp16")]; - tensor var_15657_to_fp16 = const()[name = string("op_15657_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490537152)))]; - tensor hidden_states_371_cast_fp16 = mul(x = normed_555_cast_fp16, y = var_15657_to_fp16)[name = string("hidden_states_371_cast_fp16")]; - tensor var_15662 = const()[name = string("op_15662"), val = tensor([0, 2, 1])]; - tensor var_15665_axes_0 = const()[name = string("op_15665_axes_0"), val = tensor([2])]; - tensor var_15663_cast_fp16 = transpose(perm = var_15662, x = hidden_states_371_cast_fp16)[name = string("transpose_17")]; - tensor var_15665_cast_fp16 = expand_dims(axes = var_15665_axes_0, x = var_15663_cast_fp16)[name = string("op_15665_cast_fp16")]; - string var_15681_pad_type_0 = const()[name = string("op_15681_pad_type_0"), val = string("valid")]; - tensor var_15681_strides_0 = const()[name = string("op_15681_strides_0"), val = tensor([1, 1])]; - tensor var_15681_pad_0 = const()[name = string("op_15681_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_15681_dilations_0 = const()[name = string("op_15681_dilations_0"), val = tensor([1, 1])]; - int32 var_15681_groups_0 = const()[name = string("op_15681_groups_0"), val = int32(1)]; - tensor var_15681 = conv(dilations = var_15681_dilations_0, groups = var_15681_groups_0, pad = var_15681_pad_0, pad_type = var_15681_pad_type_0, strides = var_15681_strides_0, weight = model_model_layers_23_self_attn_q_proj_weight_palettized, x = var_15665_cast_fp16)[name = string("op_15681")]; - tensor var_15686 = const()[name = string("op_15686"), val = tensor([1, 4, 1, 256])]; - tensor var_15687 = reshape(shape = var_15686, x = var_15681)[name = string("op_15687")]; - string var_15703_pad_type_0 = const()[name = string("op_15703_pad_type_0"), val = string("valid")]; - tensor var_15703_strides_0 = const()[name = string("op_15703_strides_0"), val = tensor([1, 1])]; - tensor var_15703_pad_0 = const()[name = string("op_15703_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_15703_dilations_0 = const()[name = string("op_15703_dilations_0"), val = tensor([1, 1])]; - int32 var_15703_groups_0 = const()[name = string("op_15703_groups_0"), val = int32(1)]; - tensor var_15703 = conv(dilations = var_15703_dilations_0, groups = var_15703_groups_0, pad = var_15703_pad_0, pad_type = var_15703_pad_type_0, strides = var_15703_strides_0, weight = model_model_layers_23_self_attn_k_proj_weight_palettized, x = var_15665_cast_fp16)[name = string("op_15703")]; - tensor var_15708 = const()[name = string("op_15708"), val = tensor([1, 1, 1, 256])]; - tensor var_15709 = reshape(shape = var_15708, x = var_15703)[name = string("op_15709")]; - string var_15725_pad_type_0 = const()[name = string("op_15725_pad_type_0"), val = string("valid")]; - tensor var_15725_strides_0 = const()[name = string("op_15725_strides_0"), val = tensor([1, 1])]; - tensor var_15725_pad_0 = const()[name = string("op_15725_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_15725_dilations_0 = const()[name = string("op_15725_dilations_0"), val = tensor([1, 1])]; - int32 var_15725_groups_0 = const()[name = string("op_15725_groups_0"), val = int32(1)]; - tensor var_15725 = conv(dilations = var_15725_dilations_0, groups = var_15725_groups_0, pad = var_15725_pad_0, pad_type = var_15725_pad_type_0, strides = var_15725_strides_0, weight = model_model_layers_23_self_attn_v_proj_weight_palettized, x = var_15665_cast_fp16)[name = string("op_15725")]; - tensor var_15730 = const()[name = string("op_15730"), val = tensor([1, 1, 1, 256])]; - tensor var_15731 = reshape(shape = var_15730, x = var_15725)[name = string("op_15731")]; - int32 var_15746 = const()[name = string("op_15746"), val = int32(-1)]; - fp16 const_878_promoted = const()[name = string("const_878_promoted"), val = fp16(-0x1p+0)]; - tensor var_15748 = mul(x = var_15687, y = const_878_promoted)[name = string("op_15748")]; - bool input_465_interleave_0 = const()[name = string("input_465_interleave_0"), val = bool(false)]; - tensor input_465 = concat(axis = var_15746, interleave = input_465_interleave_0, values = (var_15687, var_15748))[name = string("input_465")]; - tensor normed_557_axes_0 = const()[name = string("normed_557_axes_0"), val = tensor([-1])]; - fp16 var_15743_to_fp16 = const()[name = string("op_15743_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_557_cast_fp16 = layer_norm(axes = normed_557_axes_0, epsilon = var_15743_to_fp16, x = input_465)[name = string("normed_557_cast_fp16")]; - tensor normed_559_begin_0 = const()[name = string("normed_559_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_559_end_0 = const()[name = string("normed_559_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_559_end_mask_0 = const()[name = string("normed_559_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_559 = slice_by_index(begin = normed_559_begin_0, end = normed_559_end_0, end_mask = normed_559_end_mask_0, x = normed_557_cast_fp16)[name = string("normed_559")]; - tensor var_15762_to_fp16 = const()[name = string("op_15762_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490539520)))]; - tensor q_47_cast_fp16 = mul(x = normed_559, y = var_15762_to_fp16)[name = string("q_47_cast_fp16")]; - int32 var_15773 = const()[name = string("op_15773"), val = int32(-1)]; - fp16 const_882_promoted = const()[name = string("const_882_promoted"), val = fp16(-0x1p+0)]; - tensor var_15775 = mul(x = var_15709, y = const_882_promoted)[name = string("op_15775")]; - bool input_467_interleave_0 = const()[name = string("input_467_interleave_0"), val = bool(false)]; - tensor input_467 = concat(axis = var_15773, interleave = input_467_interleave_0, values = (var_15709, var_15775))[name = string("input_467")]; - tensor normed_561_axes_0 = const()[name = string("normed_561_axes_0"), val = tensor([-1])]; - fp16 var_15770_to_fp16 = const()[name = string("op_15770_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_561_cast_fp16 = layer_norm(axes = normed_561_axes_0, epsilon = var_15770_to_fp16, x = input_467)[name = string("normed_561_cast_fp16")]; - tensor normed_563_begin_0 = const()[name = string("normed_563_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_563_end_0 = const()[name = string("normed_563_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_563_end_mask_0 = const()[name = string("normed_563_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_563 = slice_by_index(begin = normed_563_begin_0, end = normed_563_end_0, end_mask = normed_563_end_mask_0, x = normed_561_cast_fp16)[name = string("normed_563")]; - tensor var_15789_to_fp16 = const()[name = string("op_15789_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490540096)))]; - tensor k_47_cast_fp16 = mul(x = normed_563, y = var_15789_to_fp16)[name = string("k_47_cast_fp16")]; - tensor var_15791_cast_fp16 = mul(x = q_47_cast_fp16, y = cos_21_cast_fp16)[name = string("op_15791_cast_fp16")]; - tensor x1_93_begin_0 = const()[name = string("x1_93_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_93_end_0 = const()[name = string("x1_93_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_93_end_mask_0 = const()[name = string("x1_93_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_93_cast_fp16 = slice_by_index(begin = x1_93_begin_0, end = x1_93_end_0, end_mask = x1_93_end_mask_0, x = q_47_cast_fp16)[name = string("x1_93_cast_fp16")]; - tensor x2_93_begin_0 = const()[name = string("x2_93_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_93_end_0 = const()[name = string("x2_93_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_93_end_mask_0 = const()[name = string("x2_93_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_93_cast_fp16 = slice_by_index(begin = x2_93_begin_0, end = x2_93_end_0, end_mask = x2_93_end_mask_0, x = q_47_cast_fp16)[name = string("x2_93_cast_fp16")]; - fp16 const_888_promoted_to_fp16 = const()[name = string("const_888_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_15812_cast_fp16 = mul(x = x2_93_cast_fp16, y = const_888_promoted_to_fp16)[name = string("op_15812_cast_fp16")]; - int32 var_15814 = const()[name = string("op_15814"), val = int32(-1)]; - bool var_15815_interleave_0 = const()[name = string("op_15815_interleave_0"), val = bool(false)]; - tensor var_15815_cast_fp16 = concat(axis = var_15814, interleave = var_15815_interleave_0, values = (var_15812_cast_fp16, x1_93_cast_fp16))[name = string("op_15815_cast_fp16")]; - tensor var_15816_cast_fp16 = mul(x = var_15815_cast_fp16, y = sin_21_cast_fp16)[name = string("op_15816_cast_fp16")]; - tensor query_states_93_cast_fp16 = add(x = var_15791_cast_fp16, y = var_15816_cast_fp16)[name = string("query_states_93_cast_fp16")]; - tensor var_15819_cast_fp16 = mul(x = k_47_cast_fp16, y = cos_21_cast_fp16)[name = string("op_15819_cast_fp16")]; - tensor x1_95_begin_0 = const()[name = string("x1_95_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_95_end_0 = const()[name = string("x1_95_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_95_end_mask_0 = const()[name = string("x1_95_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_95_cast_fp16 = slice_by_index(begin = x1_95_begin_0, end = x1_95_end_0, end_mask = x1_95_end_mask_0, x = k_47_cast_fp16)[name = string("x1_95_cast_fp16")]; - tensor x2_95_begin_0 = const()[name = string("x2_95_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_95_end_0 = const()[name = string("x2_95_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_95_end_mask_0 = const()[name = string("x2_95_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_95_cast_fp16 = slice_by_index(begin = x2_95_begin_0, end = x2_95_end_0, end_mask = x2_95_end_mask_0, x = k_47_cast_fp16)[name = string("x2_95_cast_fp16")]; - fp16 const_891_promoted_to_fp16 = const()[name = string("const_891_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_15840_cast_fp16 = mul(x = x2_95_cast_fp16, y = const_891_promoted_to_fp16)[name = string("op_15840_cast_fp16")]; - int32 var_15842 = const()[name = string("op_15842"), val = int32(-1)]; - bool var_15843_interleave_0 = const()[name = string("op_15843_interleave_0"), val = bool(false)]; - tensor var_15843_cast_fp16 = concat(axis = var_15842, interleave = var_15843_interleave_0, values = (var_15840_cast_fp16, x1_95_cast_fp16))[name = string("op_15843_cast_fp16")]; - tensor var_15844_cast_fp16 = mul(x = var_15843_cast_fp16, y = sin_21_cast_fp16)[name = string("op_15844_cast_fp16")]; - tensor key_states_93_cast_fp16 = add(x = var_15819_cast_fp16, y = var_15844_cast_fp16)[name = string("key_states_93_cast_fp16")]; - tensor model_model_kv_cache_global_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_global_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_global_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_26, begin_mask = model_model_kv_cache_global_internal_tensor_assign_7_begin_mask_0, end = concat_27, end_mask = model_model_kv_cache_global_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_7_stride_0, update = key_states_93_cast_fp16, x = coreml_update_state_87)[name = string("model_model_kv_cache_global_internal_tensor_assign_7_cast_fp16")]; - write_state(data = model_model_kv_cache_global_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_46_write_state")]; - tensor coreml_update_state_98 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_46")]; - tensor model_model_kv_cache_global_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_global_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_global_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_66, begin_mask = model_model_kv_cache_global_internal_tensor_assign_8_begin_mask_0, end = concat_67, end_mask = model_model_kv_cache_global_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_8_stride_0, update = var_15731, x = coreml_update_state_98)[name = string("model_model_kv_cache_global_internal_tensor_assign_8_cast_fp16")]; - write_state(data = model_model_kv_cache_global_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_47_write_state")]; - tensor coreml_update_state_99 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_47")]; - tensor var_15899_begin_0 = const()[name = string("op_15899_begin_0"), val = tensor([3, 0, 0, 0])]; - tensor var_15899_end_0 = const()[name = string("op_15899_end_0"), val = tensor([4, 1, 4096, 256])]; - tensor var_15899_end_mask_0 = const()[name = string("op_15899_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_15899_cast_fp16 = slice_by_index(begin = var_15899_begin_0, end = var_15899_end_0, end_mask = var_15899_end_mask_0, x = coreml_update_state_99)[name = string("op_15899_cast_fp16")]; - tensor var_15906_begin_0 = const()[name = string("op_15906_begin_0"), val = tensor([7, 0, 0, 0])]; - tensor var_15906_end_0 = const()[name = string("op_15906_end_0"), val = tensor([1, 1, 4096, 256])]; - tensor var_15906_end_mask_0 = const()[name = string("op_15906_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_15906_cast_fp16 = slice_by_index(begin = var_15906_begin_0, end = var_15906_end_0, end_mask = var_15906_end_mask_0, x = coreml_update_state_99)[name = string("op_15906_cast_fp16")]; - tensor var_15943 = const()[name = string("op_15943"), val = tensor([1, 4, 1, 1])]; - tensor x_373_cast_fp16 = tile(reps = var_15943, x = var_15899_cast_fp16)[name = string("x_373_cast_fp16")]; - tensor var_15963 = const()[name = string("op_15963"), val = tensor([1, 4, 1, 1])]; - tensor x_379_cast_fp16 = tile(reps = var_15963, x = var_15906_cast_fp16)[name = string("x_379_cast_fp16")]; - bool var_15990_transpose_x_1 = const()[name = string("op_15990_transpose_x_1"), val = bool(false)]; - bool var_15990_transpose_y_1 = const()[name = string("op_15990_transpose_y_1"), val = bool(true)]; - tensor var_15990 = matmul(transpose_x = var_15990_transpose_x_1, transpose_y = var_15990_transpose_y_1, x = query_states_93_cast_fp16, y = x_373_cast_fp16)[name = string("op_15990")]; - fp16 var_15991_to_fp16 = const()[name = string("op_15991_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_139_cast_fp16 = mul(x = var_15990, y = var_15991_to_fp16)[name = string("attn_weights_139_cast_fp16")]; - tensor attn_weights_141_cast_fp16 = add(x = attn_weights_139_cast_fp16, y = causal_mask)[name = string("attn_weights_141_cast_fp16")]; - int32 var_16026 = const()[name = string("op_16026"), val = int32(-1)]; - tensor attn_weights_143_cast_fp16 = softmax(axis = var_16026, x = attn_weights_141_cast_fp16)[name = string("attn_weights_143_cast_fp16")]; - bool attn_output_231_transpose_x_0 = const()[name = string("attn_output_231_transpose_x_0"), val = bool(false)]; - bool attn_output_231_transpose_y_0 = const()[name = string("attn_output_231_transpose_y_0"), val = bool(false)]; - tensor attn_output_231_cast_fp16 = matmul(transpose_x = attn_output_231_transpose_x_0, transpose_y = attn_output_231_transpose_y_0, x = attn_weights_143_cast_fp16, y = x_379_cast_fp16)[name = string("attn_output_231_cast_fp16")]; - tensor var_16037_perm_0 = const()[name = string("op_16037_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_16041 = const()[name = string("op_16041"), val = tensor([1, 1, 1024])]; - tensor var_16037_cast_fp16 = transpose(perm = var_16037_perm_0, x = attn_output_231_cast_fp16)[name = string("transpose_16")]; - tensor attn_output_235_cast_fp16 = reshape(shape = var_16041, x = var_16037_cast_fp16)[name = string("attn_output_235_cast_fp16")]; - tensor var_16046 = const()[name = string("op_16046"), val = tensor([0, 2, 1])]; - string var_16062_pad_type_0 = const()[name = string("op_16062_pad_type_0"), val = string("valid")]; - int32 var_16062_groups_0 = const()[name = string("op_16062_groups_0"), val = int32(1)]; - tensor var_16062_strides_0 = const()[name = string("op_16062_strides_0"), val = tensor([1])]; - tensor var_16062_pad_0 = const()[name = string("op_16062_pad_0"), val = tensor([0, 0])]; - tensor var_16062_dilations_0 = const()[name = string("op_16062_dilations_0"), val = tensor([1])]; - tensor squeeze_23_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490540672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491425472))))[name = string("squeeze_23_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_16047_cast_fp16 = transpose(perm = var_16046, x = attn_output_235_cast_fp16)[name = string("transpose_15")]; - tensor var_16062_cast_fp16 = conv(dilations = var_16062_dilations_0, groups = var_16062_groups_0, pad = var_16062_pad_0, pad_type = var_16062_pad_type_0, strides = var_16062_strides_0, weight = squeeze_23_cast_fp16_to_fp32_to_fp16_palettized, x = var_16047_cast_fp16)[name = string("op_16062_cast_fp16")]; - tensor var_16066 = const()[name = string("op_16066"), val = tensor([0, 2, 1])]; - int32 var_16077 = const()[name = string("op_16077"), val = int32(-1)]; - fp16 const_900_promoted_to_fp16 = const()[name = string("const_900_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_377_cast_fp16 = transpose(perm = var_16066, x = var_16062_cast_fp16)[name = string("transpose_14")]; - tensor var_16079_cast_fp16 = mul(x = hidden_states_377_cast_fp16, y = const_900_promoted_to_fp16)[name = string("op_16079_cast_fp16")]; - bool input_471_interleave_0 = const()[name = string("input_471_interleave_0"), val = bool(false)]; - tensor input_471_cast_fp16 = concat(axis = var_16077, interleave = input_471_interleave_0, values = (hidden_states_377_cast_fp16, var_16079_cast_fp16))[name = string("input_471_cast_fp16")]; - tensor normed_565_axes_0 = const()[name = string("normed_565_axes_0"), val = tensor([-1])]; - fp16 var_16074_to_fp16 = const()[name = string("op_16074_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_565_cast_fp16 = layer_norm(axes = normed_565_axes_0, epsilon = var_16074_to_fp16, x = input_471_cast_fp16)[name = string("normed_565_cast_fp16")]; - tensor normed_567_begin_0 = const()[name = string("normed_567_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_567_end_0 = const()[name = string("normed_567_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_567_end_mask_0 = const()[name = string("normed_567_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_567_cast_fp16 = slice_by_index(begin = normed_567_begin_0, end = normed_567_end_0, end_mask = normed_567_end_mask_0, x = normed_565_cast_fp16)[name = string("normed_567_cast_fp16")]; - tensor var_16093_to_fp16 = const()[name = string("op_16093_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491443968)))]; - tensor attn_output_239_cast_fp16 = mul(x = normed_567_cast_fp16, y = var_16093_to_fp16)[name = string("attn_output_239_cast_fp16")]; - tensor hidden_states_379_cast_fp16 = add(x = hidden_states_369_cast_fp16, y = attn_output_239_cast_fp16)[name = string("hidden_states_379_cast_fp16")]; - int32 var_16106 = const()[name = string("op_16106"), val = int32(-1)]; - fp16 const_904_promoted_to_fp16 = const()[name = string("const_904_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_16108_cast_fp16 = mul(x = hidden_states_379_cast_fp16, y = const_904_promoted_to_fp16)[name = string("op_16108_cast_fp16")]; - bool input_473_interleave_0 = const()[name = string("input_473_interleave_0"), val = bool(false)]; - tensor input_473_cast_fp16 = concat(axis = var_16106, interleave = input_473_interleave_0, values = (hidden_states_379_cast_fp16, var_16108_cast_fp16))[name = string("input_473_cast_fp16")]; - tensor normed_569_axes_0 = const()[name = string("normed_569_axes_0"), val = tensor([-1])]; - fp16 var_16103_to_fp16 = const()[name = string("op_16103_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_569_cast_fp16 = layer_norm(axes = normed_569_axes_0, epsilon = var_16103_to_fp16, x = input_473_cast_fp16)[name = string("normed_569_cast_fp16")]; - tensor normed_571_begin_0 = const()[name = string("normed_571_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_571_end_0 = const()[name = string("normed_571_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_571_end_mask_0 = const()[name = string("normed_571_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_571_cast_fp16 = slice_by_index(begin = normed_571_begin_0, end = normed_571_end_0, end_mask = normed_571_end_mask_0, x = normed_569_cast_fp16)[name = string("normed_571_cast_fp16")]; - tensor var_16122_to_fp16 = const()[name = string("op_16122_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491446336)))]; - tensor x_381_cast_fp16 = mul(x = normed_571_cast_fp16, y = var_16122_to_fp16)[name = string("x_381_cast_fp16")]; - tensor var_16134 = const()[name = string("op_16134"), val = tensor([0, 2, 1])]; - tensor input_475_axes_0 = const()[name = string("input_475_axes_0"), val = tensor([2])]; - tensor var_16135_cast_fp16 = transpose(perm = var_16134, x = x_381_cast_fp16)[name = string("transpose_13")]; - tensor input_475_cast_fp16 = expand_dims(axes = input_475_axes_0, x = var_16135_cast_fp16)[name = string("input_475_cast_fp16")]; - string x_383_pad_type_0 = const()[name = string("x_383_pad_type_0"), val = string("valid")]; - tensor x_383_strides_0 = const()[name = string("x_383_strides_0"), val = tensor([1, 1])]; - tensor x_383_pad_0 = const()[name = string("x_383_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_383_dilations_0 = const()[name = string("x_383_dilations_0"), val = tensor([1, 1])]; - int32 x_383_groups_0 = const()[name = string("x_383_groups_0"), val = int32(1)]; - tensor model_model_layers_23_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491448704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(497420736))))[name = string("model_model_layers_23_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_383_cast_fp16 = conv(dilations = x_383_dilations_0, groups = x_383_groups_0, pad = x_383_pad_0, pad_type = x_383_pad_type_0, strides = x_383_strides_0, weight = model_model_layers_23_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_475_cast_fp16)[name = string("x_383_cast_fp16")]; - string b_47_pad_type_0 = const()[name = string("b_47_pad_type_0"), val = string("valid")]; - tensor b_47_strides_0 = const()[name = string("b_47_strides_0"), val = tensor([1, 1])]; - tensor b_47_pad_0 = const()[name = string("b_47_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_47_dilations_0 = const()[name = string("b_47_dilations_0"), val = tensor([1, 1])]; - int32 b_47_groups_0 = const()[name = string("b_47_groups_0"), val = int32(1)]; - tensor model_model_layers_23_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(497531392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(503503424))))[name = string("model_model_layers_23_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_47_cast_fp16 = conv(dilations = b_47_dilations_0, groups = b_47_groups_0, pad = b_47_pad_0, pad_type = b_47_pad_type_0, strides = b_47_strides_0, weight = model_model_layers_23_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_475_cast_fp16)[name = string("b_47_cast_fp16")]; - string var_16160_mode_0 = const()[name = string("op_16160_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_16160_cast_fp16 = gelu(mode = var_16160_mode_0, x = x_383_cast_fp16)[name = string("op_16160_cast_fp16")]; - tensor input_477_cast_fp16 = mul(x = var_16160_cast_fp16, y = b_47_cast_fp16)[name = string("input_477_cast_fp16")]; - string e_47_pad_type_0 = const()[name = string("e_47_pad_type_0"), val = string("valid")]; - tensor e_47_strides_0 = const()[name = string("e_47_strides_0"), val = tensor([1, 1])]; - tensor e_47_pad_0 = const()[name = string("e_47_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_47_dilations_0 = const()[name = string("e_47_dilations_0"), val = tensor([1, 1])]; - int32 e_47_groups_0 = const()[name = string("e_47_groups_0"), val = int32(1)]; - tensor model_model_layers_23_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(503614080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(509586112))))[name = string("model_model_layers_23_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_47_cast_fp16 = conv(dilations = e_47_dilations_0, groups = e_47_groups_0, pad = e_47_pad_0, pad_type = e_47_pad_type_0, strides = e_47_strides_0, weight = model_model_layers_23_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_477_cast_fp16)[name = string("e_47_cast_fp16")]; - tensor var_16168_axes_0 = const()[name = string("op_16168_axes_0"), val = tensor([2])]; - tensor var_16168_cast_fp16 = squeeze(axes = var_16168_axes_0, x = e_47_cast_fp16)[name = string("op_16168_cast_fp16")]; - tensor var_16169 = const()[name = string("op_16169"), val = tensor([0, 2, 1])]; - int32 var_16180 = const()[name = string("op_16180"), val = int32(-1)]; - fp16 const_908_promoted_to_fp16 = const()[name = string("const_908_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_381_cast_fp16 = transpose(perm = var_16169, x = var_16168_cast_fp16)[name = string("transpose_12")]; - tensor var_16182_cast_fp16 = mul(x = hidden_states_381_cast_fp16, y = const_908_promoted_to_fp16)[name = string("op_16182_cast_fp16")]; - bool input_479_interleave_0 = const()[name = string("input_479_interleave_0"), val = bool(false)]; - tensor input_479_cast_fp16 = concat(axis = var_16180, interleave = input_479_interleave_0, values = (hidden_states_381_cast_fp16, var_16182_cast_fp16))[name = string("input_479_cast_fp16")]; - tensor normed_573_axes_0 = const()[name = string("normed_573_axes_0"), val = tensor([-1])]; - fp16 var_16177_to_fp16 = const()[name = string("op_16177_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_573_cast_fp16 = layer_norm(axes = normed_573_axes_0, epsilon = var_16177_to_fp16, x = input_479_cast_fp16)[name = string("normed_573_cast_fp16")]; - tensor normed_575_begin_0 = const()[name = string("normed_575_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_575_end_0 = const()[name = string("normed_575_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_575_end_mask_0 = const()[name = string("normed_575_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_575_cast_fp16 = slice_by_index(begin = normed_575_begin_0, end = normed_575_end_0, end_mask = normed_575_end_mask_0, x = normed_573_cast_fp16)[name = string("normed_575_cast_fp16")]; - tensor var_16196_to_fp16 = const()[name = string("op_16196_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(509604608)))]; - tensor hidden_states_383_cast_fp16 = mul(x = normed_575_cast_fp16, y = var_16196_to_fp16)[name = string("hidden_states_383_cast_fp16")]; - tensor hidden_states_385_cast_fp16 = add(x = hidden_states_379_cast_fp16, y = hidden_states_383_cast_fp16)[name = string("hidden_states_385_cast_fp16")]; - int32 var_16247 = const()[name = string("op_16247"), val = int32(-1)]; - fp16 const_912_promoted_to_fp16 = const()[name = string("const_912_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_16249_cast_fp16 = mul(x = hidden_states_385_cast_fp16, y = const_912_promoted_to_fp16)[name = string("op_16249_cast_fp16")]; - bool input_481_interleave_0 = const()[name = string("input_481_interleave_0"), val = bool(false)]; - tensor input_481_cast_fp16 = concat(axis = var_16247, interleave = input_481_interleave_0, values = (hidden_states_385_cast_fp16, var_16249_cast_fp16))[name = string("input_481_cast_fp16")]; - tensor normed_577_axes_0 = const()[name = string("normed_577_axes_0"), val = tensor([-1])]; - fp16 var_16244_to_fp16 = const()[name = string("op_16244_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_577_cast_fp16 = layer_norm(axes = normed_577_axes_0, epsilon = var_16244_to_fp16, x = input_481_cast_fp16)[name = string("normed_577_cast_fp16")]; - tensor normed_579_begin_0 = const()[name = string("normed_579_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_579_end_0 = const()[name = string("normed_579_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_579_end_mask_0 = const()[name = string("normed_579_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_579_cast_fp16 = slice_by_index(begin = normed_579_begin_0, end = normed_579_end_0, end_mask = normed_579_end_mask_0, x = normed_577_cast_fp16)[name = string("normed_579_cast_fp16")]; - tensor var_16263_to_fp16 = const()[name = string("op_16263_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(509606976)))]; - tensor hidden_states_387_cast_fp16 = mul(x = normed_579_cast_fp16, y = var_16263_to_fp16)[name = string("hidden_states_387_cast_fp16")]; - tensor var_16268 = const()[name = string("op_16268"), val = tensor([0, 2, 1])]; - tensor var_16271_axes_0 = const()[name = string("op_16271_axes_0"), val = tensor([2])]; - tensor var_16269_cast_fp16 = transpose(perm = var_16268, x = hidden_states_387_cast_fp16)[name = string("transpose_11")]; - tensor var_16271_cast_fp16 = expand_dims(axes = var_16271_axes_0, x = var_16269_cast_fp16)[name = string("op_16271_cast_fp16")]; - string var_16287_pad_type_0 = const()[name = string("op_16287_pad_type_0"), val = string("valid")]; - tensor var_16287_strides_0 = const()[name = string("op_16287_strides_0"), val = tensor([1, 1])]; - tensor var_16287_pad_0 = const()[name = string("op_16287_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_16287_dilations_0 = const()[name = string("op_16287_dilations_0"), val = tensor([1, 1])]; - int32 var_16287_groups_0 = const()[name = string("op_16287_groups_0"), val = int32(1)]; - tensor var_16287 = conv(dilations = var_16287_dilations_0, groups = var_16287_groups_0, pad = var_16287_pad_0, pad_type = var_16287_pad_type_0, strides = var_16287_strides_0, weight = model_model_layers_24_self_attn_q_proj_weight_palettized, x = var_16271_cast_fp16)[name = string("op_16287")]; - tensor var_16292 = const()[name = string("op_16292"), val = tensor([1, 4, 1, 256])]; - tensor var_16293 = reshape(shape = var_16292, x = var_16287)[name = string("op_16293")]; - string var_16309_pad_type_0 = const()[name = string("op_16309_pad_type_0"), val = string("valid")]; - tensor var_16309_strides_0 = const()[name = string("op_16309_strides_0"), val = tensor([1, 1])]; - tensor var_16309_pad_0 = const()[name = string("op_16309_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_16309_dilations_0 = const()[name = string("op_16309_dilations_0"), val = tensor([1, 1])]; - int32 var_16309_groups_0 = const()[name = string("op_16309_groups_0"), val = int32(1)]; - tensor var_16309 = conv(dilations = var_16309_dilations_0, groups = var_16309_groups_0, pad = var_16309_pad_0, pad_type = var_16309_pad_type_0, strides = var_16309_strides_0, weight = model_model_layers_24_self_attn_k_proj_weight_palettized, x = var_16271_cast_fp16)[name = string("op_16309")]; - tensor var_16314 = const()[name = string("op_16314"), val = tensor([1, 1, 1, 256])]; - tensor var_16315 = reshape(shape = var_16314, x = var_16309)[name = string("op_16315")]; - string var_16331_pad_type_0 = const()[name = string("op_16331_pad_type_0"), val = string("valid")]; - tensor var_16331_strides_0 = const()[name = string("op_16331_strides_0"), val = tensor([1, 1])]; - tensor var_16331_pad_0 = const()[name = string("op_16331_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_16331_dilations_0 = const()[name = string("op_16331_dilations_0"), val = tensor([1, 1])]; - int32 var_16331_groups_0 = const()[name = string("op_16331_groups_0"), val = int32(1)]; - tensor var_16331 = conv(dilations = var_16331_dilations_0, groups = var_16331_groups_0, pad = var_16331_pad_0, pad_type = var_16331_pad_type_0, strides = var_16331_strides_0, weight = model_model_layers_24_self_attn_v_proj_weight_palettized, x = var_16271_cast_fp16)[name = string("op_16331")]; - tensor var_16336 = const()[name = string("op_16336"), val = tensor([1, 1, 1, 256])]; - tensor var_16337 = reshape(shape = var_16336, x = var_16331)[name = string("op_16337")]; - int32 var_16352 = const()[name = string("op_16352"), val = int32(-1)]; - fp16 const_916_promoted = const()[name = string("const_916_promoted"), val = fp16(-0x1p+0)]; - tensor var_16354 = mul(x = var_16293, y = const_916_promoted)[name = string("op_16354")]; - bool input_485_interleave_0 = const()[name = string("input_485_interleave_0"), val = bool(false)]; - tensor input_485 = concat(axis = var_16352, interleave = input_485_interleave_0, values = (var_16293, var_16354))[name = string("input_485")]; - tensor normed_581_axes_0 = const()[name = string("normed_581_axes_0"), val = tensor([-1])]; - fp16 var_16349_to_fp16 = const()[name = string("op_16349_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_581_cast_fp16 = layer_norm(axes = normed_581_axes_0, epsilon = var_16349_to_fp16, x = input_485)[name = string("normed_581_cast_fp16")]; - tensor normed_583_begin_0 = const()[name = string("normed_583_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_583_end_0 = const()[name = string("normed_583_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_583_end_mask_0 = const()[name = string("normed_583_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_583 = slice_by_index(begin = normed_583_begin_0, end = normed_583_end_0, end_mask = normed_583_end_mask_0, x = normed_581_cast_fp16)[name = string("normed_583")]; - tensor var_16368_to_fp16 = const()[name = string("op_16368_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(509609344)))]; - tensor q_49_cast_fp16 = mul(x = normed_583, y = var_16368_to_fp16)[name = string("q_49_cast_fp16")]; - int32 var_16379 = const()[name = string("op_16379"), val = int32(-1)]; - fp16 const_920_promoted = const()[name = string("const_920_promoted"), val = fp16(-0x1p+0)]; - tensor var_16381 = mul(x = var_16315, y = const_920_promoted)[name = string("op_16381")]; - bool input_487_interleave_0 = const()[name = string("input_487_interleave_0"), val = bool(false)]; - tensor input_487 = concat(axis = var_16379, interleave = input_487_interleave_0, values = (var_16315, var_16381))[name = string("input_487")]; - tensor normed_585_axes_0 = const()[name = string("normed_585_axes_0"), val = tensor([-1])]; - fp16 var_16376_to_fp16 = const()[name = string("op_16376_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_585_cast_fp16 = layer_norm(axes = normed_585_axes_0, epsilon = var_16376_to_fp16, x = input_487)[name = string("normed_585_cast_fp16")]; - tensor normed_587_begin_0 = const()[name = string("normed_587_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_587_end_0 = const()[name = string("normed_587_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_587_end_mask_0 = const()[name = string("normed_587_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_587 = slice_by_index(begin = normed_587_begin_0, end = normed_587_end_0, end_mask = normed_587_end_mask_0, x = normed_585_cast_fp16)[name = string("normed_587")]; - tensor var_16395_to_fp16 = const()[name = string("op_16395_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(509609920)))]; - tensor k_49_cast_fp16 = mul(x = normed_587, y = var_16395_to_fp16)[name = string("k_49_cast_fp16")]; - tensor var_16397_cast_fp16 = mul(x = q_49_cast_fp16, y = cos_1_cast_fp16)[name = string("op_16397_cast_fp16")]; - tensor x1_97_begin_0 = const()[name = string("x1_97_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_97_end_0 = const()[name = string("x1_97_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_97_end_mask_0 = const()[name = string("x1_97_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_97_cast_fp16 = slice_by_index(begin = x1_97_begin_0, end = x1_97_end_0, end_mask = x1_97_end_mask_0, x = q_49_cast_fp16)[name = string("x1_97_cast_fp16")]; - tensor x2_97_begin_0 = const()[name = string("x2_97_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_97_end_0 = const()[name = string("x2_97_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_97_end_mask_0 = const()[name = string("x2_97_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_97_cast_fp16 = slice_by_index(begin = x2_97_begin_0, end = x2_97_end_0, end_mask = x2_97_end_mask_0, x = q_49_cast_fp16)[name = string("x2_97_cast_fp16")]; - fp16 const_926_promoted_to_fp16 = const()[name = string("const_926_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_16418_cast_fp16 = mul(x = x2_97_cast_fp16, y = const_926_promoted_to_fp16)[name = string("op_16418_cast_fp16")]; - int32 var_16420 = const()[name = string("op_16420"), val = int32(-1)]; - bool var_16421_interleave_0 = const()[name = string("op_16421_interleave_0"), val = bool(false)]; - tensor var_16421_cast_fp16 = concat(axis = var_16420, interleave = var_16421_interleave_0, values = (var_16418_cast_fp16, x1_97_cast_fp16))[name = string("op_16421_cast_fp16")]; - tensor var_16422_cast_fp16 = mul(x = var_16421_cast_fp16, y = sin_1_cast_fp16)[name = string("op_16422_cast_fp16")]; - tensor query_states_97_cast_fp16 = add(x = var_16397_cast_fp16, y = var_16422_cast_fp16)[name = string("query_states_97_cast_fp16")]; - tensor var_16425_cast_fp16 = mul(x = k_49_cast_fp16, y = cos_1_cast_fp16)[name = string("op_16425_cast_fp16")]; - tensor x1_99_begin_0 = const()[name = string("x1_99_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_99_end_0 = const()[name = string("x1_99_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_99_end_mask_0 = const()[name = string("x1_99_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_99_cast_fp16 = slice_by_index(begin = x1_99_begin_0, end = x1_99_end_0, end_mask = x1_99_end_mask_0, x = k_49_cast_fp16)[name = string("x1_99_cast_fp16")]; - tensor x2_99_begin_0 = const()[name = string("x2_99_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_99_end_0 = const()[name = string("x2_99_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_99_end_mask_0 = const()[name = string("x2_99_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_99_cast_fp16 = slice_by_index(begin = x2_99_begin_0, end = x2_99_end_0, end_mask = x2_99_end_mask_0, x = k_49_cast_fp16)[name = string("x2_99_cast_fp16")]; - fp16 const_929_promoted_to_fp16 = const()[name = string("const_929_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_16446_cast_fp16 = mul(x = x2_99_cast_fp16, y = const_929_promoted_to_fp16)[name = string("op_16446_cast_fp16")]; - int32 var_16448 = const()[name = string("op_16448"), val = int32(-1)]; - bool var_16449_interleave_0 = const()[name = string("op_16449_interleave_0"), val = bool(false)]; - tensor var_16449_cast_fp16 = concat(axis = var_16448, interleave = var_16449_interleave_0, values = (var_16446_cast_fp16, x1_99_cast_fp16))[name = string("op_16449_cast_fp16")]; - tensor var_16450_cast_fp16 = mul(x = var_16449_cast_fp16, y = sin_1_cast_fp16)[name = string("op_16450_cast_fp16")]; - tensor key_states_97_cast_fp16 = add(x = var_16425_cast_fp16, y = var_16450_cast_fp16)[name = string("key_states_97_cast_fp16")]; - tensor expand_dims_288 = const()[name = string("expand_dims_288"), val = tensor([20])]; - tensor expand_dims_289 = const()[name = string("expand_dims_289"), val = tensor([0])]; - tensor expand_dims_291 = const()[name = string("expand_dims_291"), val = tensor([0])]; - tensor expand_dims_292 = const()[name = string("expand_dims_292"), val = tensor([21])]; - int32 concat_194_axis_0 = const()[name = string("concat_194_axis_0"), val = int32(0)]; - bool concat_194_interleave_0 = const()[name = string("concat_194_interleave_0"), val = bool(false)]; - tensor concat_194 = concat(axis = concat_194_axis_0, interleave = concat_194_interleave_0, values = (expand_dims_288, expand_dims_289, current_pos, expand_dims_291))[name = string("concat_194")]; - tensor concat_195_values1_0 = const()[name = string("concat_195_values1_0"), val = tensor([0])]; - tensor concat_195_values3_0 = const()[name = string("concat_195_values3_0"), val = tensor([0])]; - int32 concat_195_axis_0 = const()[name = string("concat_195_axis_0"), val = int32(0)]; - bool concat_195_interleave_0 = const()[name = string("concat_195_interleave_0"), val = bool(false)]; - tensor concat_195 = concat(axis = concat_195_axis_0, interleave = concat_195_interleave_0, values = (expand_dims_292, concat_195_values1_0, var_1909, concat_195_values3_0))[name = string("concat_195")]; - tensor model_model_kv_cache_local_internal_tensor_assign_41_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_41_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_41_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_41_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_41_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_41_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_41_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_41_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_41_cast_fp16 = slice_update(begin = concat_194, begin_mask = model_model_kv_cache_local_internal_tensor_assign_41_begin_mask_0, end = concat_195, end_mask = model_model_kv_cache_local_internal_tensor_assign_41_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_41_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_41_stride_0, update = key_states_97_cast_fp16, x = coreml_update_state_97)[name = string("model_model_kv_cache_local_internal_tensor_assign_41_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_41_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_48_write_state")]; - tensor coreml_update_state_100 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_48")]; - tensor expand_dims_294 = const()[name = string("expand_dims_294"), val = tensor([42])]; - tensor expand_dims_295 = const()[name = string("expand_dims_295"), val = tensor([0])]; - tensor expand_dims_297 = const()[name = string("expand_dims_297"), val = tensor([0])]; - tensor expand_dims_298 = const()[name = string("expand_dims_298"), val = tensor([43])]; - int32 concat_198_axis_0 = const()[name = string("concat_198_axis_0"), val = int32(0)]; - bool concat_198_interleave_0 = const()[name = string("concat_198_interleave_0"), val = bool(false)]; - tensor concat_198 = concat(axis = concat_198_axis_0, interleave = concat_198_interleave_0, values = (expand_dims_294, expand_dims_295, current_pos, expand_dims_297))[name = string("concat_198")]; - tensor concat_199_values1_0 = const()[name = string("concat_199_values1_0"), val = tensor([0])]; - tensor concat_199_values3_0 = const()[name = string("concat_199_values3_0"), val = tensor([0])]; - int32 concat_199_axis_0 = const()[name = string("concat_199_axis_0"), val = int32(0)]; - bool concat_199_interleave_0 = const()[name = string("concat_199_interleave_0"), val = bool(false)]; - tensor concat_199 = concat(axis = concat_199_axis_0, interleave = concat_199_interleave_0, values = (expand_dims_298, concat_199_values1_0, var_1909, concat_199_values3_0))[name = string("concat_199")]; - tensor model_model_kv_cache_local_internal_tensor_assign_42_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_42_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_42_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_42_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_42_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_42_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_42_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_42_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_42_cast_fp16 = slice_update(begin = concat_198, begin_mask = model_model_kv_cache_local_internal_tensor_assign_42_begin_mask_0, end = concat_199, end_mask = model_model_kv_cache_local_internal_tensor_assign_42_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_42_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_42_stride_0, update = var_16337, x = coreml_update_state_100)[name = string("model_model_kv_cache_local_internal_tensor_assign_42_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_42_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_49_write_state")]; - tensor coreml_update_state_101 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_49")]; - tensor var_16505_begin_0 = const()[name = string("op_16505_begin_0"), val = tensor([20, 0, 0, 0])]; - tensor var_16505_end_0 = const()[name = string("op_16505_end_0"), val = tensor([21, 1, 512, 256])]; - tensor var_16505_end_mask_0 = const()[name = string("op_16505_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_16505_cast_fp16 = slice_by_index(begin = var_16505_begin_0, end = var_16505_end_0, end_mask = var_16505_end_mask_0, x = coreml_update_state_101)[name = string("op_16505_cast_fp16")]; - tensor var_16512_begin_0 = const()[name = string("op_16512_begin_0"), val = tensor([42, 0, 0, 0])]; - tensor var_16512_end_0 = const()[name = string("op_16512_end_0"), val = tensor([43, 1, 512, 256])]; - tensor var_16512_end_mask_0 = const()[name = string("op_16512_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_16512_cast_fp16 = slice_by_index(begin = var_16512_begin_0, end = var_16512_end_0, end_mask = var_16512_end_mask_0, x = coreml_update_state_101)[name = string("op_16512_cast_fp16")]; - tensor var_16549 = const()[name = string("op_16549"), val = tensor([1, 4, 1, 1])]; - tensor x_389_cast_fp16 = tile(reps = var_16549, x = var_16505_cast_fp16)[name = string("x_389_cast_fp16")]; - tensor var_16569 = const()[name = string("op_16569"), val = tensor([1, 4, 1, 1])]; - tensor x_395_cast_fp16 = tile(reps = var_16569, x = var_16512_cast_fp16)[name = string("x_395_cast_fp16")]; - bool var_16596_transpose_x_1 = const()[name = string("op_16596_transpose_x_1"), val = bool(false)]; - bool var_16596_transpose_y_1 = const()[name = string("op_16596_transpose_y_1"), val = bool(true)]; - tensor var_16596 = matmul(transpose_x = var_16596_transpose_x_1, transpose_y = var_16596_transpose_y_1, x = query_states_97_cast_fp16, y = x_389_cast_fp16)[name = string("op_16596")]; - fp16 var_16597_to_fp16 = const()[name = string("op_16597_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_145_cast_fp16 = mul(x = var_16596, y = var_16597_to_fp16)[name = string("attn_weights_145_cast_fp16")]; - tensor attn_weights_147_cast_fp16 = add(x = attn_weights_145_cast_fp16, y = var_2083)[name = string("attn_weights_147_cast_fp16")]; - int32 var_16632 = const()[name = string("op_16632"), val = int32(-1)]; - tensor attn_weights_149_cast_fp16 = softmax(axis = var_16632, x = attn_weights_147_cast_fp16)[name = string("attn_weights_149_cast_fp16")]; - bool attn_output_241_transpose_x_0 = const()[name = string("attn_output_241_transpose_x_0"), val = bool(false)]; - bool attn_output_241_transpose_y_0 = const()[name = string("attn_output_241_transpose_y_0"), val = bool(false)]; - tensor attn_output_241_cast_fp16 = matmul(transpose_x = attn_output_241_transpose_x_0, transpose_y = attn_output_241_transpose_y_0, x = attn_weights_149_cast_fp16, y = x_395_cast_fp16)[name = string("attn_output_241_cast_fp16")]; - tensor var_16643_perm_0 = const()[name = string("op_16643_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_16647 = const()[name = string("op_16647"), val = tensor([1, 1, 1024])]; - tensor var_16643_cast_fp16 = transpose(perm = var_16643_perm_0, x = attn_output_241_cast_fp16)[name = string("transpose_10")]; - tensor attn_output_245_cast_fp16 = reshape(shape = var_16647, x = var_16643_cast_fp16)[name = string("attn_output_245_cast_fp16")]; - tensor var_16652 = const()[name = string("op_16652"), val = tensor([0, 2, 1])]; - string var_16668_pad_type_0 = const()[name = string("op_16668_pad_type_0"), val = string("valid")]; - int32 var_16668_groups_0 = const()[name = string("op_16668_groups_0"), val = int32(1)]; - tensor var_16668_strides_0 = const()[name = string("op_16668_strides_0"), val = tensor([1])]; - tensor var_16668_pad_0 = const()[name = string("op_16668_pad_0"), val = tensor([0, 0])]; - tensor var_16668_dilations_0 = const()[name = string("op_16668_dilations_0"), val = tensor([1])]; - tensor squeeze_24_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(509610496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(510495296))))[name = string("squeeze_24_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_16653_cast_fp16 = transpose(perm = var_16652, x = attn_output_245_cast_fp16)[name = string("transpose_9")]; - tensor var_16668_cast_fp16 = conv(dilations = var_16668_dilations_0, groups = var_16668_groups_0, pad = var_16668_pad_0, pad_type = var_16668_pad_type_0, strides = var_16668_strides_0, weight = squeeze_24_cast_fp16_to_fp32_to_fp16_palettized, x = var_16653_cast_fp16)[name = string("op_16668_cast_fp16")]; - tensor var_16672 = const()[name = string("op_16672"), val = tensor([0, 2, 1])]; - int32 var_16683 = const()[name = string("op_16683"), val = int32(-1)]; - fp16 const_938_promoted_to_fp16 = const()[name = string("const_938_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_393_cast_fp16 = transpose(perm = var_16672, x = var_16668_cast_fp16)[name = string("transpose_8")]; - tensor var_16685_cast_fp16 = mul(x = hidden_states_393_cast_fp16, y = const_938_promoted_to_fp16)[name = string("op_16685_cast_fp16")]; - bool input_491_interleave_0 = const()[name = string("input_491_interleave_0"), val = bool(false)]; - tensor input_491_cast_fp16 = concat(axis = var_16683, interleave = input_491_interleave_0, values = (hidden_states_393_cast_fp16, var_16685_cast_fp16))[name = string("input_491_cast_fp16")]; - tensor normed_589_axes_0 = const()[name = string("normed_589_axes_0"), val = tensor([-1])]; - fp16 var_16680_to_fp16 = const()[name = string("op_16680_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_589_cast_fp16 = layer_norm(axes = normed_589_axes_0, epsilon = var_16680_to_fp16, x = input_491_cast_fp16)[name = string("normed_589_cast_fp16")]; - tensor normed_591_begin_0 = const()[name = string("normed_591_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_591_end_0 = const()[name = string("normed_591_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_591_end_mask_0 = const()[name = string("normed_591_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_591_cast_fp16 = slice_by_index(begin = normed_591_begin_0, end = normed_591_end_0, end_mask = normed_591_end_mask_0, x = normed_589_cast_fp16)[name = string("normed_591_cast_fp16")]; - tensor var_16699_to_fp16 = const()[name = string("op_16699_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(510513792)))]; - tensor attn_output_249_cast_fp16 = mul(x = normed_591_cast_fp16, y = var_16699_to_fp16)[name = string("attn_output_249_cast_fp16")]; - tensor hidden_states_395_cast_fp16 = add(x = hidden_states_385_cast_fp16, y = attn_output_249_cast_fp16)[name = string("hidden_states_395_cast_fp16")]; - int32 var_16712 = const()[name = string("op_16712"), val = int32(-1)]; - fp16 const_942_promoted_to_fp16 = const()[name = string("const_942_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_16714_cast_fp16 = mul(x = hidden_states_395_cast_fp16, y = const_942_promoted_to_fp16)[name = string("op_16714_cast_fp16")]; - bool input_493_interleave_0 = const()[name = string("input_493_interleave_0"), val = bool(false)]; - tensor input_493_cast_fp16 = concat(axis = var_16712, interleave = input_493_interleave_0, values = (hidden_states_395_cast_fp16, var_16714_cast_fp16))[name = string("input_493_cast_fp16")]; - tensor normed_593_axes_0 = const()[name = string("normed_593_axes_0"), val = tensor([-1])]; - fp16 var_16709_to_fp16 = const()[name = string("op_16709_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_593_cast_fp16 = layer_norm(axes = normed_593_axes_0, epsilon = var_16709_to_fp16, x = input_493_cast_fp16)[name = string("normed_593_cast_fp16")]; - tensor normed_595_begin_0 = const()[name = string("normed_595_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_595_end_0 = const()[name = string("normed_595_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_595_end_mask_0 = const()[name = string("normed_595_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_595_cast_fp16 = slice_by_index(begin = normed_595_begin_0, end = normed_595_end_0, end_mask = normed_595_end_mask_0, x = normed_593_cast_fp16)[name = string("normed_595_cast_fp16")]; - tensor var_16728_to_fp16 = const()[name = string("op_16728_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(510516160)))]; - tensor x_397_cast_fp16 = mul(x = normed_595_cast_fp16, y = var_16728_to_fp16)[name = string("x_397_cast_fp16")]; - tensor var_16740 = const()[name = string("op_16740"), val = tensor([0, 2, 1])]; - tensor input_495_axes_0 = const()[name = string("input_495_axes_0"), val = tensor([2])]; - tensor var_16741_cast_fp16 = transpose(perm = var_16740, x = x_397_cast_fp16)[name = string("transpose_7")]; - tensor input_495_cast_fp16 = expand_dims(axes = input_495_axes_0, x = var_16741_cast_fp16)[name = string("input_495_cast_fp16")]; - string x_399_pad_type_0 = const()[name = string("x_399_pad_type_0"), val = string("valid")]; - tensor x_399_strides_0 = const()[name = string("x_399_strides_0"), val = tensor([1, 1])]; - tensor x_399_pad_0 = const()[name = string("x_399_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_399_dilations_0 = const()[name = string("x_399_dilations_0"), val = tensor([1, 1])]; - int32 x_399_groups_0 = const()[name = string("x_399_groups_0"), val = int32(1)]; - tensor model_model_layers_24_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(510518528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(516490560))))[name = string("model_model_layers_24_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_399_cast_fp16 = conv(dilations = x_399_dilations_0, groups = x_399_groups_0, pad = x_399_pad_0, pad_type = x_399_pad_type_0, strides = x_399_strides_0, weight = model_model_layers_24_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_495_cast_fp16)[name = string("x_399_cast_fp16")]; - string b_49_pad_type_0 = const()[name = string("b_49_pad_type_0"), val = string("valid")]; - tensor b_49_strides_0 = const()[name = string("b_49_strides_0"), val = tensor([1, 1])]; - tensor b_49_pad_0 = const()[name = string("b_49_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_49_dilations_0 = const()[name = string("b_49_dilations_0"), val = tensor([1, 1])]; - int32 b_49_groups_0 = const()[name = string("b_49_groups_0"), val = int32(1)]; - tensor model_model_layers_24_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(516601216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(522573248))))[name = string("model_model_layers_24_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_49_cast_fp16 = conv(dilations = b_49_dilations_0, groups = b_49_groups_0, pad = b_49_pad_0, pad_type = b_49_pad_type_0, strides = b_49_strides_0, weight = model_model_layers_24_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_495_cast_fp16)[name = string("b_49_cast_fp16")]; - string var_16766_mode_0 = const()[name = string("op_16766_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_16766_cast_fp16 = gelu(mode = var_16766_mode_0, x = x_399_cast_fp16)[name = string("op_16766_cast_fp16")]; - tensor input_497_cast_fp16 = mul(x = var_16766_cast_fp16, y = b_49_cast_fp16)[name = string("input_497_cast_fp16")]; - string e_49_pad_type_0 = const()[name = string("e_49_pad_type_0"), val = string("valid")]; - tensor e_49_strides_0 = const()[name = string("e_49_strides_0"), val = tensor([1, 1])]; - tensor e_49_pad_0 = const()[name = string("e_49_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_49_dilations_0 = const()[name = string("e_49_dilations_0"), val = tensor([1, 1])]; - int32 e_49_groups_0 = const()[name = string("e_49_groups_0"), val = int32(1)]; - tensor model_model_layers_24_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(522683904))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528655936))))[name = string("model_model_layers_24_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_49_cast_fp16 = conv(dilations = e_49_dilations_0, groups = e_49_groups_0, pad = e_49_pad_0, pad_type = e_49_pad_type_0, strides = e_49_strides_0, weight = model_model_layers_24_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_497_cast_fp16)[name = string("e_49_cast_fp16")]; - tensor var_16774_axes_0 = const()[name = string("op_16774_axes_0"), val = tensor([2])]; - tensor var_16774_cast_fp16 = squeeze(axes = var_16774_axes_0, x = e_49_cast_fp16)[name = string("op_16774_cast_fp16")]; - tensor var_16775 = const()[name = string("op_16775"), val = tensor([0, 2, 1])]; - int32 var_16786 = const()[name = string("op_16786"), val = int32(-1)]; - fp16 const_946_promoted_to_fp16 = const()[name = string("const_946_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_397_cast_fp16 = transpose(perm = var_16775, x = var_16774_cast_fp16)[name = string("transpose_6")]; - tensor var_16788_cast_fp16 = mul(x = hidden_states_397_cast_fp16, y = const_946_promoted_to_fp16)[name = string("op_16788_cast_fp16")]; - bool input_499_interleave_0 = const()[name = string("input_499_interleave_0"), val = bool(false)]; - tensor input_499_cast_fp16 = concat(axis = var_16786, interleave = input_499_interleave_0, values = (hidden_states_397_cast_fp16, var_16788_cast_fp16))[name = string("input_499_cast_fp16")]; - tensor normed_597_axes_0 = const()[name = string("normed_597_axes_0"), val = tensor([-1])]; - fp16 var_16783_to_fp16 = const()[name = string("op_16783_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_597_cast_fp16 = layer_norm(axes = normed_597_axes_0, epsilon = var_16783_to_fp16, x = input_499_cast_fp16)[name = string("normed_597_cast_fp16")]; - tensor normed_599_begin_0 = const()[name = string("normed_599_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_599_end_0 = const()[name = string("normed_599_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_599_end_mask_0 = const()[name = string("normed_599_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_599_cast_fp16 = slice_by_index(begin = normed_599_begin_0, end = normed_599_end_0, end_mask = normed_599_end_mask_0, x = normed_597_cast_fp16)[name = string("normed_599_cast_fp16")]; - tensor var_16802_to_fp16 = const()[name = string("op_16802_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528674432)))]; - tensor hidden_states_399_cast_fp16 = mul(x = normed_599_cast_fp16, y = var_16802_to_fp16)[name = string("hidden_states_399_cast_fp16")]; - tensor hidden_states_401_cast_fp16 = add(x = hidden_states_395_cast_fp16, y = hidden_states_399_cast_fp16)[name = string("hidden_states_401_cast_fp16")]; - int32 var_16853 = const()[name = string("op_16853"), val = int32(-1)]; - fp16 const_950_promoted_to_fp16 = const()[name = string("const_950_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_16855_cast_fp16 = mul(x = hidden_states_401_cast_fp16, y = const_950_promoted_to_fp16)[name = string("op_16855_cast_fp16")]; - bool input_501_interleave_0 = const()[name = string("input_501_interleave_0"), val = bool(false)]; - tensor input_501_cast_fp16 = concat(axis = var_16853, interleave = input_501_interleave_0, values = (hidden_states_401_cast_fp16, var_16855_cast_fp16))[name = string("input_501_cast_fp16")]; - tensor normed_601_axes_0 = const()[name = string("normed_601_axes_0"), val = tensor([-1])]; - fp16 var_16850_to_fp16 = const()[name = string("op_16850_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_601_cast_fp16 = layer_norm(axes = normed_601_axes_0, epsilon = var_16850_to_fp16, x = input_501_cast_fp16)[name = string("normed_601_cast_fp16")]; - tensor normed_603_begin_0 = const()[name = string("normed_603_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_603_end_0 = const()[name = string("normed_603_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_603_end_mask_0 = const()[name = string("normed_603_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_603_cast_fp16 = slice_by_index(begin = normed_603_begin_0, end = normed_603_end_0, end_mask = normed_603_end_mask_0, x = normed_601_cast_fp16)[name = string("normed_603_cast_fp16")]; - tensor var_16869_to_fp16 = const()[name = string("op_16869_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528676800)))]; - tensor hidden_states_403_cast_fp16 = mul(x = normed_603_cast_fp16, y = var_16869_to_fp16)[name = string("hidden_states_403_cast_fp16")]; - tensor var_16874 = const()[name = string("op_16874"), val = tensor([0, 2, 1])]; - tensor var_16877_axes_0 = const()[name = string("op_16877_axes_0"), val = tensor([2])]; - tensor var_16875_cast_fp16 = transpose(perm = var_16874, x = hidden_states_403_cast_fp16)[name = string("transpose_5")]; - tensor var_16877_cast_fp16 = expand_dims(axes = var_16877_axes_0, x = var_16875_cast_fp16)[name = string("op_16877_cast_fp16")]; - string var_16893_pad_type_0 = const()[name = string("op_16893_pad_type_0"), val = string("valid")]; - tensor var_16893_strides_0 = const()[name = string("op_16893_strides_0"), val = tensor([1, 1])]; - tensor var_16893_pad_0 = const()[name = string("op_16893_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_16893_dilations_0 = const()[name = string("op_16893_dilations_0"), val = tensor([1, 1])]; - int32 var_16893_groups_0 = const()[name = string("op_16893_groups_0"), val = int32(1)]; - tensor var_16893 = conv(dilations = var_16893_dilations_0, groups = var_16893_groups_0, pad = var_16893_pad_0, pad_type = var_16893_pad_type_0, strides = var_16893_strides_0, weight = model_model_layers_25_self_attn_q_proj_weight_palettized, x = var_16877_cast_fp16)[name = string("op_16893")]; - tensor var_16898 = const()[name = string("op_16898"), val = tensor([1, 4, 1, 256])]; - tensor var_16899 = reshape(shape = var_16898, x = var_16893)[name = string("op_16899")]; - string var_16915_pad_type_0 = const()[name = string("op_16915_pad_type_0"), val = string("valid")]; - tensor var_16915_strides_0 = const()[name = string("op_16915_strides_0"), val = tensor([1, 1])]; - tensor var_16915_pad_0 = const()[name = string("op_16915_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_16915_dilations_0 = const()[name = string("op_16915_dilations_0"), val = tensor([1, 1])]; - int32 var_16915_groups_0 = const()[name = string("op_16915_groups_0"), val = int32(1)]; - tensor var_16915 = conv(dilations = var_16915_dilations_0, groups = var_16915_groups_0, pad = var_16915_pad_0, pad_type = var_16915_pad_type_0, strides = var_16915_strides_0, weight = model_model_layers_25_self_attn_k_proj_weight_palettized, x = var_16877_cast_fp16)[name = string("op_16915")]; - tensor var_16920 = const()[name = string("op_16920"), val = tensor([1, 1, 1, 256])]; - tensor var_16921 = reshape(shape = var_16920, x = var_16915)[name = string("op_16921")]; - string var_16937_pad_type_0 = const()[name = string("op_16937_pad_type_0"), val = string("valid")]; - tensor var_16937_strides_0 = const()[name = string("op_16937_strides_0"), val = tensor([1, 1])]; - tensor var_16937_pad_0 = const()[name = string("op_16937_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_16937_dilations_0 = const()[name = string("op_16937_dilations_0"), val = tensor([1, 1])]; - int32 var_16937_groups_0 = const()[name = string("op_16937_groups_0"), val = int32(1)]; - tensor var_16937 = conv(dilations = var_16937_dilations_0, groups = var_16937_groups_0, pad = var_16937_pad_0, pad_type = var_16937_pad_type_0, strides = var_16937_strides_0, weight = model_model_layers_25_self_attn_v_proj_weight_palettized, x = var_16877_cast_fp16)[name = string("op_16937")]; - tensor var_16942 = const()[name = string("op_16942"), val = tensor([1, 1, 1, 256])]; - tensor var_16943 = reshape(shape = var_16942, x = var_16937)[name = string("op_16943")]; - int32 var_16958 = const()[name = string("op_16958"), val = int32(-1)]; - fp16 const_954_promoted = const()[name = string("const_954_promoted"), val = fp16(-0x1p+0)]; - tensor var_16960 = mul(x = var_16899, y = const_954_promoted)[name = string("op_16960")]; - bool input_505_interleave_0 = const()[name = string("input_505_interleave_0"), val = bool(false)]; - tensor input_505 = concat(axis = var_16958, interleave = input_505_interleave_0, values = (var_16899, var_16960))[name = string("input_505")]; - tensor normed_605_axes_0 = const()[name = string("normed_605_axes_0"), val = tensor([-1])]; - fp16 var_16955_to_fp16 = const()[name = string("op_16955_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_605_cast_fp16 = layer_norm(axes = normed_605_axes_0, epsilon = var_16955_to_fp16, x = input_505)[name = string("normed_605_cast_fp16")]; - tensor normed_607_begin_0 = const()[name = string("normed_607_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_607_end_0 = const()[name = string("normed_607_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_607_end_mask_0 = const()[name = string("normed_607_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_607 = slice_by_index(begin = normed_607_begin_0, end = normed_607_end_0, end_mask = normed_607_end_mask_0, x = normed_605_cast_fp16)[name = string("normed_607")]; - tensor var_16974_to_fp16 = const()[name = string("op_16974_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528679168)))]; - tensor q_cast_fp16 = mul(x = normed_607, y = var_16974_to_fp16)[name = string("q_cast_fp16")]; - int32 var_16985 = const()[name = string("op_16985"), val = int32(-1)]; - fp16 const_958_promoted = const()[name = string("const_958_promoted"), val = fp16(-0x1p+0)]; - tensor var_16987 = mul(x = var_16921, y = const_958_promoted)[name = string("op_16987")]; - bool input_507_interleave_0 = const()[name = string("input_507_interleave_0"), val = bool(false)]; - tensor input_507 = concat(axis = var_16985, interleave = input_507_interleave_0, values = (var_16921, var_16987))[name = string("input_507")]; - tensor normed_609_axes_0 = const()[name = string("normed_609_axes_0"), val = tensor([-1])]; - fp16 var_16982_to_fp16 = const()[name = string("op_16982_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_609_cast_fp16 = layer_norm(axes = normed_609_axes_0, epsilon = var_16982_to_fp16, x = input_507)[name = string("normed_609_cast_fp16")]; - tensor normed_611_begin_0 = const()[name = string("normed_611_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_611_end_0 = const()[name = string("normed_611_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_611_end_mask_0 = const()[name = string("normed_611_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_611 = slice_by_index(begin = normed_611_begin_0, end = normed_611_end_0, end_mask = normed_611_end_mask_0, x = normed_609_cast_fp16)[name = string("normed_611")]; - tensor var_17001_to_fp16 = const()[name = string("op_17001_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528679744)))]; - tensor k_cast_fp16 = mul(x = normed_611, y = var_17001_to_fp16)[name = string("k_cast_fp16")]; - tensor var_17003_cast_fp16 = mul(x = q_cast_fp16, y = cos_1_cast_fp16)[name = string("op_17003_cast_fp16")]; - tensor x1_101_begin_0 = const()[name = string("x1_101_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_101_end_0 = const()[name = string("x1_101_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_101_end_mask_0 = const()[name = string("x1_101_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_101_cast_fp16 = slice_by_index(begin = x1_101_begin_0, end = x1_101_end_0, end_mask = x1_101_end_mask_0, x = q_cast_fp16)[name = string("x1_101_cast_fp16")]; - tensor x2_101_begin_0 = const()[name = string("x2_101_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_101_end_0 = const()[name = string("x2_101_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_101_end_mask_0 = const()[name = string("x2_101_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_101_cast_fp16 = slice_by_index(begin = x2_101_begin_0, end = x2_101_end_0, end_mask = x2_101_end_mask_0, x = q_cast_fp16)[name = string("x2_101_cast_fp16")]; - fp16 const_964_promoted_to_fp16 = const()[name = string("const_964_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_17024_cast_fp16 = mul(x = x2_101_cast_fp16, y = const_964_promoted_to_fp16)[name = string("op_17024_cast_fp16")]; - int32 var_17026 = const()[name = string("op_17026"), val = int32(-1)]; - bool var_17027_interleave_0 = const()[name = string("op_17027_interleave_0"), val = bool(false)]; - tensor var_17027_cast_fp16 = concat(axis = var_17026, interleave = var_17027_interleave_0, values = (var_17024_cast_fp16, x1_101_cast_fp16))[name = string("op_17027_cast_fp16")]; - tensor var_17028_cast_fp16 = mul(x = var_17027_cast_fp16, y = sin_1_cast_fp16)[name = string("op_17028_cast_fp16")]; - tensor query_states_101_cast_fp16 = add(x = var_17003_cast_fp16, y = var_17028_cast_fp16)[name = string("query_states_101_cast_fp16")]; - tensor var_17031_cast_fp16 = mul(x = k_cast_fp16, y = cos_1_cast_fp16)[name = string("op_17031_cast_fp16")]; - tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_cast_fp16 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = k_cast_fp16)[name = string("x1_cast_fp16")]; - tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_cast_fp16 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = k_cast_fp16)[name = string("x2_cast_fp16")]; - fp16 const_967_promoted_to_fp16 = const()[name = string("const_967_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_17052_cast_fp16 = mul(x = x2_cast_fp16, y = const_967_promoted_to_fp16)[name = string("op_17052_cast_fp16")]; - int32 var_17054 = const()[name = string("op_17054"), val = int32(-1)]; - bool var_17055_interleave_0 = const()[name = string("op_17055_interleave_0"), val = bool(false)]; - tensor var_17055_cast_fp16 = concat(axis = var_17054, interleave = var_17055_interleave_0, values = (var_17052_cast_fp16, x1_cast_fp16))[name = string("op_17055_cast_fp16")]; - tensor var_17056_cast_fp16 = mul(x = var_17055_cast_fp16, y = sin_1_cast_fp16)[name = string("op_17056_cast_fp16")]; - tensor key_states_101_cast_fp16 = add(x = var_17031_cast_fp16, y = var_17056_cast_fp16)[name = string("key_states_101_cast_fp16")]; - tensor expand_dims_300 = const()[name = string("expand_dims_300"), val = tensor([21])]; - tensor expand_dims_301 = const()[name = string("expand_dims_301"), val = tensor([0])]; - tensor expand_dims_303 = const()[name = string("expand_dims_303"), val = tensor([0])]; - tensor expand_dims_304 = const()[name = string("expand_dims_304"), val = tensor([22])]; - int32 concat_202_axis_0 = const()[name = string("concat_202_axis_0"), val = int32(0)]; - bool concat_202_interleave_0 = const()[name = string("concat_202_interleave_0"), val = bool(false)]; - tensor concat_202 = concat(axis = concat_202_axis_0, interleave = concat_202_interleave_0, values = (expand_dims_300, expand_dims_301, current_pos, expand_dims_303))[name = string("concat_202")]; - tensor concat_203_values1_0 = const()[name = string("concat_203_values1_0"), val = tensor([0])]; - tensor concat_203_values3_0 = const()[name = string("concat_203_values3_0"), val = tensor([0])]; - int32 concat_203_axis_0 = const()[name = string("concat_203_axis_0"), val = int32(0)]; - bool concat_203_interleave_0 = const()[name = string("concat_203_interleave_0"), val = bool(false)]; - tensor concat_203 = concat(axis = concat_203_axis_0, interleave = concat_203_interleave_0, values = (expand_dims_304, concat_203_values1_0, var_1909, concat_203_values3_0))[name = string("concat_203")]; - tensor model_model_kv_cache_local_internal_tensor_assign_43_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_43_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_43_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_43_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_43_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_43_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_43_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_43_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_43_cast_fp16 = slice_update(begin = concat_202, begin_mask = model_model_kv_cache_local_internal_tensor_assign_43_begin_mask_0, end = concat_203, end_mask = model_model_kv_cache_local_internal_tensor_assign_43_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_43_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_43_stride_0, update = key_states_101_cast_fp16, x = coreml_update_state_101)[name = string("model_model_kv_cache_local_internal_tensor_assign_43_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_43_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_50_write_state")]; - tensor coreml_update_state_102 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_50")]; - tensor expand_dims_306 = const()[name = string("expand_dims_306"), val = tensor([43])]; - tensor expand_dims_307 = const()[name = string("expand_dims_307"), val = tensor([0])]; - tensor expand_dims_309 = const()[name = string("expand_dims_309"), val = tensor([0])]; - tensor expand_dims_310 = const()[name = string("expand_dims_310"), val = tensor([44])]; - int32 concat_206_axis_0 = const()[name = string("concat_206_axis_0"), val = int32(0)]; - bool concat_206_interleave_0 = const()[name = string("concat_206_interleave_0"), val = bool(false)]; - tensor concat_206 = concat(axis = concat_206_axis_0, interleave = concat_206_interleave_0, values = (expand_dims_306, expand_dims_307, current_pos, expand_dims_309))[name = string("concat_206")]; - tensor concat_207_values1_0 = const()[name = string("concat_207_values1_0"), val = tensor([0])]; - tensor concat_207_values3_0 = const()[name = string("concat_207_values3_0"), val = tensor([0])]; - int32 concat_207_axis_0 = const()[name = string("concat_207_axis_0"), val = int32(0)]; - bool concat_207_interleave_0 = const()[name = string("concat_207_interleave_0"), val = bool(false)]; - tensor concat_207 = concat(axis = concat_207_axis_0, interleave = concat_207_interleave_0, values = (expand_dims_310, concat_207_values1_0, var_1909, concat_207_values3_0))[name = string("concat_207")]; - tensor model_model_kv_cache_local_internal_tensor_assign_44_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_44_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_44_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_44_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_44_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_44_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_44_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_44_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_44_cast_fp16 = slice_update(begin = concat_206, begin_mask = model_model_kv_cache_local_internal_tensor_assign_44_begin_mask_0, end = concat_207, end_mask = model_model_kv_cache_local_internal_tensor_assign_44_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_44_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_44_stride_0, update = var_16943, x = coreml_update_state_102)[name = string("model_model_kv_cache_local_internal_tensor_assign_44_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_44_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_51_write_state")]; - tensor coreml_update_state_103 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_51")]; - tensor var_17111_begin_0 = const()[name = string("op_17111_begin_0"), val = tensor([21, 0, 0, 0])]; - tensor var_17111_end_0 = const()[name = string("op_17111_end_0"), val = tensor([22, 1, 512, 256])]; - tensor var_17111_end_mask_0 = const()[name = string("op_17111_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_17111_cast_fp16 = slice_by_index(begin = var_17111_begin_0, end = var_17111_end_0, end_mask = var_17111_end_mask_0, x = coreml_update_state_103)[name = string("op_17111_cast_fp16")]; - tensor var_17118_begin_0 = const()[name = string("op_17118_begin_0"), val = tensor([43, 0, 0, 0])]; - tensor var_17118_end_0 = const()[name = string("op_17118_end_0"), val = tensor([1, 1, 512, 256])]; - tensor var_17118_end_mask_0 = const()[name = string("op_17118_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_17118_cast_fp16 = slice_by_index(begin = var_17118_begin_0, end = var_17118_end_0, end_mask = var_17118_end_mask_0, x = coreml_update_state_103)[name = string("op_17118_cast_fp16")]; - tensor var_17155 = const()[name = string("op_17155"), val = tensor([1, 4, 1, 1])]; - tensor x_405_cast_fp16 = tile(reps = var_17155, x = var_17111_cast_fp16)[name = string("x_405_cast_fp16")]; - tensor var_17175 = const()[name = string("op_17175"), val = tensor([1, 4, 1, 1])]; - tensor x_411_cast_fp16 = tile(reps = var_17175, x = var_17118_cast_fp16)[name = string("x_411_cast_fp16")]; - bool var_17202_transpose_x_1 = const()[name = string("op_17202_transpose_x_1"), val = bool(false)]; - bool var_17202_transpose_y_1 = const()[name = string("op_17202_transpose_y_1"), val = bool(true)]; - tensor var_17202 = matmul(transpose_x = var_17202_transpose_x_1, transpose_y = var_17202_transpose_y_1, x = query_states_101_cast_fp16, y = x_405_cast_fp16)[name = string("op_17202")]; - fp16 var_17203_to_fp16 = const()[name = string("op_17203_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_151_cast_fp16 = mul(x = var_17202, y = var_17203_to_fp16)[name = string("attn_weights_151_cast_fp16")]; - tensor attn_weights_153_cast_fp16 = add(x = attn_weights_151_cast_fp16, y = var_2083)[name = string("attn_weights_153_cast_fp16")]; - int32 var_17238 = const()[name = string("op_17238"), val = int32(-1)]; - tensor attn_weights_cast_fp16 = softmax(axis = var_17238, x = attn_weights_153_cast_fp16)[name = string("attn_weights_cast_fp16")]; - bool attn_output_251_transpose_x_0 = const()[name = string("attn_output_251_transpose_x_0"), val = bool(false)]; - bool attn_output_251_transpose_y_0 = const()[name = string("attn_output_251_transpose_y_0"), val = bool(false)]; - tensor attn_output_251_cast_fp16 = matmul(transpose_x = attn_output_251_transpose_x_0, transpose_y = attn_output_251_transpose_y_0, x = attn_weights_cast_fp16, y = x_411_cast_fp16)[name = string("attn_output_251_cast_fp16")]; - tensor var_17249_perm_0 = const()[name = string("op_17249_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_17253 = const()[name = string("op_17253"), val = tensor([1, 1, 1024])]; - tensor var_17249_cast_fp16 = transpose(perm = var_17249_perm_0, x = attn_output_251_cast_fp16)[name = string("transpose_4")]; - tensor attn_output_255_cast_fp16 = reshape(shape = var_17253, x = var_17249_cast_fp16)[name = string("attn_output_255_cast_fp16")]; - tensor var_17258 = const()[name = string("op_17258"), val = tensor([0, 2, 1])]; - string var_17274_pad_type_0 = const()[name = string("op_17274_pad_type_0"), val = string("valid")]; - int32 var_17274_groups_0 = const()[name = string("op_17274_groups_0"), val = int32(1)]; - tensor var_17274_strides_0 = const()[name = string("op_17274_strides_0"), val = tensor([1])]; - tensor var_17274_pad_0 = const()[name = string("op_17274_pad_0"), val = tensor([0, 0])]; - tensor var_17274_dilations_0 = const()[name = string("op_17274_dilations_0"), val = tensor([1])]; - tensor squeeze_25_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528680320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(529565120))))[name = string("squeeze_25_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_17259_cast_fp16 = transpose(perm = var_17258, x = attn_output_255_cast_fp16)[name = string("transpose_3")]; - tensor var_17274_cast_fp16 = conv(dilations = var_17274_dilations_0, groups = var_17274_groups_0, pad = var_17274_pad_0, pad_type = var_17274_pad_type_0, strides = var_17274_strides_0, weight = squeeze_25_cast_fp16_to_fp32_to_fp16_palettized, x = var_17259_cast_fp16)[name = string("op_17274_cast_fp16")]; - tensor var_17278 = const()[name = string("op_17278"), val = tensor([0, 2, 1])]; - int32 var_17289 = const()[name = string("op_17289"), val = int32(-1)]; - fp16 const_976_promoted_to_fp16 = const()[name = string("const_976_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_409_cast_fp16 = transpose(perm = var_17278, x = var_17274_cast_fp16)[name = string("transpose_2")]; - tensor var_17291_cast_fp16 = mul(x = hidden_states_409_cast_fp16, y = const_976_promoted_to_fp16)[name = string("op_17291_cast_fp16")]; - bool input_511_interleave_0 = const()[name = string("input_511_interleave_0"), val = bool(false)]; - tensor input_511_cast_fp16 = concat(axis = var_17289, interleave = input_511_interleave_0, values = (hidden_states_409_cast_fp16, var_17291_cast_fp16))[name = string("input_511_cast_fp16")]; - tensor normed_613_axes_0 = const()[name = string("normed_613_axes_0"), val = tensor([-1])]; - fp16 var_17286_to_fp16 = const()[name = string("op_17286_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_613_cast_fp16 = layer_norm(axes = normed_613_axes_0, epsilon = var_17286_to_fp16, x = input_511_cast_fp16)[name = string("normed_613_cast_fp16")]; - tensor normed_615_begin_0 = const()[name = string("normed_615_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_615_end_0 = const()[name = string("normed_615_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_615_end_mask_0 = const()[name = string("normed_615_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_615_cast_fp16 = slice_by_index(begin = normed_615_begin_0, end = normed_615_end_0, end_mask = normed_615_end_mask_0, x = normed_613_cast_fp16)[name = string("normed_615_cast_fp16")]; - tensor var_17305_to_fp16 = const()[name = string("op_17305_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(529583616)))]; - tensor attn_output_cast_fp16 = mul(x = normed_615_cast_fp16, y = var_17305_to_fp16)[name = string("attn_output_cast_fp16")]; - tensor hidden_states_411_cast_fp16 = add(x = hidden_states_401_cast_fp16, y = attn_output_cast_fp16)[name = string("hidden_states_411_cast_fp16")]; - int32 var_17318 = const()[name = string("op_17318"), val = int32(-1)]; - fp16 const_980_promoted_to_fp16 = const()[name = string("const_980_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_17320_cast_fp16 = mul(x = hidden_states_411_cast_fp16, y = const_980_promoted_to_fp16)[name = string("op_17320_cast_fp16")]; - bool input_513_interleave_0 = const()[name = string("input_513_interleave_0"), val = bool(false)]; - tensor input_513_cast_fp16 = concat(axis = var_17318, interleave = input_513_interleave_0, values = (hidden_states_411_cast_fp16, var_17320_cast_fp16))[name = string("input_513_cast_fp16")]; - tensor normed_617_axes_0 = const()[name = string("normed_617_axes_0"), val = tensor([-1])]; - fp16 var_17315_to_fp16 = const()[name = string("op_17315_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_617_cast_fp16 = layer_norm(axes = normed_617_axes_0, epsilon = var_17315_to_fp16, x = input_513_cast_fp16)[name = string("normed_617_cast_fp16")]; - tensor normed_619_begin_0 = const()[name = string("normed_619_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_619_end_0 = const()[name = string("normed_619_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_619_end_mask_0 = const()[name = string("normed_619_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_619_cast_fp16 = slice_by_index(begin = normed_619_begin_0, end = normed_619_end_0, end_mask = normed_619_end_mask_0, x = normed_617_cast_fp16)[name = string("normed_619_cast_fp16")]; - tensor var_17334_to_fp16 = const()[name = string("op_17334_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(529585984)))]; - tensor x_413_cast_fp16 = mul(x = normed_619_cast_fp16, y = var_17334_to_fp16)[name = string("x_413_cast_fp16")]; - tensor var_17346 = const()[name = string("op_17346"), val = tensor([0, 2, 1])]; - tensor input_515_axes_0 = const()[name = string("input_515_axes_0"), val = tensor([2])]; - tensor var_17347_cast_fp16 = transpose(perm = var_17346, x = x_413_cast_fp16)[name = string("transpose_1")]; - tensor input_515_cast_fp16 = expand_dims(axes = input_515_axes_0, x = var_17347_cast_fp16)[name = string("input_515_cast_fp16")]; - string x_pad_type_0 = const()[name = string("x_pad_type_0"), val = string("valid")]; - tensor x_strides_0 = const()[name = string("x_strides_0"), val = tensor([1, 1])]; - tensor x_pad_0 = const()[name = string("x_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_dilations_0 = const()[name = string("x_dilations_0"), val = tensor([1, 1])]; - int32 x_groups_0 = const()[name = string("x_groups_0"), val = int32(1)]; - tensor model_model_layers_25_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(529588352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535560384))))[name = string("model_model_layers_25_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_cast_fp16 = conv(dilations = x_dilations_0, groups = x_groups_0, pad = x_pad_0, pad_type = x_pad_type_0, strides = x_strides_0, weight = model_model_layers_25_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_515_cast_fp16)[name = string("x_cast_fp16")]; - string b_pad_type_0 = const()[name = string("b_pad_type_0"), val = string("valid")]; - tensor b_strides_0 = const()[name = string("b_strides_0"), val = tensor([1, 1])]; - tensor b_pad_0 = const()[name = string("b_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_dilations_0 = const()[name = string("b_dilations_0"), val = tensor([1, 1])]; - int32 b_groups_0 = const()[name = string("b_groups_0"), val = int32(1)]; - tensor model_model_layers_25_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535671040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(541643072))))[name = string("model_model_layers_25_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_cast_fp16 = conv(dilations = b_dilations_0, groups = b_groups_0, pad = b_pad_0, pad_type = b_pad_type_0, strides = b_strides_0, weight = model_model_layers_25_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_515_cast_fp16)[name = string("b_cast_fp16")]; - string var_17372_mode_0 = const()[name = string("op_17372_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_17372_cast_fp16 = gelu(mode = var_17372_mode_0, x = x_cast_fp16)[name = string("op_17372_cast_fp16")]; - tensor input_517_cast_fp16 = mul(x = var_17372_cast_fp16, y = b_cast_fp16)[name = string("input_517_cast_fp16")]; - string e_pad_type_0 = const()[name = string("e_pad_type_0"), val = string("valid")]; - tensor e_strides_0 = const()[name = string("e_strides_0"), val = tensor([1, 1])]; - tensor e_pad_0 = const()[name = string("e_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_dilations_0 = const()[name = string("e_dilations_0"), val = tensor([1, 1])]; - int32 e_groups_0 = const()[name = string("e_groups_0"), val = int32(1)]; - tensor model_model_layers_25_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(541753728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547725760))))[name = string("model_model_layers_25_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_cast_fp16 = conv(dilations = e_dilations_0, groups = e_groups_0, pad = e_pad_0, pad_type = e_pad_type_0, strides = e_strides_0, weight = model_model_layers_25_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_517_cast_fp16)[name = string("e_cast_fp16")]; - tensor var_17380_axes_0 = const()[name = string("op_17380_axes_0"), val = tensor([2])]; - tensor var_17380_cast_fp16 = squeeze(axes = var_17380_axes_0, x = e_cast_fp16)[name = string("op_17380_cast_fp16")]; - tensor var_17381 = const()[name = string("op_17381"), val = tensor([0, 2, 1])]; - int32 var_17392 = const()[name = string("op_17392"), val = int32(-1)]; - fp16 const_984_promoted_to_fp16 = const()[name = string("const_984_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_413_cast_fp16 = transpose(perm = var_17381, x = var_17380_cast_fp16)[name = string("transpose_0")]; - tensor var_17394_cast_fp16 = mul(x = hidden_states_413_cast_fp16, y = const_984_promoted_to_fp16)[name = string("op_17394_cast_fp16")]; - bool input_519_interleave_0 = const()[name = string("input_519_interleave_0"), val = bool(false)]; - tensor input_519_cast_fp16 = concat(axis = var_17392, interleave = input_519_interleave_0, values = (hidden_states_413_cast_fp16, var_17394_cast_fp16))[name = string("input_519_cast_fp16")]; - tensor normed_621_axes_0 = const()[name = string("normed_621_axes_0"), val = tensor([-1])]; - fp16 var_17389_to_fp16 = const()[name = string("op_17389_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_621_cast_fp16 = layer_norm(axes = normed_621_axes_0, epsilon = var_17389_to_fp16, x = input_519_cast_fp16)[name = string("normed_621_cast_fp16")]; - tensor normed_623_begin_0 = const()[name = string("normed_623_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_623_end_0 = const()[name = string("normed_623_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_623_end_mask_0 = const()[name = string("normed_623_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_623_cast_fp16 = slice_by_index(begin = normed_623_begin_0, end = normed_623_end_0, end_mask = normed_623_end_mask_0, x = normed_621_cast_fp16)[name = string("normed_623_cast_fp16")]; - tensor var_17408_to_fp16 = const()[name = string("op_17408_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547744256)))]; - tensor hidden_states_415_cast_fp16 = mul(x = normed_623_cast_fp16, y = var_17408_to_fp16)[name = string("hidden_states_415_cast_fp16")]; - tensor hidden_states_cast_fp16 = add(x = hidden_states_411_cast_fp16, y = hidden_states_415_cast_fp16)[name = string("hidden_states_cast_fp16")]; - int32 var_17421 = const()[name = string("op_17421"), val = int32(-1)]; - fp16 const_988_promoted_to_fp16 = const()[name = string("const_988_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_17423_cast_fp16 = mul(x = hidden_states_cast_fp16, y = const_988_promoted_to_fp16)[name = string("op_17423_cast_fp16")]; - bool input_interleave_0 = const()[name = string("input_interleave_0"), val = bool(false)]; - tensor input_cast_fp16 = concat(axis = var_17421, interleave = input_interleave_0, values = (hidden_states_cast_fp16, var_17423_cast_fp16))[name = string("input_cast_fp16")]; - tensor normed_625_axes_0 = const()[name = string("normed_625_axes_0"), val = tensor([-1])]; - fp16 var_17418_to_fp16 = const()[name = string("op_17418_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_625_cast_fp16 = layer_norm(axes = normed_625_axes_0, epsilon = var_17418_to_fp16, x = input_cast_fp16)[name = string("normed_625_cast_fp16")]; - tensor normed_begin_0 = const()[name = string("normed_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_end_0 = const()[name = string("normed_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_end_mask_0 = const()[name = string("normed_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_cast_fp16 = slice_by_index(begin = normed_begin_0, end = normed_end_0, end_mask = normed_end_mask_0, x = normed_625_cast_fp16)[name = string("normed_cast_fp16")]; - tensor var_17437_to_fp16 = const()[name = string("op_17437_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547746624)))]; - tensor out_cast_fp16 = mul(x = normed_cast_fp16, y = var_17437_to_fp16)[name = string("out_cast_fp16")]; - tensor var_17441_begin_0 = const()[name = string("op_17441_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_17441_end_0 = const()[name = string("op_17441_end_0"), val = tensor([1, 1, 512, 256])]; - tensor var_17441_end_mask_0 = const()[name = string("op_17441_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_17441_squeeze_mask_0 = const()[name = string("op_17441_squeeze_mask_0"), val = tensor([true, false, false, false])]; - tensor var_17441_cast_fp16 = slice_by_index(begin = var_17441_begin_0, end = var_17441_end_0, end_mask = var_17441_end_mask_0, squeeze_mask = var_17441_squeeze_mask_0, x = coreml_update_state_103)[name = string("op_17441_cast_fp16")]; - tensor var_17444_begin_0 = const()[name = string("op_17444_begin_0"), val = tensor([0, 0, 0])]; - tensor var_17444_end_0 = const()[name = string("op_17444_end_0"), val = tensor([1, 512, 256])]; - tensor var_17444_end_mask_0 = const()[name = string("op_17444_end_mask_0"), val = tensor([false, true, true])]; - tensor var_17444_squeeze_mask_0 = const()[name = string("op_17444_squeeze_mask_0"), val = tensor([true, false, false])]; - tensor var_17444_cast_fp16 = slice_by_index(begin = var_17444_begin_0, end = var_17444_end_0, end_mask = var_17444_end_mask_0, squeeze_mask = var_17444_squeeze_mask_0, x = var_17441_cast_fp16)[name = string("op_17444_cast_fp16")]; - tensor var_17447_begin_0 = const()[name = string("op_17447_begin_0"), val = tensor([0, 0])]; - tensor var_17447_end_0 = const()[name = string("op_17447_end_0"), val = tensor([1, 256])]; - tensor var_17447_end_mask_0 = const()[name = string("op_17447_end_mask_0"), val = tensor([false, true])]; - tensor var_17447_squeeze_mask_0 = const()[name = string("op_17447_squeeze_mask_0"), val = tensor([true, false])]; - tensor var_17447_cast_fp16 = slice_by_index(begin = var_17447_begin_0, end = var_17447_end_0, end_mask = var_17447_end_mask_0, squeeze_mask = var_17447_squeeze_mask_0, x = var_17444_cast_fp16)[name = string("op_17447_cast_fp16")]; - tensor var_17450_begin_0 = const()[name = string("op_17450_begin_0"), val = tensor([0])]; - tensor var_17450_end_0 = const()[name = string("op_17450_end_0"), val = tensor([1])]; - tensor var_17450_end_mask_0 = const()[name = string("op_17450_end_mask_0"), val = tensor([false])]; - tensor var_17450_squeeze_mask_0 = const()[name = string("op_17450_squeeze_mask_0"), val = tensor([true])]; - fp16 var_17450_cast_fp16 = slice_by_index(begin = var_17450_begin_0, end = var_17450_end_0, end_mask = var_17450_end_mask_0, squeeze_mask = var_17450_squeeze_mask_0, x = var_17447_cast_fp16)[name = string("op_17450_cast_fp16")]; - fp16 var_17451_to_fp16 = const()[name = string("op_17451_to_fp16"), val = fp16(0x0p+0)]; - fp16 dummy_local_cast_fp16 = mul(x = var_17450_cast_fp16, y = var_17451_to_fp16)[name = string("dummy_local_cast_fp16")]; - tensor var_17455_begin_0 = const()[name = string("op_17455_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_17455_end_0 = const()[name = string("op_17455_end_0"), val = tensor([1, 1, 4096, 256])]; - tensor var_17455_end_mask_0 = const()[name = string("op_17455_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_17455_squeeze_mask_0 = const()[name = string("op_17455_squeeze_mask_0"), val = tensor([true, false, false, false])]; - tensor var_17455_cast_fp16 = slice_by_index(begin = var_17455_begin_0, end = var_17455_end_0, end_mask = var_17455_end_mask_0, squeeze_mask = var_17455_squeeze_mask_0, x = coreml_update_state_99)[name = string("op_17455_cast_fp16")]; - tensor var_17458_begin_0 = const()[name = string("op_17458_begin_0"), val = tensor([0, 0, 0])]; - tensor var_17458_end_0 = const()[name = string("op_17458_end_0"), val = tensor([1, 4096, 256])]; - tensor var_17458_end_mask_0 = const()[name = string("op_17458_end_mask_0"), val = tensor([false, true, true])]; - tensor var_17458_squeeze_mask_0 = const()[name = string("op_17458_squeeze_mask_0"), val = tensor([true, false, false])]; - tensor var_17458_cast_fp16 = slice_by_index(begin = var_17458_begin_0, end = var_17458_end_0, end_mask = var_17458_end_mask_0, squeeze_mask = var_17458_squeeze_mask_0, x = var_17455_cast_fp16)[name = string("op_17458_cast_fp16")]; - tensor var_17461_begin_0 = const()[name = string("op_17461_begin_0"), val = tensor([0, 0])]; - tensor var_17461_end_0 = const()[name = string("op_17461_end_0"), val = tensor([1, 256])]; - tensor var_17461_end_mask_0 = const()[name = string("op_17461_end_mask_0"), val = tensor([false, true])]; - tensor var_17461_squeeze_mask_0 = const()[name = string("op_17461_squeeze_mask_0"), val = tensor([true, false])]; - tensor var_17461_cast_fp16 = slice_by_index(begin = var_17461_begin_0, end = var_17461_end_0, end_mask = var_17461_end_mask_0, squeeze_mask = var_17461_squeeze_mask_0, x = var_17458_cast_fp16)[name = string("op_17461_cast_fp16")]; - tensor var_17464_begin_0 = const()[name = string("op_17464_begin_0"), val = tensor([0])]; - tensor var_17464_end_0 = const()[name = string("op_17464_end_0"), val = tensor([1])]; - tensor var_17464_end_mask_0 = const()[name = string("op_17464_end_mask_0"), val = tensor([false])]; - tensor var_17464_squeeze_mask_0 = const()[name = string("op_17464_squeeze_mask_0"), val = tensor([true])]; - fp16 var_17464_cast_fp16 = slice_by_index(begin = var_17464_begin_0, end = var_17464_end_0, end_mask = var_17464_end_mask_0, squeeze_mask = var_17464_squeeze_mask_0, x = var_17461_cast_fp16)[name = string("op_17464_cast_fp16")]; - fp16 var_17465_to_fp16 = const()[name = string("op_17465_to_fp16"), val = fp16(0x0p+0)]; - fp16 dummy_global_cast_fp16 = mul(x = var_17464_cast_fp16, y = var_17465_to_fp16)[name = string("dummy_global_cast_fp16")]; - fp16 var_17468_cast_fp16 = add(x = dummy_local_cast_fp16, y = dummy_global_cast_fp16)[name = string("op_17468_cast_fp16")]; - tensor var_17472 = const()[name = string("op_17472"), val = tensor([1, 1, 1])]; - tensor var_17473_cast_fp16 = reshape(shape = var_17472, x = var_17468_cast_fp16)[name = string("op_17473_cast_fp16")]; - tensor output_hidden_states = add(x = out_cast_fp16, y = var_17473_cast_fp16)[name = string("op_17475_cast_fp16")]; - tensor position_ids_tmp = identity(x = position_ids)[name = string("position_ids_tmp")]; - } -> (output_hidden_states); - func infer_rotate(tensor causal_mask, tensor current_pos, tensor hidden_states, state> model_model_kv_cache_global, state> model_model_kv_cache_local, tensor position_ids) { - tensor model_model_layers_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547748992))))[name = string("model_model_layers_0_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_0_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(901312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547765440))))[name = string("model_model_layers_0_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_0_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1126720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1347968))))[name = string("model_model_layers_0_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547769600))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(548654400))))[name = string("model_model_layers_1_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_1_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2253376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(548670848))))[name = string("model_model_layers_1_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_1_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2478784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(548675008))))[name = string("model_model_layers_1_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(548679168))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(549563968))))[name = string("model_model_layers_2_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_2_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3605440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(549580416))))[name = string("model_model_layers_2_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_2_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3830848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(549584576))))[name = string("model_model_layers_2_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4056256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(549588736))))[name = string("model_model_layers_3_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_3_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(549605184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(549826432))))[name = string("model_model_layers_3_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_3_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5182912))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(549830592))))[name = string("model_model_layers_3_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(549834752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(550719552))))[name = string("model_model_layers_4_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_4_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6309568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(550736000))))[name = string("model_model_layers_4_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_4_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6534976))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(550740160))))[name = string("model_model_layers_4_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_5_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(550744320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551629120))))[name = string("model_model_layers_5_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_5_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7661632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551645568))))[name = string("model_model_layers_5_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_5_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7887040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551649728))))[name = string("model_model_layers_5_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_6_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551653888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(552538688))))[name = string("model_model_layers_6_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_6_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9013696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(552555136))))[name = string("model_model_layers_6_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_6_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9239104))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(552559296))))[name = string("model_model_layers_6_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_7_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9464512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(552563456))))[name = string("model_model_layers_7_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_7_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10365760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(552579904))))[name = string("model_model_layers_7_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_7_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10591168))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(552584064))))[name = string("model_model_layers_7_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_8_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(552588224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(553473024))))[name = string("model_model_layers_8_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_8_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11717824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(553489472))))[name = string("model_model_layers_8_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_8_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11943232))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(553493632))))[name = string("model_model_layers_8_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_9_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12168640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(553497792))))[name = string("model_model_layers_9_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_9_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13069888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(553514240))))[name = string("model_model_layers_9_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_9_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13295296))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(553518400))))[name = string("model_model_layers_9_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_10_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(553522560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(554407360))))[name = string("model_model_layers_10_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_10_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14421952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(554423808))))[name = string("model_model_layers_10_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_10_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14647360))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(554427968))))[name = string("model_model_layers_10_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_11_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14872768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(554432128))))[name = string("model_model_layers_11_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_11_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15774016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(554448576))))[name = string("model_model_layers_11_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_11_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15999424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(554452736))))[name = string("model_model_layers_11_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_12_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16224832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(554456896))))[name = string("model_model_layers_12_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_12_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17126080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17347328))))[name = string("model_model_layers_12_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_12_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17351488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(554473344))))[name = string("model_model_layers_12_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_13_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(554477504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(555362304))))[name = string("model_model_layers_13_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_13_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18478144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(555378752))))[name = string("model_model_layers_13_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_13_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18703552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18924800))))[name = string("model_model_layers_13_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_14_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(555382912))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(556267712))))[name = string("model_model_layers_14_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_14_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19830208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(556284160))))[name = string("model_model_layers_14_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_14_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20055616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20276864))))[name = string("model_model_layers_14_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_15_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(556288320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557173120))))[name = string("model_model_layers_15_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_15_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557189568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21403520))))[name = string("model_model_layers_15_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_15_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21407680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21628928))))[name = string("model_model_layers_15_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_16_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21633088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557410816))))[name = string("model_model_layers_16_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_16_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22534336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557427264))))[name = string("model_model_layers_16_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_16_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22759744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557431424))))[name = string("model_model_layers_16_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_17_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557435584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558320384))))[name = string("model_model_layers_17_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_17_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558336832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558558080))))[name = string("model_model_layers_17_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_17_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24111808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24333056))))[name = string("model_model_layers_17_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_18_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24337216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558562240))))[name = string("model_model_layers_18_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_18_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25238464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558578688))))[name = string("model_model_layers_18_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_18_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25463872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558582848))))[name = string("model_model_layers_18_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_19_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25689280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558587008))))[name = string("model_model_layers_19_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_19_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26590528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558603456))))[name = string("model_model_layers_19_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_19_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26815936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558607616))))[name = string("model_model_layers_19_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_20_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27041344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558611776))))[name = string("model_model_layers_20_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_20_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27942592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558628224))))[name = string("model_model_layers_20_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_20_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28168000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558632384))))[name = string("model_model_layers_20_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_21_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28393408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558636544))))[name = string("model_model_layers_21_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_21_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29294656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558652992))))[name = string("model_model_layers_21_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_21_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29520064))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558657152))))[name = string("model_model_layers_21_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_22_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29745472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558661312))))[name = string("model_model_layers_22_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_22_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30646720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558677760))))[name = string("model_model_layers_22_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_22_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30872128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558681920))))[name = string("model_model_layers_22_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_23_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31097536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558686080))))[name = string("model_model_layers_23_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_23_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31998784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558702528))))[name = string("model_model_layers_23_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_23_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558706688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558927936))))[name = string("model_model_layers_23_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_24_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558932096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(559816896))))[name = string("model_model_layers_24_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_24_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33350848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33572096))))[name = string("model_model_layers_24_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_24_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(559833344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560054592))))[name = string("model_model_layers_24_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_25_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560058752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560943552))))[name = string("model_model_layers_25_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_25_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34702912))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560960000))))[name = string("model_model_layers_25_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_25_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34928320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560964160))))[name = string("model_model_layers_25_self_attn_v_proj_weight_palettized")]; - int32 var_1616_batch_dims_0 = const()[name = string("op_1616_batch_dims_0"), val = int32(0)]; - bool var_1616_validate_indices_0 = const()[name = string("op_1616_validate_indices_0"), val = bool(false)]; - tensor var_1608_to_fp16 = const()[name = string("op_1608_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35153728)))]; - string current_pos_to_int16_dtype_0 = const()[name = string("current_pos_to_int16_dtype_0"), val = string("int16")]; - string cast_266_dtype_0 = const()[name = string("cast_266_dtype_0"), val = string("int32")]; - int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; - tensor current_pos_to_int16 = cast(dtype = current_pos_to_int16_dtype_0, x = current_pos)[name = string("cast_5")]; - tensor cast_266 = cast(dtype = cast_266_dtype_0, x = current_pos_to_int16)[name = string("cast_4")]; - tensor greater_equal_0 = greater_equal(x = cast_266, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; - int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(8192)]; - tensor add_0 = add(x = cast_266, y = slice_by_index_0)[name = string("add_0")]; - tensor select_0 = select(a = cast_266, b = add_0, cond = greater_equal_0)[name = string("select_0")]; - string select_0_to_int16_dtype_0 = const()[name = string("select_0_to_int16_dtype_0"), val = string("int16")]; - string cast_0_dtype_0 = const()[name = string("cast_0_dtype_0"), val = string("int32")]; - int32 greater_equal_0_y_0_1 = const()[name = string("greater_equal_0_y_0_1"), val = int32(0)]; - tensor select_0_to_int16 = cast(dtype = select_0_to_int16_dtype_0, x = select_0)[name = string("cast_3")]; - tensor cast_0 = cast(dtype = cast_0_dtype_0, x = select_0_to_int16)[name = string("cast_2")]; - tensor greater_equal_0_1 = greater_equal(x = cast_0, y = greater_equal_0_y_0_1)[name = string("greater_equal_0_1")]; - int32 slice_by_index_0_1 = const()[name = string("slice_by_index_0_1"), val = int32(8192)]; - tensor add_0_1 = add(x = cast_0, y = slice_by_index_0_1)[name = string("add_0_1")]; - tensor select_0_1 = select(a = cast_0, b = add_0_1, cond = greater_equal_0_1)[name = string("select_0_1")]; - int32 op_1616_cast_fp16_cast_uint16_cast_uint16_axis_0 = const()[name = string("op_1616_cast_fp16_cast_uint16_cast_uint16_axis_0"), val = int32(1)]; - tensor op_1616_cast_fp16_cast_uint16_cast_uint16 = gather(axis = op_1616_cast_fp16_cast_uint16_cast_uint16_axis_0, batch_dims = var_1616_batch_dims_0, indices = select_0_1, validate_indices = var_1616_validate_indices_0, x = var_1608_to_fp16)[name = string("op_1616_cast_fp16_cast_uint16_cast_uint16")]; - tensor var_1621 = const()[name = string("op_1621"), val = tensor([1, 1, 1, -1])]; - tensor sin_1_cast_fp16 = reshape(shape = var_1621, x = op_1616_cast_fp16_cast_uint16_cast_uint16)[name = string("sin_1_cast_fp16")]; - int32 var_1631_axis_0 = const()[name = string("op_1631_axis_0"), val = int32(1)]; - int32 var_1631_batch_dims_0 = const()[name = string("op_1631_batch_dims_0"), val = int32(0)]; - bool var_1631_validate_indices_0 = const()[name = string("op_1631_validate_indices_0"), val = bool(false)]; - tensor var_1623_to_fp16 = const()[name = string("op_1623_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39348096)))]; - string current_pos_to_uint16_dtype_0 = const()[name = string("current_pos_to_uint16_dtype_0"), val = string("uint16")]; - tensor current_pos_to_uint16 = cast(dtype = current_pos_to_uint16_dtype_0, x = current_pos)[name = string("cast_1")]; - tensor var_1631_cast_fp16_cast_uint16 = gather(axis = var_1631_axis_0, batch_dims = var_1631_batch_dims_0, indices = current_pos_to_uint16, validate_indices = var_1631_validate_indices_0, x = var_1623_to_fp16)[name = string("op_1631_cast_fp16_cast_uint16")]; - tensor var_1636 = const()[name = string("op_1636"), val = tensor([1, 1, 1, -1])]; - tensor cos_1_cast_fp16 = reshape(shape = var_1636, x = var_1631_cast_fp16_cast_uint16)[name = string("cos_1_cast_fp16")]; - int32 var_1657 = const()[name = string("op_1657"), val = int32(-1)]; - fp16 const_0_promoted_to_fp16 = const()[name = string("const_0_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_1659_cast_fp16 = mul(x = hidden_states, y = const_0_promoted_to_fp16)[name = string("op_1659_cast_fp16")]; - bool input_1_interleave_0 = const()[name = string("input_1_interleave_0"), val = bool(false)]; - tensor input_1_cast_fp16 = concat(axis = var_1657, interleave = input_1_interleave_0, values = (hidden_states, var_1659_cast_fp16))[name = string("input_1_cast_fp16")]; - tensor normed_1_axes_0 = const()[name = string("normed_1_axes_0"), val = tensor([-1])]; - fp16 var_1654_to_fp16 = const()[name = string("op_1654_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_1_cast_fp16 = layer_norm(axes = normed_1_axes_0, epsilon = var_1654_to_fp16, x = input_1_cast_fp16)[name = string("normed_1_cast_fp16")]; - tensor normed_3_begin_0 = const()[name = string("normed_3_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_3_end_0 = const()[name = string("normed_3_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_3_end_mask_0 = const()[name = string("normed_3_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_3_cast_fp16 = slice_by_index(begin = normed_3_begin_0, end = normed_3_end_0, end_mask = normed_3_end_mask_0, x = normed_1_cast_fp16)[name = string("normed_3_cast_fp16")]; - tensor var_1673_to_fp16 = const()[name = string("op_1673_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43542464)))]; - tensor hidden_states_3_cast_fp16 = mul(x = normed_3_cast_fp16, y = var_1673_to_fp16)[name = string("hidden_states_3_cast_fp16")]; - tensor var_1678 = const()[name = string("op_1678"), val = tensor([0, 2, 1])]; - tensor var_1681_axes_0 = const()[name = string("op_1681_axes_0"), val = tensor([2])]; - tensor var_1679_cast_fp16 = transpose(perm = var_1678, x = hidden_states_3_cast_fp16)[name = string("transpose_155")]; - tensor var_1681_cast_fp16 = expand_dims(axes = var_1681_axes_0, x = var_1679_cast_fp16)[name = string("op_1681_cast_fp16")]; - string var_1697_pad_type_0 = const()[name = string("op_1697_pad_type_0"), val = string("valid")]; - tensor var_1697_strides_0 = const()[name = string("op_1697_strides_0"), val = tensor([1, 1])]; - tensor var_1697_pad_0 = const()[name = string("op_1697_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1697_dilations_0 = const()[name = string("op_1697_dilations_0"), val = tensor([1, 1])]; - int32 var_1697_groups_0 = const()[name = string("op_1697_groups_0"), val = int32(1)]; - tensor var_1697 = conv(dilations = var_1697_dilations_0, groups = var_1697_groups_0, pad = var_1697_pad_0, pad_type = var_1697_pad_type_0, strides = var_1697_strides_0, weight = model_model_layers_0_self_attn_q_proj_weight_palettized, x = var_1681_cast_fp16)[name = string("op_1697")]; - tensor var_1702 = const()[name = string("op_1702"), val = tensor([1, 4, 1, 256])]; - tensor var_1703 = reshape(shape = var_1702, x = var_1697)[name = string("op_1703")]; - string var_1719_pad_type_0 = const()[name = string("op_1719_pad_type_0"), val = string("valid")]; - tensor var_1719_strides_0 = const()[name = string("op_1719_strides_0"), val = tensor([1, 1])]; - tensor var_1719_pad_0 = const()[name = string("op_1719_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1719_dilations_0 = const()[name = string("op_1719_dilations_0"), val = tensor([1, 1])]; - int32 var_1719_groups_0 = const()[name = string("op_1719_groups_0"), val = int32(1)]; - tensor var_1719 = conv(dilations = var_1719_dilations_0, groups = var_1719_groups_0, pad = var_1719_pad_0, pad_type = var_1719_pad_type_0, strides = var_1719_strides_0, weight = model_model_layers_0_self_attn_k_proj_weight_palettized, x = var_1681_cast_fp16)[name = string("op_1719")]; - tensor var_1724 = const()[name = string("op_1724"), val = tensor([1, 1, 1, 256])]; - tensor var_1725 = reshape(shape = var_1724, x = var_1719)[name = string("op_1725")]; - string var_1741_pad_type_0 = const()[name = string("op_1741_pad_type_0"), val = string("valid")]; - tensor var_1741_strides_0 = const()[name = string("op_1741_strides_0"), val = tensor([1, 1])]; - tensor var_1741_pad_0 = const()[name = string("op_1741_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1741_dilations_0 = const()[name = string("op_1741_dilations_0"), val = tensor([1, 1])]; - int32 var_1741_groups_0 = const()[name = string("op_1741_groups_0"), val = int32(1)]; - tensor var_1741 = conv(dilations = var_1741_dilations_0, groups = var_1741_groups_0, pad = var_1741_pad_0, pad_type = var_1741_pad_type_0, strides = var_1741_strides_0, weight = model_model_layers_0_self_attn_v_proj_weight_palettized, x = var_1681_cast_fp16)[name = string("op_1741")]; - tensor var_1746 = const()[name = string("op_1746"), val = tensor([1, 1, 1, 256])]; - tensor var_1747 = reshape(shape = var_1746, x = var_1741)[name = string("op_1747")]; - int32 var_1762 = const()[name = string("op_1762"), val = int32(-1)]; - fp16 const_4_promoted = const()[name = string("const_4_promoted"), val = fp16(-0x1p+0)]; - tensor var_1764 = mul(x = var_1703, y = const_4_promoted)[name = string("op_1764")]; - bool input_5_interleave_0 = const()[name = string("input_5_interleave_0"), val = bool(false)]; - tensor input_5 = concat(axis = var_1762, interleave = input_5_interleave_0, values = (var_1703, var_1764))[name = string("input_5")]; - tensor normed_5_axes_0 = const()[name = string("normed_5_axes_0"), val = tensor([-1])]; - fp16 var_1759_to_fp16 = const()[name = string("op_1759_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_5_cast_fp16 = layer_norm(axes = normed_5_axes_0, epsilon = var_1759_to_fp16, x = input_5)[name = string("normed_5_cast_fp16")]; - tensor normed_7_begin_0 = const()[name = string("normed_7_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_7_end_0 = const()[name = string("normed_7_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_7_end_mask_0 = const()[name = string("normed_7_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_7 = slice_by_index(begin = normed_7_begin_0, end = normed_7_end_0, end_mask = normed_7_end_mask_0, x = normed_5_cast_fp16)[name = string("normed_7")]; - tensor var_1778_to_fp16 = const()[name = string("op_1778_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43544832)))]; - tensor q_1_cast_fp16 = mul(x = normed_7, y = var_1778_to_fp16)[name = string("q_1_cast_fp16")]; - int32 var_1789 = const()[name = string("op_1789"), val = int32(-1)]; - fp16 const_8_promoted = const()[name = string("const_8_promoted"), val = fp16(-0x1p+0)]; - tensor var_1791 = mul(x = var_1725, y = const_8_promoted)[name = string("op_1791")]; - bool input_7_interleave_0 = const()[name = string("input_7_interleave_0"), val = bool(false)]; - tensor input_7 = concat(axis = var_1789, interleave = input_7_interleave_0, values = (var_1725, var_1791))[name = string("input_7")]; - tensor normed_9_axes_0 = const()[name = string("normed_9_axes_0"), val = tensor([-1])]; - fp16 var_1786_to_fp16 = const()[name = string("op_1786_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_9_cast_fp16 = layer_norm(axes = normed_9_axes_0, epsilon = var_1786_to_fp16, x = input_7)[name = string("normed_9_cast_fp16")]; - tensor normed_11_begin_0 = const()[name = string("normed_11_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_11_end_0 = const()[name = string("normed_11_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_11_end_mask_0 = const()[name = string("normed_11_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_11 = slice_by_index(begin = normed_11_begin_0, end = normed_11_end_0, end_mask = normed_11_end_mask_0, x = normed_9_cast_fp16)[name = string("normed_11")]; - tensor var_1805_to_fp16 = const()[name = string("op_1805_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43545408)))]; - tensor k_1_cast_fp16 = mul(x = normed_11, y = var_1805_to_fp16)[name = string("k_1_cast_fp16")]; - tensor var_1807_cast_fp16 = mul(x = q_1_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1807_cast_fp16")]; - tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_1_cast_fp16 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = q_1_cast_fp16)[name = string("x1_1_cast_fp16")]; - tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_1_cast_fp16 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = q_1_cast_fp16)[name = string("x2_1_cast_fp16")]; - fp16 const_14_promoted_to_fp16 = const()[name = string("const_14_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_1828_cast_fp16 = mul(x = x2_1_cast_fp16, y = const_14_promoted_to_fp16)[name = string("op_1828_cast_fp16")]; - int32 var_1830 = const()[name = string("op_1830"), val = int32(-1)]; - bool var_1831_interleave_0 = const()[name = string("op_1831_interleave_0"), val = bool(false)]; - tensor var_1831_cast_fp16 = concat(axis = var_1830, interleave = var_1831_interleave_0, values = (var_1828_cast_fp16, x1_1_cast_fp16))[name = string("op_1831_cast_fp16")]; - tensor var_1832_cast_fp16 = mul(x = var_1831_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1832_cast_fp16")]; - tensor query_states_1_cast_fp16 = add(x = var_1807_cast_fp16, y = var_1832_cast_fp16)[name = string("query_states_1_cast_fp16")]; - tensor var_1835_cast_fp16 = mul(x = k_1_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1835_cast_fp16")]; - tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_3_cast_fp16 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = k_1_cast_fp16)[name = string("x1_3_cast_fp16")]; - tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_3_cast_fp16 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = k_1_cast_fp16)[name = string("x2_3_cast_fp16")]; - fp16 const_17_promoted_to_fp16 = const()[name = string("const_17_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_1856_cast_fp16 = mul(x = x2_3_cast_fp16, y = const_17_promoted_to_fp16)[name = string("op_1856_cast_fp16")]; - int32 var_1858 = const()[name = string("op_1858"), val = int32(-1)]; - bool var_1859_interleave_0 = const()[name = string("op_1859_interleave_0"), val = bool(false)]; - tensor var_1859_cast_fp16 = concat(axis = var_1858, interleave = var_1859_interleave_0, values = (var_1856_cast_fp16, x1_3_cast_fp16))[name = string("op_1859_cast_fp16")]; - tensor var_1860_cast_fp16 = mul(x = var_1859_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1860_cast_fp16")]; - tensor key_states_1_cast_fp16 = add(x = var_1835_cast_fp16, y = var_1860_cast_fp16)[name = string("key_states_1_cast_fp16")]; - tensor read_state_0 = read_state(input = model_model_kv_cache_local)[name = string("read_state_0")]; - tensor key_slice_1_begin_0 = const()[name = string("key_slice_1_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor key_slice_1_end_0 = const()[name = string("key_slice_1_end_0"), val = tensor([1, 1, 512, 256])]; - tensor key_slice_1_end_mask_0 = const()[name = string("key_slice_1_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_1_cast_fp16 = slice_by_index(begin = key_slice_1_begin_0, end = key_slice_1_end_0, end_mask = key_slice_1_end_mask_0, x = read_state_0)[name = string("key_slice_1_cast_fp16")]; - tensor key_tail_1_begin_0 = const()[name = string("key_tail_1_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor key_tail_1_end_0 = const()[name = string("key_tail_1_end_0"), val = tensor([1, 1, 512, 256])]; - tensor key_tail_1_cast_fp16 = slice_by_index(begin = key_tail_1_begin_0, end = key_tail_1_end_0, x = key_slice_1_cast_fp16)[name = string("key_tail_1_cast_fp16")]; - int32 var_1873 = const()[name = string("op_1873"), val = int32(2)]; - bool shifted_key_1_interleave_0 = const()[name = string("shifted_key_1_interleave_0"), val = bool(false)]; - tensor shifted_key_1_cast_fp16 = concat(axis = var_1873, interleave = shifted_key_1_interleave_0, values = (key_tail_1_cast_fp16, key_states_1_cast_fp16))[name = string("shifted_key_1_cast_fp16")]; - tensor concat_0 = const()[name = string("concat_0"), val = tensor([0, 0, 0, 0])]; - tensor concat_1 = const()[name = string("concat_1"), val = tensor([1, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_0, begin_mask = model_model_kv_cache_local_internal_tensor_assign_1_begin_mask_0, end = concat_1, end_mask = model_model_kv_cache_local_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_1_stride_0, update = shifted_key_1_cast_fp16, x = read_state_0)[name = string("model_model_kv_cache_local_internal_tensor_assign_1_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_52_write_state")]; - tensor coreml_update_state_52 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_52")]; - tensor value_slice_1_begin_0 = const()[name = string("value_slice_1_begin_0"), val = tensor([22, 0, 0, 0])]; - tensor value_slice_1_end_0 = const()[name = string("value_slice_1_end_0"), val = tensor([23, 1, 512, 256])]; - tensor value_slice_1_end_mask_0 = const()[name = string("value_slice_1_end_mask_0"), val = tensor([false, true, true, true])]; - tensor value_slice_1_cast_fp16 = slice_by_index(begin = value_slice_1_begin_0, end = value_slice_1_end_0, end_mask = value_slice_1_end_mask_0, x = coreml_update_state_52)[name = string("value_slice_1_cast_fp16")]; - tensor value_tail_1_begin_0 = const()[name = string("value_tail_1_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor value_tail_1_end_0 = const()[name = string("value_tail_1_end_0"), val = tensor([1, 1, 512, 256])]; - tensor value_tail_1_cast_fp16 = slice_by_index(begin = value_tail_1_begin_0, end = value_tail_1_end_0, x = value_slice_1_cast_fp16)[name = string("value_tail_1_cast_fp16")]; - int32 var_1907 = const()[name = string("op_1907"), val = int32(2)]; - bool shifted_value_1_interleave_0 = const()[name = string("shifted_value_1_interleave_0"), val = bool(false)]; - tensor shifted_value_1_cast_fp16 = concat(axis = var_1907, interleave = shifted_value_1_interleave_0, values = (value_tail_1_cast_fp16, var_1747))[name = string("shifted_value_1_cast_fp16")]; - tensor concat_2 = const()[name = string("concat_2"), val = tensor([22, 0, 0, 0])]; - tensor concat_3 = const()[name = string("concat_3"), val = tensor([23, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_local_internal_tensor_assign_2_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_local_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_2_stride_0, update = shifted_value_1_cast_fp16, x = coreml_update_state_52)[name = string("model_model_kv_cache_local_internal_tensor_assign_2_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_53_write_state")]; - tensor coreml_update_state_53 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_53")]; - tensor var_1935_begin_0 = const()[name = string("op_1935_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1935_end_0 = const()[name = string("op_1935_end_0"), val = tensor([1, 1, 512, 256])]; - tensor var_1935_end_mask_0 = const()[name = string("op_1935_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_1935_cast_fp16 = slice_by_index(begin = var_1935_begin_0, end = var_1935_end_0, end_mask = var_1935_end_mask_0, x = coreml_update_state_53)[name = string("op_1935_cast_fp16")]; - tensor var_1942_begin_0 = const()[name = string("op_1942_begin_0"), val = tensor([22, 0, 0, 0])]; - tensor var_1942_end_0 = const()[name = string("op_1942_end_0"), val = tensor([23, 1, 512, 256])]; - tensor var_1942_end_mask_0 = const()[name = string("op_1942_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_1942_cast_fp16 = slice_by_index(begin = var_1942_begin_0, end = var_1942_end_0, end_mask = var_1942_end_mask_0, x = coreml_update_state_53)[name = string("op_1942_cast_fp16")]; - tensor var_1979 = const()[name = string("op_1979"), val = tensor([1, 4, 1, 1])]; - tensor x_5_cast_fp16 = tile(reps = var_1979, x = var_1935_cast_fp16)[name = string("x_5_cast_fp16")]; - tensor var_1999 = const()[name = string("op_1999"), val = tensor([1, 4, 1, 1])]; - tensor x_11_cast_fp16 = tile(reps = var_1999, x = var_1942_cast_fp16)[name = string("x_11_cast_fp16")]; - bool var_2026_transpose_x_1 = const()[name = string("op_2026_transpose_x_1"), val = bool(false)]; - bool var_2026_transpose_y_1 = const()[name = string("op_2026_transpose_y_1"), val = bool(true)]; - tensor var_2026 = matmul(transpose_x = var_2026_transpose_x_1, transpose_y = var_2026_transpose_y_1, x = query_states_1_cast_fp16, y = x_5_cast_fp16)[name = string("op_2026")]; - fp16 var_2027_to_fp16 = const()[name = string("op_2027_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_1_cast_fp16 = mul(x = var_2026, y = var_2027_to_fp16)[name = string("attn_weights_1_cast_fp16")]; - tensor var_2059_begin_0 = const()[name = string("op_2059_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2059_end_0 = const()[name = string("op_2059_end_0"), val = tensor([1, 1, 1, 512])]; - tensor var_2059_end_mask_0 = const()[name = string("op_2059_end_mask_0"), val = tensor([true, true, true, false])]; - tensor var_2059 = slice_by_index(begin = var_2059_begin_0, end = var_2059_end_0, end_mask = var_2059_end_mask_0, x = causal_mask)[name = string("op_2059")]; - tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = var_2059)[name = string("attn_weights_3_cast_fp16")]; - int32 var_2062 = const()[name = string("op_2062"), val = int32(-1)]; - tensor attn_weights_5_cast_fp16 = softmax(axis = var_2062, x = attn_weights_3_cast_fp16)[name = string("attn_weights_5_cast_fp16")]; - bool attn_output_1_transpose_x_0 = const()[name = string("attn_output_1_transpose_x_0"), val = bool(false)]; - bool attn_output_1_transpose_y_0 = const()[name = string("attn_output_1_transpose_y_0"), val = bool(false)]; - tensor attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_0, transpose_y = attn_output_1_transpose_y_0, x = attn_weights_5_cast_fp16, y = x_11_cast_fp16)[name = string("attn_output_1_cast_fp16")]; - tensor var_2073_perm_0 = const()[name = string("op_2073_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_2077 = const()[name = string("op_2077"), val = tensor([1, 1, 1024])]; - tensor var_2073_cast_fp16 = transpose(perm = var_2073_perm_0, x = attn_output_1_cast_fp16)[name = string("transpose_154")]; - tensor attn_output_5_cast_fp16 = reshape(shape = var_2077, x = var_2073_cast_fp16)[name = string("attn_output_5_cast_fp16")]; - tensor var_2082 = const()[name = string("op_2082"), val = tensor([0, 2, 1])]; - string var_2098_pad_type_0 = const()[name = string("op_2098_pad_type_0"), val = string("valid")]; - int32 var_2098_groups_0 = const()[name = string("op_2098_groups_0"), val = int32(1)]; - tensor var_2098_strides_0 = const()[name = string("op_2098_strides_0"), val = tensor([1])]; - tensor var_2098_pad_0 = const()[name = string("op_2098_pad_0"), val = tensor([0, 0])]; - tensor var_2098_dilations_0 = const()[name = string("op_2098_dilations_0"), val = tensor([1])]; - tensor squeeze_0_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43545984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44430784))))[name = string("squeeze_0_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_2083_cast_fp16 = transpose(perm = var_2082, x = attn_output_5_cast_fp16)[name = string("transpose_153")]; - tensor var_2098_cast_fp16 = conv(dilations = var_2098_dilations_0, groups = var_2098_groups_0, pad = var_2098_pad_0, pad_type = var_2098_pad_type_0, strides = var_2098_strides_0, weight = squeeze_0_cast_fp16_to_fp32_to_fp16_palettized, x = var_2083_cast_fp16)[name = string("op_2098_cast_fp16")]; - tensor var_2102 = const()[name = string("op_2102"), val = tensor([0, 2, 1])]; - int32 var_2113 = const()[name = string("op_2113"), val = int32(-1)]; - fp16 const_26_promoted_to_fp16 = const()[name = string("const_26_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_9_cast_fp16 = transpose(perm = var_2102, x = var_2098_cast_fp16)[name = string("transpose_152")]; - tensor var_2115_cast_fp16 = mul(x = hidden_states_9_cast_fp16, y = const_26_promoted_to_fp16)[name = string("op_2115_cast_fp16")]; - bool input_11_interleave_0 = const()[name = string("input_11_interleave_0"), val = bool(false)]; - tensor input_11_cast_fp16 = concat(axis = var_2113, interleave = input_11_interleave_0, values = (hidden_states_9_cast_fp16, var_2115_cast_fp16))[name = string("input_11_cast_fp16")]; - tensor normed_13_axes_0 = const()[name = string("normed_13_axes_0"), val = tensor([-1])]; - fp16 var_2110_to_fp16 = const()[name = string("op_2110_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_13_cast_fp16 = layer_norm(axes = normed_13_axes_0, epsilon = var_2110_to_fp16, x = input_11_cast_fp16)[name = string("normed_13_cast_fp16")]; - tensor normed_15_begin_0 = const()[name = string("normed_15_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_15_end_0 = const()[name = string("normed_15_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_15_end_mask_0 = const()[name = string("normed_15_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_15_cast_fp16 = slice_by_index(begin = normed_15_begin_0, end = normed_15_end_0, end_mask = normed_15_end_mask_0, x = normed_13_cast_fp16)[name = string("normed_15_cast_fp16")]; - tensor var_2129_to_fp16 = const()[name = string("op_2129_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44449280)))]; - tensor attn_output_9_cast_fp16 = mul(x = normed_15_cast_fp16, y = var_2129_to_fp16)[name = string("attn_output_9_cast_fp16")]; - tensor hidden_states_11_cast_fp16 = add(x = hidden_states, y = attn_output_9_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; - int32 var_2142 = const()[name = string("op_2142"), val = int32(-1)]; - fp16 const_30_promoted_to_fp16 = const()[name = string("const_30_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_2144_cast_fp16 = mul(x = hidden_states_11_cast_fp16, y = const_30_promoted_to_fp16)[name = string("op_2144_cast_fp16")]; - bool input_13_interleave_0 = const()[name = string("input_13_interleave_0"), val = bool(false)]; - tensor input_13_cast_fp16 = concat(axis = var_2142, interleave = input_13_interleave_0, values = (hidden_states_11_cast_fp16, var_2144_cast_fp16))[name = string("input_13_cast_fp16")]; - tensor normed_17_axes_0 = const()[name = string("normed_17_axes_0"), val = tensor([-1])]; - fp16 var_2139_to_fp16 = const()[name = string("op_2139_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_17_cast_fp16 = layer_norm(axes = normed_17_axes_0, epsilon = var_2139_to_fp16, x = input_13_cast_fp16)[name = string("normed_17_cast_fp16")]; - tensor normed_19_begin_0 = const()[name = string("normed_19_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_19_end_0 = const()[name = string("normed_19_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_19_end_mask_0 = const()[name = string("normed_19_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_19_cast_fp16 = slice_by_index(begin = normed_19_begin_0, end = normed_19_end_0, end_mask = normed_19_end_mask_0, x = normed_17_cast_fp16)[name = string("normed_19_cast_fp16")]; - tensor var_2158_to_fp16 = const()[name = string("op_2158_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44451648)))]; - tensor x_13_cast_fp16 = mul(x = normed_19_cast_fp16, y = var_2158_to_fp16)[name = string("x_13_cast_fp16")]; - tensor var_2170 = const()[name = string("op_2170"), val = tensor([0, 2, 1])]; - tensor input_15_axes_0 = const()[name = string("input_15_axes_0"), val = tensor([2])]; - tensor var_2171_cast_fp16 = transpose(perm = var_2170, x = x_13_cast_fp16)[name = string("transpose_151")]; - tensor input_15_cast_fp16 = expand_dims(axes = input_15_axes_0, x = var_2171_cast_fp16)[name = string("input_15_cast_fp16")]; - string x_15_pad_type_0 = const()[name = string("x_15_pad_type_0"), val = string("valid")]; - tensor x_15_strides_0 = const()[name = string("x_15_strides_0"), val = tensor([1, 1])]; - tensor x_15_pad_0 = const()[name = string("x_15_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_15_dilations_0 = const()[name = string("x_15_dilations_0"), val = tensor([1, 1])]; - int32 x_15_groups_0 = const()[name = string("x_15_groups_0"), val = int32(1)]; - tensor model_model_layers_0_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560968320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(566940352))))[name = string("model_model_layers_0_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_15_cast_fp16 = conv(dilations = x_15_dilations_0, groups = x_15_groups_0, pad = x_15_pad_0, pad_type = x_15_pad_type_0, strides = x_15_strides_0, weight = model_model_layers_0_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_15_cast_fp16)[name = string("x_15_cast_fp16")]; - string b_1_pad_type_0 = const()[name = string("b_1_pad_type_0"), val = string("valid")]; - tensor b_1_strides_0 = const()[name = string("b_1_strides_0"), val = tensor([1, 1])]; - tensor b_1_pad_0 = const()[name = string("b_1_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_1_dilations_0 = const()[name = string("b_1_dilations_0"), val = tensor([1, 1])]; - int32 b_1_groups_0 = const()[name = string("b_1_groups_0"), val = int32(1)]; - tensor model_model_layers_0_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(567051008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(573023040))))[name = string("model_model_layers_0_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_1_cast_fp16 = conv(dilations = b_1_dilations_0, groups = b_1_groups_0, pad = b_1_pad_0, pad_type = b_1_pad_type_0, strides = b_1_strides_0, weight = model_model_layers_0_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_15_cast_fp16)[name = string("b_1_cast_fp16")]; - string var_2196_mode_0 = const()[name = string("op_2196_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_2196_cast_fp16 = gelu(mode = var_2196_mode_0, x = x_15_cast_fp16)[name = string("op_2196_cast_fp16")]; - tensor input_17_cast_fp16 = mul(x = var_2196_cast_fp16, y = b_1_cast_fp16)[name = string("input_17_cast_fp16")]; - string e_1_pad_type_0 = const()[name = string("e_1_pad_type_0"), val = string("valid")]; - tensor e_1_strides_0 = const()[name = string("e_1_strides_0"), val = tensor([1, 1])]; - tensor e_1_pad_0 = const()[name = string("e_1_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_1_dilations_0 = const()[name = string("e_1_dilations_0"), val = tensor([1, 1])]; - int32 e_1_groups_0 = const()[name = string("e_1_groups_0"), val = int32(1)]; - tensor model_model_layers_0_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56619392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62591424))))[name = string("model_model_layers_0_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_1_cast_fp16 = conv(dilations = e_1_dilations_0, groups = e_1_groups_0, pad = e_1_pad_0, pad_type = e_1_pad_type_0, strides = e_1_strides_0, weight = model_model_layers_0_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_17_cast_fp16)[name = string("e_1_cast_fp16")]; - tensor var_2204_axes_0 = const()[name = string("op_2204_axes_0"), val = tensor([2])]; - tensor var_2204_cast_fp16 = squeeze(axes = var_2204_axes_0, x = e_1_cast_fp16)[name = string("op_2204_cast_fp16")]; - tensor var_2205 = const()[name = string("op_2205"), val = tensor([0, 2, 1])]; - int32 var_2216 = const()[name = string("op_2216"), val = int32(-1)]; - fp16 const_34_promoted_to_fp16 = const()[name = string("const_34_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_13_cast_fp16 = transpose(perm = var_2205, x = var_2204_cast_fp16)[name = string("transpose_150")]; - tensor var_2218_cast_fp16 = mul(x = hidden_states_13_cast_fp16, y = const_34_promoted_to_fp16)[name = string("op_2218_cast_fp16")]; - bool input_19_interleave_0 = const()[name = string("input_19_interleave_0"), val = bool(false)]; - tensor input_19_cast_fp16 = concat(axis = var_2216, interleave = input_19_interleave_0, values = (hidden_states_13_cast_fp16, var_2218_cast_fp16))[name = string("input_19_cast_fp16")]; - tensor normed_21_axes_0 = const()[name = string("normed_21_axes_0"), val = tensor([-1])]; - fp16 var_2213_to_fp16 = const()[name = string("op_2213_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_21_cast_fp16 = layer_norm(axes = normed_21_axes_0, epsilon = var_2213_to_fp16, x = input_19_cast_fp16)[name = string("normed_21_cast_fp16")]; - tensor normed_23_begin_0 = const()[name = string("normed_23_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_23_end_0 = const()[name = string("normed_23_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_23_end_mask_0 = const()[name = string("normed_23_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_23_cast_fp16 = slice_by_index(begin = normed_23_begin_0, end = normed_23_end_0, end_mask = normed_23_end_mask_0, x = normed_21_cast_fp16)[name = string("normed_23_cast_fp16")]; - tensor var_2232_to_fp16 = const()[name = string("op_2232_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62609920)))]; - tensor hidden_states_15_cast_fp16 = mul(x = normed_23_cast_fp16, y = var_2232_to_fp16)[name = string("hidden_states_15_cast_fp16")]; - tensor hidden_states_17_cast_fp16 = add(x = hidden_states_11_cast_fp16, y = hidden_states_15_cast_fp16)[name = string("hidden_states_17_cast_fp16")]; - int32 var_2283 = const()[name = string("op_2283"), val = int32(-1)]; - fp16 const_38_promoted_to_fp16 = const()[name = string("const_38_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_2285_cast_fp16 = mul(x = hidden_states_17_cast_fp16, y = const_38_promoted_to_fp16)[name = string("op_2285_cast_fp16")]; - bool input_21_interleave_0 = const()[name = string("input_21_interleave_0"), val = bool(false)]; - tensor input_21_cast_fp16 = concat(axis = var_2283, interleave = input_21_interleave_0, values = (hidden_states_17_cast_fp16, var_2285_cast_fp16))[name = string("input_21_cast_fp16")]; - tensor normed_25_axes_0 = const()[name = string("normed_25_axes_0"), val = tensor([-1])]; - fp16 var_2280_to_fp16 = const()[name = string("op_2280_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_25_cast_fp16 = layer_norm(axes = normed_25_axes_0, epsilon = var_2280_to_fp16, x = input_21_cast_fp16)[name = string("normed_25_cast_fp16")]; - tensor normed_27_begin_0 = const()[name = string("normed_27_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_27_end_0 = const()[name = string("normed_27_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_27_end_mask_0 = const()[name = string("normed_27_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_27_cast_fp16 = slice_by_index(begin = normed_27_begin_0, end = normed_27_end_0, end_mask = normed_27_end_mask_0, x = normed_25_cast_fp16)[name = string("normed_27_cast_fp16")]; - tensor var_2299_to_fp16 = const()[name = string("op_2299_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62612288)))]; - tensor hidden_states_19_cast_fp16 = mul(x = normed_27_cast_fp16, y = var_2299_to_fp16)[name = string("hidden_states_19_cast_fp16")]; - tensor var_2304 = const()[name = string("op_2304"), val = tensor([0, 2, 1])]; - tensor var_2307_axes_0 = const()[name = string("op_2307_axes_0"), val = tensor([2])]; - tensor var_2305_cast_fp16 = transpose(perm = var_2304, x = hidden_states_19_cast_fp16)[name = string("transpose_149")]; - tensor var_2307_cast_fp16 = expand_dims(axes = var_2307_axes_0, x = var_2305_cast_fp16)[name = string("op_2307_cast_fp16")]; - string var_2323_pad_type_0 = const()[name = string("op_2323_pad_type_0"), val = string("valid")]; - tensor var_2323_strides_0 = const()[name = string("op_2323_strides_0"), val = tensor([1, 1])]; - tensor var_2323_pad_0 = const()[name = string("op_2323_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2323_dilations_0 = const()[name = string("op_2323_dilations_0"), val = tensor([1, 1])]; - int32 var_2323_groups_0 = const()[name = string("op_2323_groups_0"), val = int32(1)]; - tensor var_2323 = conv(dilations = var_2323_dilations_0, groups = var_2323_groups_0, pad = var_2323_pad_0, pad_type = var_2323_pad_type_0, strides = var_2323_strides_0, weight = model_model_layers_1_self_attn_q_proj_weight_palettized, x = var_2307_cast_fp16)[name = string("op_2323")]; - tensor var_2328 = const()[name = string("op_2328"), val = tensor([1, 4, 1, 256])]; - tensor var_2329 = reshape(shape = var_2328, x = var_2323)[name = string("op_2329")]; - string var_2345_pad_type_0 = const()[name = string("op_2345_pad_type_0"), val = string("valid")]; - tensor var_2345_strides_0 = const()[name = string("op_2345_strides_0"), val = tensor([1, 1])]; - tensor var_2345_pad_0 = const()[name = string("op_2345_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2345_dilations_0 = const()[name = string("op_2345_dilations_0"), val = tensor([1, 1])]; - int32 var_2345_groups_0 = const()[name = string("op_2345_groups_0"), val = int32(1)]; - tensor var_2345 = conv(dilations = var_2345_dilations_0, groups = var_2345_groups_0, pad = var_2345_pad_0, pad_type = var_2345_pad_type_0, strides = var_2345_strides_0, weight = model_model_layers_1_self_attn_k_proj_weight_palettized, x = var_2307_cast_fp16)[name = string("op_2345")]; - tensor var_2350 = const()[name = string("op_2350"), val = tensor([1, 1, 1, 256])]; - tensor var_2351 = reshape(shape = var_2350, x = var_2345)[name = string("op_2351")]; - string var_2367_pad_type_0 = const()[name = string("op_2367_pad_type_0"), val = string("valid")]; - tensor var_2367_strides_0 = const()[name = string("op_2367_strides_0"), val = tensor([1, 1])]; - tensor var_2367_pad_0 = const()[name = string("op_2367_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2367_dilations_0 = const()[name = string("op_2367_dilations_0"), val = tensor([1, 1])]; - int32 var_2367_groups_0 = const()[name = string("op_2367_groups_0"), val = int32(1)]; - tensor var_2367 = conv(dilations = var_2367_dilations_0, groups = var_2367_groups_0, pad = var_2367_pad_0, pad_type = var_2367_pad_type_0, strides = var_2367_strides_0, weight = model_model_layers_1_self_attn_v_proj_weight_palettized, x = var_2307_cast_fp16)[name = string("op_2367")]; - tensor var_2372 = const()[name = string("op_2372"), val = tensor([1, 1, 1, 256])]; - tensor var_2373 = reshape(shape = var_2372, x = var_2367)[name = string("op_2373")]; - int32 var_2388 = const()[name = string("op_2388"), val = int32(-1)]; - fp16 const_42_promoted = const()[name = string("const_42_promoted"), val = fp16(-0x1p+0)]; - tensor var_2390 = mul(x = var_2329, y = const_42_promoted)[name = string("op_2390")]; - bool input_25_interleave_0 = const()[name = string("input_25_interleave_0"), val = bool(false)]; - tensor input_25 = concat(axis = var_2388, interleave = input_25_interleave_0, values = (var_2329, var_2390))[name = string("input_25")]; - tensor normed_29_axes_0 = const()[name = string("normed_29_axes_0"), val = tensor([-1])]; - fp16 var_2385_to_fp16 = const()[name = string("op_2385_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_29_cast_fp16 = layer_norm(axes = normed_29_axes_0, epsilon = var_2385_to_fp16, x = input_25)[name = string("normed_29_cast_fp16")]; - tensor normed_31_begin_0 = const()[name = string("normed_31_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_31_end_0 = const()[name = string("normed_31_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_31_end_mask_0 = const()[name = string("normed_31_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_31 = slice_by_index(begin = normed_31_begin_0, end = normed_31_end_0, end_mask = normed_31_end_mask_0, x = normed_29_cast_fp16)[name = string("normed_31")]; - tensor var_2404_to_fp16 = const()[name = string("op_2404_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62614656)))]; - tensor q_3_cast_fp16 = mul(x = normed_31, y = var_2404_to_fp16)[name = string("q_3_cast_fp16")]; - int32 var_2415 = const()[name = string("op_2415"), val = int32(-1)]; - fp16 const_46_promoted = const()[name = string("const_46_promoted"), val = fp16(-0x1p+0)]; - tensor var_2417 = mul(x = var_2351, y = const_46_promoted)[name = string("op_2417")]; - bool input_27_interleave_0 = const()[name = string("input_27_interleave_0"), val = bool(false)]; - tensor input_27 = concat(axis = var_2415, interleave = input_27_interleave_0, values = (var_2351, var_2417))[name = string("input_27")]; - tensor normed_33_axes_0 = const()[name = string("normed_33_axes_0"), val = tensor([-1])]; - fp16 var_2412_to_fp16 = const()[name = string("op_2412_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_33_cast_fp16 = layer_norm(axes = normed_33_axes_0, epsilon = var_2412_to_fp16, x = input_27)[name = string("normed_33_cast_fp16")]; - tensor normed_35_begin_0 = const()[name = string("normed_35_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_35_end_0 = const()[name = string("normed_35_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_35_end_mask_0 = const()[name = string("normed_35_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_35 = slice_by_index(begin = normed_35_begin_0, end = normed_35_end_0, end_mask = normed_35_end_mask_0, x = normed_33_cast_fp16)[name = string("normed_35")]; - tensor var_2431_to_fp16 = const()[name = string("op_2431_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62615232)))]; - tensor k_3_cast_fp16 = mul(x = normed_35, y = var_2431_to_fp16)[name = string("k_3_cast_fp16")]; - tensor var_2433_cast_fp16 = mul(x = q_3_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2433_cast_fp16")]; - tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_5_cast_fp16 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = q_3_cast_fp16)[name = string("x1_5_cast_fp16")]; - tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_5_cast_fp16 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = q_3_cast_fp16)[name = string("x2_5_cast_fp16")]; - fp16 const_52_promoted_to_fp16 = const()[name = string("const_52_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_2454_cast_fp16 = mul(x = x2_5_cast_fp16, y = const_52_promoted_to_fp16)[name = string("op_2454_cast_fp16")]; - int32 var_2456 = const()[name = string("op_2456"), val = int32(-1)]; - bool var_2457_interleave_0 = const()[name = string("op_2457_interleave_0"), val = bool(false)]; - tensor var_2457_cast_fp16 = concat(axis = var_2456, interleave = var_2457_interleave_0, values = (var_2454_cast_fp16, x1_5_cast_fp16))[name = string("op_2457_cast_fp16")]; - tensor var_2458_cast_fp16 = mul(x = var_2457_cast_fp16, y = sin_1_cast_fp16)[name = string("op_2458_cast_fp16")]; - tensor query_states_5_cast_fp16 = add(x = var_2433_cast_fp16, y = var_2458_cast_fp16)[name = string("query_states_5_cast_fp16")]; - tensor var_2461_cast_fp16 = mul(x = k_3_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2461_cast_fp16")]; - tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_7_cast_fp16 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = k_3_cast_fp16)[name = string("x1_7_cast_fp16")]; - tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_7_cast_fp16 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = k_3_cast_fp16)[name = string("x2_7_cast_fp16")]; - fp16 const_55_promoted_to_fp16 = const()[name = string("const_55_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_2482_cast_fp16 = mul(x = x2_7_cast_fp16, y = const_55_promoted_to_fp16)[name = string("op_2482_cast_fp16")]; - int32 var_2484 = const()[name = string("op_2484"), val = int32(-1)]; - bool var_2485_interleave_0 = const()[name = string("op_2485_interleave_0"), val = bool(false)]; - tensor var_2485_cast_fp16 = concat(axis = var_2484, interleave = var_2485_interleave_0, values = (var_2482_cast_fp16, x1_7_cast_fp16))[name = string("op_2485_cast_fp16")]; - tensor var_2486_cast_fp16 = mul(x = var_2485_cast_fp16, y = sin_1_cast_fp16)[name = string("op_2486_cast_fp16")]; - tensor key_states_5_cast_fp16 = add(x = var_2461_cast_fp16, y = var_2486_cast_fp16)[name = string("key_states_5_cast_fp16")]; - tensor key_slice_3_begin_0 = const()[name = string("key_slice_3_begin_0"), val = tensor([1, 0, 0, 0])]; - tensor key_slice_3_end_0 = const()[name = string("key_slice_3_end_0"), val = tensor([2, 1, 512, 256])]; - tensor key_slice_3_end_mask_0 = const()[name = string("key_slice_3_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_3_cast_fp16 = slice_by_index(begin = key_slice_3_begin_0, end = key_slice_3_end_0, end_mask = key_slice_3_end_mask_0, x = coreml_update_state_53)[name = string("key_slice_3_cast_fp16")]; - tensor key_tail_3_begin_0 = const()[name = string("key_tail_3_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor key_tail_3_end_0 = const()[name = string("key_tail_3_end_0"), val = tensor([1, 1, 512, 256])]; - tensor key_tail_3_cast_fp16 = slice_by_index(begin = key_tail_3_begin_0, end = key_tail_3_end_0, x = key_slice_3_cast_fp16)[name = string("key_tail_3_cast_fp16")]; - int32 var_2499 = const()[name = string("op_2499"), val = int32(2)]; - bool shifted_key_3_interleave_0 = const()[name = string("shifted_key_3_interleave_0"), val = bool(false)]; - tensor shifted_key_3_cast_fp16 = concat(axis = var_2499, interleave = shifted_key_3_interleave_0, values = (key_tail_3_cast_fp16, key_states_5_cast_fp16))[name = string("shifted_key_3_cast_fp16")]; - tensor concat_4 = const()[name = string("concat_4"), val = tensor([1, 0, 0, 0])]; - tensor concat_5 = const()[name = string("concat_5"), val = tensor([2, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_4, begin_mask = model_model_kv_cache_local_internal_tensor_assign_3_begin_mask_0, end = concat_5, end_mask = model_model_kv_cache_local_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_3_stride_0, update = shifted_key_3_cast_fp16, x = coreml_update_state_53)[name = string("model_model_kv_cache_local_internal_tensor_assign_3_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_54_write_state")]; - tensor coreml_update_state_54 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_54")]; - tensor value_slice_3_begin_0 = const()[name = string("value_slice_3_begin_0"), val = tensor([23, 0, 0, 0])]; - tensor value_slice_3_end_0 = const()[name = string("value_slice_3_end_0"), val = tensor([24, 1, 512, 256])]; - tensor value_slice_3_end_mask_0 = const()[name = string("value_slice_3_end_mask_0"), val = tensor([false, true, true, true])]; - tensor value_slice_3_cast_fp16 = slice_by_index(begin = value_slice_3_begin_0, end = value_slice_3_end_0, end_mask = value_slice_3_end_mask_0, x = coreml_update_state_54)[name = string("value_slice_3_cast_fp16")]; - tensor value_tail_3_begin_0 = const()[name = string("value_tail_3_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor value_tail_3_end_0 = const()[name = string("value_tail_3_end_0"), val = tensor([1, 1, 512, 256])]; - tensor value_tail_3_cast_fp16 = slice_by_index(begin = value_tail_3_begin_0, end = value_tail_3_end_0, x = value_slice_3_cast_fp16)[name = string("value_tail_3_cast_fp16")]; - int32 var_2533 = const()[name = string("op_2533"), val = int32(2)]; - bool shifted_value_3_interleave_0 = const()[name = string("shifted_value_3_interleave_0"), val = bool(false)]; - tensor shifted_value_3_cast_fp16 = concat(axis = var_2533, interleave = shifted_value_3_interleave_0, values = (value_tail_3_cast_fp16, var_2373))[name = string("shifted_value_3_cast_fp16")]; - tensor concat_6 = const()[name = string("concat_6"), val = tensor([23, 0, 0, 0])]; - tensor concat_7 = const()[name = string("concat_7"), val = tensor([24, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_6, begin_mask = model_model_kv_cache_local_internal_tensor_assign_4_begin_mask_0, end = concat_7, end_mask = model_model_kv_cache_local_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_4_stride_0, update = shifted_value_3_cast_fp16, x = coreml_update_state_54)[name = string("model_model_kv_cache_local_internal_tensor_assign_4_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_55_write_state")]; - tensor coreml_update_state_55 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_55")]; - tensor var_2561_begin_0 = const()[name = string("op_2561_begin_0"), val = tensor([1, 0, 0, 0])]; - tensor var_2561_end_0 = const()[name = string("op_2561_end_0"), val = tensor([2, 1, 512, 256])]; - tensor var_2561_end_mask_0 = const()[name = string("op_2561_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_2561_cast_fp16 = slice_by_index(begin = var_2561_begin_0, end = var_2561_end_0, end_mask = var_2561_end_mask_0, x = coreml_update_state_55)[name = string("op_2561_cast_fp16")]; - tensor var_2568_begin_0 = const()[name = string("op_2568_begin_0"), val = tensor([23, 0, 0, 0])]; - tensor var_2568_end_0 = const()[name = string("op_2568_end_0"), val = tensor([24, 1, 512, 256])]; - tensor var_2568_end_mask_0 = const()[name = string("op_2568_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_2568_cast_fp16 = slice_by_index(begin = var_2568_begin_0, end = var_2568_end_0, end_mask = var_2568_end_mask_0, x = coreml_update_state_55)[name = string("op_2568_cast_fp16")]; - tensor var_2605 = const()[name = string("op_2605"), val = tensor([1, 4, 1, 1])]; - tensor x_21_cast_fp16 = tile(reps = var_2605, x = var_2561_cast_fp16)[name = string("x_21_cast_fp16")]; - tensor var_2625 = const()[name = string("op_2625"), val = tensor([1, 4, 1, 1])]; - tensor x_27_cast_fp16 = tile(reps = var_2625, x = var_2568_cast_fp16)[name = string("x_27_cast_fp16")]; - bool var_2652_transpose_x_1 = const()[name = string("op_2652_transpose_x_1"), val = bool(false)]; - bool var_2652_transpose_y_1 = const()[name = string("op_2652_transpose_y_1"), val = bool(true)]; - tensor var_2652 = matmul(transpose_x = var_2652_transpose_x_1, transpose_y = var_2652_transpose_y_1, x = query_states_5_cast_fp16, y = x_21_cast_fp16)[name = string("op_2652")]; - fp16 var_2653_to_fp16 = const()[name = string("op_2653_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_7_cast_fp16 = mul(x = var_2652, y = var_2653_to_fp16)[name = string("attn_weights_7_cast_fp16")]; - tensor attn_weights_9_cast_fp16 = add(x = attn_weights_7_cast_fp16, y = var_2059)[name = string("attn_weights_9_cast_fp16")]; - int32 var_2688 = const()[name = string("op_2688"), val = int32(-1)]; - tensor attn_weights_11_cast_fp16 = softmax(axis = var_2688, x = attn_weights_9_cast_fp16)[name = string("attn_weights_11_cast_fp16")]; - bool attn_output_11_transpose_x_0 = const()[name = string("attn_output_11_transpose_x_0"), val = bool(false)]; - bool attn_output_11_transpose_y_0 = const()[name = string("attn_output_11_transpose_y_0"), val = bool(false)]; - tensor attn_output_11_cast_fp16 = matmul(transpose_x = attn_output_11_transpose_x_0, transpose_y = attn_output_11_transpose_y_0, x = attn_weights_11_cast_fp16, y = x_27_cast_fp16)[name = string("attn_output_11_cast_fp16")]; - tensor var_2699_perm_0 = const()[name = string("op_2699_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_2703 = const()[name = string("op_2703"), val = tensor([1, 1, 1024])]; - tensor var_2699_cast_fp16 = transpose(perm = var_2699_perm_0, x = attn_output_11_cast_fp16)[name = string("transpose_148")]; - tensor attn_output_15_cast_fp16 = reshape(shape = var_2703, x = var_2699_cast_fp16)[name = string("attn_output_15_cast_fp16")]; - tensor var_2708 = const()[name = string("op_2708"), val = tensor([0, 2, 1])]; - string var_2724_pad_type_0 = const()[name = string("op_2724_pad_type_0"), val = string("valid")]; - int32 var_2724_groups_0 = const()[name = string("op_2724_groups_0"), val = int32(1)]; - tensor var_2724_strides_0 = const()[name = string("op_2724_strides_0"), val = tensor([1])]; - tensor var_2724_pad_0 = const()[name = string("op_2724_pad_0"), val = tensor([0, 0])]; - tensor var_2724_dilations_0 = const()[name = string("op_2724_dilations_0"), val = tensor([1])]; - tensor squeeze_1_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62615808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63500608))))[name = string("squeeze_1_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_2709_cast_fp16 = transpose(perm = var_2708, x = attn_output_15_cast_fp16)[name = string("transpose_147")]; - tensor var_2724_cast_fp16 = conv(dilations = var_2724_dilations_0, groups = var_2724_groups_0, pad = var_2724_pad_0, pad_type = var_2724_pad_type_0, strides = var_2724_strides_0, weight = squeeze_1_cast_fp16_to_fp32_to_fp16_palettized, x = var_2709_cast_fp16)[name = string("op_2724_cast_fp16")]; - tensor var_2728 = const()[name = string("op_2728"), val = tensor([0, 2, 1])]; - int32 var_2739 = const()[name = string("op_2739"), val = int32(-1)]; - fp16 const_64_promoted_to_fp16 = const()[name = string("const_64_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_25_cast_fp16 = transpose(perm = var_2728, x = var_2724_cast_fp16)[name = string("transpose_146")]; - tensor var_2741_cast_fp16 = mul(x = hidden_states_25_cast_fp16, y = const_64_promoted_to_fp16)[name = string("op_2741_cast_fp16")]; - bool input_31_interleave_0 = const()[name = string("input_31_interleave_0"), val = bool(false)]; - tensor input_31_cast_fp16 = concat(axis = var_2739, interleave = input_31_interleave_0, values = (hidden_states_25_cast_fp16, var_2741_cast_fp16))[name = string("input_31_cast_fp16")]; - tensor normed_37_axes_0 = const()[name = string("normed_37_axes_0"), val = tensor([-1])]; - fp16 var_2736_to_fp16 = const()[name = string("op_2736_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_37_cast_fp16 = layer_norm(axes = normed_37_axes_0, epsilon = var_2736_to_fp16, x = input_31_cast_fp16)[name = string("normed_37_cast_fp16")]; - tensor normed_39_begin_0 = const()[name = string("normed_39_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_39_end_0 = const()[name = string("normed_39_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_39_end_mask_0 = const()[name = string("normed_39_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_39_cast_fp16 = slice_by_index(begin = normed_39_begin_0, end = normed_39_end_0, end_mask = normed_39_end_mask_0, x = normed_37_cast_fp16)[name = string("normed_39_cast_fp16")]; - tensor var_2755_to_fp16 = const()[name = string("op_2755_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63519104)))]; - tensor attn_output_19_cast_fp16 = mul(x = normed_39_cast_fp16, y = var_2755_to_fp16)[name = string("attn_output_19_cast_fp16")]; - tensor hidden_states_27_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = attn_output_19_cast_fp16)[name = string("hidden_states_27_cast_fp16")]; - int32 var_2768 = const()[name = string("op_2768"), val = int32(-1)]; - fp16 const_68_promoted_to_fp16 = const()[name = string("const_68_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_2770_cast_fp16 = mul(x = hidden_states_27_cast_fp16, y = const_68_promoted_to_fp16)[name = string("op_2770_cast_fp16")]; - bool input_33_interleave_0 = const()[name = string("input_33_interleave_0"), val = bool(false)]; - tensor input_33_cast_fp16 = concat(axis = var_2768, interleave = input_33_interleave_0, values = (hidden_states_27_cast_fp16, var_2770_cast_fp16))[name = string("input_33_cast_fp16")]; - tensor normed_41_axes_0 = const()[name = string("normed_41_axes_0"), val = tensor([-1])]; - fp16 var_2765_to_fp16 = const()[name = string("op_2765_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_41_cast_fp16 = layer_norm(axes = normed_41_axes_0, epsilon = var_2765_to_fp16, x = input_33_cast_fp16)[name = string("normed_41_cast_fp16")]; - tensor normed_43_begin_0 = const()[name = string("normed_43_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_43_end_0 = const()[name = string("normed_43_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_43_end_mask_0 = const()[name = string("normed_43_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_43_cast_fp16 = slice_by_index(begin = normed_43_begin_0, end = normed_43_end_0, end_mask = normed_43_end_mask_0, x = normed_41_cast_fp16)[name = string("normed_43_cast_fp16")]; - tensor var_2784_to_fp16 = const()[name = string("op_2784_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63521472)))]; - tensor x_29_cast_fp16 = mul(x = normed_43_cast_fp16, y = var_2784_to_fp16)[name = string("x_29_cast_fp16")]; - tensor var_2796 = const()[name = string("op_2796"), val = tensor([0, 2, 1])]; - tensor input_35_axes_0 = const()[name = string("input_35_axes_0"), val = tensor([2])]; - tensor var_2797_cast_fp16 = transpose(perm = var_2796, x = x_29_cast_fp16)[name = string("transpose_145")]; - tensor input_35_cast_fp16 = expand_dims(axes = input_35_axes_0, x = var_2797_cast_fp16)[name = string("input_35_cast_fp16")]; - string x_31_pad_type_0 = const()[name = string("x_31_pad_type_0"), val = string("valid")]; - tensor x_31_strides_0 = const()[name = string("x_31_strides_0"), val = tensor([1, 1])]; - tensor x_31_pad_0 = const()[name = string("x_31_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_31_dilations_0 = const()[name = string("x_31_dilations_0"), val = tensor([1, 1])]; - int32 x_31_groups_0 = const()[name = string("x_31_groups_0"), val = int32(1)]; - tensor model_model_layers_1_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(573133696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(579105728))))[name = string("model_model_layers_1_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_31_cast_fp16 = conv(dilations = x_31_dilations_0, groups = x_31_groups_0, pad = x_31_pad_0, pad_type = x_31_pad_type_0, strides = x_31_strides_0, weight = model_model_layers_1_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_35_cast_fp16)[name = string("x_31_cast_fp16")]; - string b_3_pad_type_0 = const()[name = string("b_3_pad_type_0"), val = string("valid")]; - tensor b_3_strides_0 = const()[name = string("b_3_strides_0"), val = tensor([1, 1])]; - tensor b_3_pad_0 = const()[name = string("b_3_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_3_dilations_0 = const()[name = string("b_3_dilations_0"), val = tensor([1, 1])]; - int32 b_3_groups_0 = const()[name = string("b_3_groups_0"), val = int32(1)]; - tensor model_model_layers_1_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(579216384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(585188416))))[name = string("model_model_layers_1_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_3_cast_fp16 = conv(dilations = b_3_dilations_0, groups = b_3_groups_0, pad = b_3_pad_0, pad_type = b_3_pad_type_0, strides = b_3_strides_0, weight = model_model_layers_1_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_35_cast_fp16)[name = string("b_3_cast_fp16")]; - string var_2822_mode_0 = const()[name = string("op_2822_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_2822_cast_fp16 = gelu(mode = var_2822_mode_0, x = x_31_cast_fp16)[name = string("op_2822_cast_fp16")]; - tensor input_37_cast_fp16 = mul(x = var_2822_cast_fp16, y = b_3_cast_fp16)[name = string("input_37_cast_fp16")]; - string e_3_pad_type_0 = const()[name = string("e_3_pad_type_0"), val = string("valid")]; - tensor e_3_strides_0 = const()[name = string("e_3_strides_0"), val = tensor([1, 1])]; - tensor e_3_pad_0 = const()[name = string("e_3_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_3_dilations_0 = const()[name = string("e_3_dilations_0"), val = tensor([1, 1])]; - int32 e_3_groups_0 = const()[name = string("e_3_groups_0"), val = int32(1)]; - tensor model_model_layers_1_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75689216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81661248))))[name = string("model_model_layers_1_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_3_cast_fp16 = conv(dilations = e_3_dilations_0, groups = e_3_groups_0, pad = e_3_pad_0, pad_type = e_3_pad_type_0, strides = e_3_strides_0, weight = model_model_layers_1_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_37_cast_fp16)[name = string("e_3_cast_fp16")]; - tensor var_2830_axes_0 = const()[name = string("op_2830_axes_0"), val = tensor([2])]; - tensor var_2830_cast_fp16 = squeeze(axes = var_2830_axes_0, x = e_3_cast_fp16)[name = string("op_2830_cast_fp16")]; - tensor var_2831 = const()[name = string("op_2831"), val = tensor([0, 2, 1])]; - int32 var_2842 = const()[name = string("op_2842"), val = int32(-1)]; - fp16 const_72_promoted_to_fp16 = const()[name = string("const_72_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_29_cast_fp16 = transpose(perm = var_2831, x = var_2830_cast_fp16)[name = string("transpose_144")]; - tensor var_2844_cast_fp16 = mul(x = hidden_states_29_cast_fp16, y = const_72_promoted_to_fp16)[name = string("op_2844_cast_fp16")]; - bool input_39_interleave_0 = const()[name = string("input_39_interleave_0"), val = bool(false)]; - tensor input_39_cast_fp16 = concat(axis = var_2842, interleave = input_39_interleave_0, values = (hidden_states_29_cast_fp16, var_2844_cast_fp16))[name = string("input_39_cast_fp16")]; - tensor normed_45_axes_0 = const()[name = string("normed_45_axes_0"), val = tensor([-1])]; - fp16 var_2839_to_fp16 = const()[name = string("op_2839_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_45_cast_fp16 = layer_norm(axes = normed_45_axes_0, epsilon = var_2839_to_fp16, x = input_39_cast_fp16)[name = string("normed_45_cast_fp16")]; - tensor normed_47_begin_0 = const()[name = string("normed_47_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_47_end_0 = const()[name = string("normed_47_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_47_end_mask_0 = const()[name = string("normed_47_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_47_cast_fp16 = slice_by_index(begin = normed_47_begin_0, end = normed_47_end_0, end_mask = normed_47_end_mask_0, x = normed_45_cast_fp16)[name = string("normed_47_cast_fp16")]; - tensor var_2858_to_fp16 = const()[name = string("op_2858_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81679744)))]; - tensor hidden_states_31_cast_fp16 = mul(x = normed_47_cast_fp16, y = var_2858_to_fp16)[name = string("hidden_states_31_cast_fp16")]; - tensor hidden_states_33_cast_fp16 = add(x = hidden_states_27_cast_fp16, y = hidden_states_31_cast_fp16)[name = string("hidden_states_33_cast_fp16")]; - int32 var_2909 = const()[name = string("op_2909"), val = int32(-1)]; - fp16 const_76_promoted_to_fp16 = const()[name = string("const_76_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_2911_cast_fp16 = mul(x = hidden_states_33_cast_fp16, y = const_76_promoted_to_fp16)[name = string("op_2911_cast_fp16")]; - bool input_41_interleave_0 = const()[name = string("input_41_interleave_0"), val = bool(false)]; - tensor input_41_cast_fp16 = concat(axis = var_2909, interleave = input_41_interleave_0, values = (hidden_states_33_cast_fp16, var_2911_cast_fp16))[name = string("input_41_cast_fp16")]; - tensor normed_49_axes_0 = const()[name = string("normed_49_axes_0"), val = tensor([-1])]; - fp16 var_2906_to_fp16 = const()[name = string("op_2906_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_49_cast_fp16 = layer_norm(axes = normed_49_axes_0, epsilon = var_2906_to_fp16, x = input_41_cast_fp16)[name = string("normed_49_cast_fp16")]; - tensor normed_51_begin_0 = const()[name = string("normed_51_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_51_end_0 = const()[name = string("normed_51_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_51_end_mask_0 = const()[name = string("normed_51_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_51_cast_fp16 = slice_by_index(begin = normed_51_begin_0, end = normed_51_end_0, end_mask = normed_51_end_mask_0, x = normed_49_cast_fp16)[name = string("normed_51_cast_fp16")]; - tensor var_2925_to_fp16 = const()[name = string("op_2925_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81682112)))]; - tensor hidden_states_35_cast_fp16 = mul(x = normed_51_cast_fp16, y = var_2925_to_fp16)[name = string("hidden_states_35_cast_fp16")]; - tensor var_2930 = const()[name = string("op_2930"), val = tensor([0, 2, 1])]; - tensor var_2933_axes_0 = const()[name = string("op_2933_axes_0"), val = tensor([2])]; - tensor var_2931_cast_fp16 = transpose(perm = var_2930, x = hidden_states_35_cast_fp16)[name = string("transpose_143")]; - tensor var_2933_cast_fp16 = expand_dims(axes = var_2933_axes_0, x = var_2931_cast_fp16)[name = string("op_2933_cast_fp16")]; - string var_2949_pad_type_0 = const()[name = string("op_2949_pad_type_0"), val = string("valid")]; - tensor var_2949_strides_0 = const()[name = string("op_2949_strides_0"), val = tensor([1, 1])]; - tensor var_2949_pad_0 = const()[name = string("op_2949_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2949_dilations_0 = const()[name = string("op_2949_dilations_0"), val = tensor([1, 1])]; - int32 var_2949_groups_0 = const()[name = string("op_2949_groups_0"), val = int32(1)]; - tensor var_2949 = conv(dilations = var_2949_dilations_0, groups = var_2949_groups_0, pad = var_2949_pad_0, pad_type = var_2949_pad_type_0, strides = var_2949_strides_0, weight = model_model_layers_2_self_attn_q_proj_weight_palettized, x = var_2933_cast_fp16)[name = string("op_2949")]; - tensor var_2954 = const()[name = string("op_2954"), val = tensor([1, 4, 1, 256])]; - tensor var_2955 = reshape(shape = var_2954, x = var_2949)[name = string("op_2955")]; - string var_2971_pad_type_0 = const()[name = string("op_2971_pad_type_0"), val = string("valid")]; - tensor var_2971_strides_0 = const()[name = string("op_2971_strides_0"), val = tensor([1, 1])]; - tensor var_2971_pad_0 = const()[name = string("op_2971_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2971_dilations_0 = const()[name = string("op_2971_dilations_0"), val = tensor([1, 1])]; - int32 var_2971_groups_0 = const()[name = string("op_2971_groups_0"), val = int32(1)]; - tensor var_2971 = conv(dilations = var_2971_dilations_0, groups = var_2971_groups_0, pad = var_2971_pad_0, pad_type = var_2971_pad_type_0, strides = var_2971_strides_0, weight = model_model_layers_2_self_attn_k_proj_weight_palettized, x = var_2933_cast_fp16)[name = string("op_2971")]; - tensor var_2976 = const()[name = string("op_2976"), val = tensor([1, 1, 1, 256])]; - tensor var_2977 = reshape(shape = var_2976, x = var_2971)[name = string("op_2977")]; - string var_2993_pad_type_0 = const()[name = string("op_2993_pad_type_0"), val = string("valid")]; - tensor var_2993_strides_0 = const()[name = string("op_2993_strides_0"), val = tensor([1, 1])]; - tensor var_2993_pad_0 = const()[name = string("op_2993_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2993_dilations_0 = const()[name = string("op_2993_dilations_0"), val = tensor([1, 1])]; - int32 var_2993_groups_0 = const()[name = string("op_2993_groups_0"), val = int32(1)]; - tensor var_2993 = conv(dilations = var_2993_dilations_0, groups = var_2993_groups_0, pad = var_2993_pad_0, pad_type = var_2993_pad_type_0, strides = var_2993_strides_0, weight = model_model_layers_2_self_attn_v_proj_weight_palettized, x = var_2933_cast_fp16)[name = string("op_2993")]; - tensor var_2998 = const()[name = string("op_2998"), val = tensor([1, 1, 1, 256])]; - tensor var_2999 = reshape(shape = var_2998, x = var_2993)[name = string("op_2999")]; - int32 var_3014 = const()[name = string("op_3014"), val = int32(-1)]; - fp16 const_80_promoted = const()[name = string("const_80_promoted"), val = fp16(-0x1p+0)]; - tensor var_3016 = mul(x = var_2955, y = const_80_promoted)[name = string("op_3016")]; - bool input_45_interleave_0 = const()[name = string("input_45_interleave_0"), val = bool(false)]; - tensor input_45 = concat(axis = var_3014, interleave = input_45_interleave_0, values = (var_2955, var_3016))[name = string("input_45")]; - tensor normed_53_axes_0 = const()[name = string("normed_53_axes_0"), val = tensor([-1])]; - fp16 var_3011_to_fp16 = const()[name = string("op_3011_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_53_cast_fp16 = layer_norm(axes = normed_53_axes_0, epsilon = var_3011_to_fp16, x = input_45)[name = string("normed_53_cast_fp16")]; - tensor normed_55_begin_0 = const()[name = string("normed_55_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_55_end_0 = const()[name = string("normed_55_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_55_end_mask_0 = const()[name = string("normed_55_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_55 = slice_by_index(begin = normed_55_begin_0, end = normed_55_end_0, end_mask = normed_55_end_mask_0, x = normed_53_cast_fp16)[name = string("normed_55")]; - tensor var_3030_to_fp16 = const()[name = string("op_3030_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81684480)))]; - tensor q_5_cast_fp16 = mul(x = normed_55, y = var_3030_to_fp16)[name = string("q_5_cast_fp16")]; - int32 var_3041 = const()[name = string("op_3041"), val = int32(-1)]; - fp16 const_84_promoted = const()[name = string("const_84_promoted"), val = fp16(-0x1p+0)]; - tensor var_3043 = mul(x = var_2977, y = const_84_promoted)[name = string("op_3043")]; - bool input_47_interleave_0 = const()[name = string("input_47_interleave_0"), val = bool(false)]; - tensor input_47 = concat(axis = var_3041, interleave = input_47_interleave_0, values = (var_2977, var_3043))[name = string("input_47")]; - tensor normed_57_axes_0 = const()[name = string("normed_57_axes_0"), val = tensor([-1])]; - fp16 var_3038_to_fp16 = const()[name = string("op_3038_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_57_cast_fp16 = layer_norm(axes = normed_57_axes_0, epsilon = var_3038_to_fp16, x = input_47)[name = string("normed_57_cast_fp16")]; - tensor normed_59_begin_0 = const()[name = string("normed_59_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_59_end_0 = const()[name = string("normed_59_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_59_end_mask_0 = const()[name = string("normed_59_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_59 = slice_by_index(begin = normed_59_begin_0, end = normed_59_end_0, end_mask = normed_59_end_mask_0, x = normed_57_cast_fp16)[name = string("normed_59")]; - tensor var_3057_to_fp16 = const()[name = string("op_3057_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81685056)))]; - tensor k_5_cast_fp16 = mul(x = normed_59, y = var_3057_to_fp16)[name = string("k_5_cast_fp16")]; - tensor var_3059_cast_fp16 = mul(x = q_5_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3059_cast_fp16")]; - tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_9_cast_fp16 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = q_5_cast_fp16)[name = string("x1_9_cast_fp16")]; - tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_9_cast_fp16 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = q_5_cast_fp16)[name = string("x2_9_cast_fp16")]; - fp16 const_90_promoted_to_fp16 = const()[name = string("const_90_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_3080_cast_fp16 = mul(x = x2_9_cast_fp16, y = const_90_promoted_to_fp16)[name = string("op_3080_cast_fp16")]; - int32 var_3082 = const()[name = string("op_3082"), val = int32(-1)]; - bool var_3083_interleave_0 = const()[name = string("op_3083_interleave_0"), val = bool(false)]; - tensor var_3083_cast_fp16 = concat(axis = var_3082, interleave = var_3083_interleave_0, values = (var_3080_cast_fp16, x1_9_cast_fp16))[name = string("op_3083_cast_fp16")]; - tensor var_3084_cast_fp16 = mul(x = var_3083_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3084_cast_fp16")]; - tensor query_states_9_cast_fp16 = add(x = var_3059_cast_fp16, y = var_3084_cast_fp16)[name = string("query_states_9_cast_fp16")]; - tensor var_3087_cast_fp16 = mul(x = k_5_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3087_cast_fp16")]; - tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_11_cast_fp16 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = k_5_cast_fp16)[name = string("x1_11_cast_fp16")]; - tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_11_cast_fp16 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = k_5_cast_fp16)[name = string("x2_11_cast_fp16")]; - fp16 const_93_promoted_to_fp16 = const()[name = string("const_93_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_3108_cast_fp16 = mul(x = x2_11_cast_fp16, y = const_93_promoted_to_fp16)[name = string("op_3108_cast_fp16")]; - int32 var_3110 = const()[name = string("op_3110"), val = int32(-1)]; - bool var_3111_interleave_0 = const()[name = string("op_3111_interleave_0"), val = bool(false)]; - tensor var_3111_cast_fp16 = concat(axis = var_3110, interleave = var_3111_interleave_0, values = (var_3108_cast_fp16, x1_11_cast_fp16))[name = string("op_3111_cast_fp16")]; - tensor var_3112_cast_fp16 = mul(x = var_3111_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3112_cast_fp16")]; - tensor key_states_9_cast_fp16 = add(x = var_3087_cast_fp16, y = var_3112_cast_fp16)[name = string("key_states_9_cast_fp16")]; - tensor key_slice_5_begin_0 = const()[name = string("key_slice_5_begin_0"), val = tensor([2, 0, 0, 0])]; - tensor key_slice_5_end_0 = const()[name = string("key_slice_5_end_0"), val = tensor([3, 1, 512, 256])]; - tensor key_slice_5_end_mask_0 = const()[name = string("key_slice_5_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_5_cast_fp16 = slice_by_index(begin = key_slice_5_begin_0, end = key_slice_5_end_0, end_mask = key_slice_5_end_mask_0, x = coreml_update_state_55)[name = string("key_slice_5_cast_fp16")]; - tensor key_tail_5_begin_0 = const()[name = string("key_tail_5_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor key_tail_5_end_0 = const()[name = string("key_tail_5_end_0"), val = tensor([1, 1, 512, 256])]; - tensor key_tail_5_cast_fp16 = slice_by_index(begin = key_tail_5_begin_0, end = key_tail_5_end_0, x = key_slice_5_cast_fp16)[name = string("key_tail_5_cast_fp16")]; - int32 var_3125 = const()[name = string("op_3125"), val = int32(2)]; - bool shifted_key_5_interleave_0 = const()[name = string("shifted_key_5_interleave_0"), val = bool(false)]; - tensor shifted_key_5_cast_fp16 = concat(axis = var_3125, interleave = shifted_key_5_interleave_0, values = (key_tail_5_cast_fp16, key_states_9_cast_fp16))[name = string("shifted_key_5_cast_fp16")]; - tensor concat_8 = const()[name = string("concat_8"), val = tensor([2, 0, 0, 0])]; - tensor concat_9 = const()[name = string("concat_9"), val = tensor([3, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_8, begin_mask = model_model_kv_cache_local_internal_tensor_assign_5_begin_mask_0, end = concat_9, end_mask = model_model_kv_cache_local_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_5_stride_0, update = shifted_key_5_cast_fp16, x = coreml_update_state_55)[name = string("model_model_kv_cache_local_internal_tensor_assign_5_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_56_write_state")]; - tensor coreml_update_state_56 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_56")]; - tensor value_slice_5_begin_0 = const()[name = string("value_slice_5_begin_0"), val = tensor([24, 0, 0, 0])]; - tensor value_slice_5_end_0 = const()[name = string("value_slice_5_end_0"), val = tensor([25, 1, 512, 256])]; - tensor value_slice_5_end_mask_0 = const()[name = string("value_slice_5_end_mask_0"), val = tensor([false, true, true, true])]; - tensor value_slice_5_cast_fp16 = slice_by_index(begin = value_slice_5_begin_0, end = value_slice_5_end_0, end_mask = value_slice_5_end_mask_0, x = coreml_update_state_56)[name = string("value_slice_5_cast_fp16")]; - tensor value_tail_5_begin_0 = const()[name = string("value_tail_5_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor value_tail_5_end_0 = const()[name = string("value_tail_5_end_0"), val = tensor([1, 1, 512, 256])]; - tensor value_tail_5_cast_fp16 = slice_by_index(begin = value_tail_5_begin_0, end = value_tail_5_end_0, x = value_slice_5_cast_fp16)[name = string("value_tail_5_cast_fp16")]; - int32 var_3159 = const()[name = string("op_3159"), val = int32(2)]; - bool shifted_value_5_interleave_0 = const()[name = string("shifted_value_5_interleave_0"), val = bool(false)]; - tensor shifted_value_5_cast_fp16 = concat(axis = var_3159, interleave = shifted_value_5_interleave_0, values = (value_tail_5_cast_fp16, var_2999))[name = string("shifted_value_5_cast_fp16")]; - tensor concat_10 = const()[name = string("concat_10"), val = tensor([24, 0, 0, 0])]; - tensor concat_11 = const()[name = string("concat_11"), val = tensor([25, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_10, begin_mask = model_model_kv_cache_local_internal_tensor_assign_6_begin_mask_0, end = concat_11, end_mask = model_model_kv_cache_local_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_6_stride_0, update = shifted_value_5_cast_fp16, x = coreml_update_state_56)[name = string("model_model_kv_cache_local_internal_tensor_assign_6_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_57_write_state")]; - tensor coreml_update_state_57 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_57")]; - tensor var_3187_begin_0 = const()[name = string("op_3187_begin_0"), val = tensor([2, 0, 0, 0])]; - tensor var_3187_end_0 = const()[name = string("op_3187_end_0"), val = tensor([3, 1, 512, 256])]; - tensor var_3187_end_mask_0 = const()[name = string("op_3187_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_3187_cast_fp16 = slice_by_index(begin = var_3187_begin_0, end = var_3187_end_0, end_mask = var_3187_end_mask_0, x = coreml_update_state_57)[name = string("op_3187_cast_fp16")]; - tensor var_3194_begin_0 = const()[name = string("op_3194_begin_0"), val = tensor([24, 0, 0, 0])]; - tensor var_3194_end_0 = const()[name = string("op_3194_end_0"), val = tensor([25, 1, 512, 256])]; - tensor var_3194_end_mask_0 = const()[name = string("op_3194_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_3194_cast_fp16 = slice_by_index(begin = var_3194_begin_0, end = var_3194_end_0, end_mask = var_3194_end_mask_0, x = coreml_update_state_57)[name = string("op_3194_cast_fp16")]; - tensor var_3231 = const()[name = string("op_3231"), val = tensor([1, 4, 1, 1])]; - tensor x_37_cast_fp16 = tile(reps = var_3231, x = var_3187_cast_fp16)[name = string("x_37_cast_fp16")]; - tensor var_3251 = const()[name = string("op_3251"), val = tensor([1, 4, 1, 1])]; - tensor x_43_cast_fp16 = tile(reps = var_3251, x = var_3194_cast_fp16)[name = string("x_43_cast_fp16")]; - bool var_3278_transpose_x_1 = const()[name = string("op_3278_transpose_x_1"), val = bool(false)]; - bool var_3278_transpose_y_1 = const()[name = string("op_3278_transpose_y_1"), val = bool(true)]; - tensor var_3278 = matmul(transpose_x = var_3278_transpose_x_1, transpose_y = var_3278_transpose_y_1, x = query_states_9_cast_fp16, y = x_37_cast_fp16)[name = string("op_3278")]; - fp16 var_3279_to_fp16 = const()[name = string("op_3279_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_13_cast_fp16 = mul(x = var_3278, y = var_3279_to_fp16)[name = string("attn_weights_13_cast_fp16")]; - tensor attn_weights_15_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = var_2059)[name = string("attn_weights_15_cast_fp16")]; - int32 var_3314 = const()[name = string("op_3314"), val = int32(-1)]; - tensor attn_weights_17_cast_fp16 = softmax(axis = var_3314, x = attn_weights_15_cast_fp16)[name = string("attn_weights_17_cast_fp16")]; - bool attn_output_21_transpose_x_0 = const()[name = string("attn_output_21_transpose_x_0"), val = bool(false)]; - bool attn_output_21_transpose_y_0 = const()[name = string("attn_output_21_transpose_y_0"), val = bool(false)]; - tensor attn_output_21_cast_fp16 = matmul(transpose_x = attn_output_21_transpose_x_0, transpose_y = attn_output_21_transpose_y_0, x = attn_weights_17_cast_fp16, y = x_43_cast_fp16)[name = string("attn_output_21_cast_fp16")]; - tensor var_3325_perm_0 = const()[name = string("op_3325_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_3329 = const()[name = string("op_3329"), val = tensor([1, 1, 1024])]; - tensor var_3325_cast_fp16 = transpose(perm = var_3325_perm_0, x = attn_output_21_cast_fp16)[name = string("transpose_142")]; - tensor attn_output_25_cast_fp16 = reshape(shape = var_3329, x = var_3325_cast_fp16)[name = string("attn_output_25_cast_fp16")]; - tensor var_3334 = const()[name = string("op_3334"), val = tensor([0, 2, 1])]; - string var_3350_pad_type_0 = const()[name = string("op_3350_pad_type_0"), val = string("valid")]; - int32 var_3350_groups_0 = const()[name = string("op_3350_groups_0"), val = int32(1)]; - tensor var_3350_strides_0 = const()[name = string("op_3350_strides_0"), val = tensor([1])]; - tensor var_3350_pad_0 = const()[name = string("op_3350_pad_0"), val = tensor([0, 0])]; - tensor var_3350_dilations_0 = const()[name = string("op_3350_dilations_0"), val = tensor([1])]; - tensor squeeze_2_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81685632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82570432))))[name = string("squeeze_2_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_3335_cast_fp16 = transpose(perm = var_3334, x = attn_output_25_cast_fp16)[name = string("transpose_141")]; - tensor var_3350_cast_fp16 = conv(dilations = var_3350_dilations_0, groups = var_3350_groups_0, pad = var_3350_pad_0, pad_type = var_3350_pad_type_0, strides = var_3350_strides_0, weight = squeeze_2_cast_fp16_to_fp32_to_fp16_palettized, x = var_3335_cast_fp16)[name = string("op_3350_cast_fp16")]; - tensor var_3354 = const()[name = string("op_3354"), val = tensor([0, 2, 1])]; - int32 var_3365 = const()[name = string("op_3365"), val = int32(-1)]; - fp16 const_102_promoted_to_fp16 = const()[name = string("const_102_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_41_cast_fp16 = transpose(perm = var_3354, x = var_3350_cast_fp16)[name = string("transpose_140")]; - tensor var_3367_cast_fp16 = mul(x = hidden_states_41_cast_fp16, y = const_102_promoted_to_fp16)[name = string("op_3367_cast_fp16")]; - bool input_51_interleave_0 = const()[name = string("input_51_interleave_0"), val = bool(false)]; - tensor input_51_cast_fp16 = concat(axis = var_3365, interleave = input_51_interleave_0, values = (hidden_states_41_cast_fp16, var_3367_cast_fp16))[name = string("input_51_cast_fp16")]; - tensor normed_61_axes_0 = const()[name = string("normed_61_axes_0"), val = tensor([-1])]; - fp16 var_3362_to_fp16 = const()[name = string("op_3362_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_61_cast_fp16 = layer_norm(axes = normed_61_axes_0, epsilon = var_3362_to_fp16, x = input_51_cast_fp16)[name = string("normed_61_cast_fp16")]; - tensor normed_63_begin_0 = const()[name = string("normed_63_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_63_end_0 = const()[name = string("normed_63_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_63_end_mask_0 = const()[name = string("normed_63_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_63_cast_fp16 = slice_by_index(begin = normed_63_begin_0, end = normed_63_end_0, end_mask = normed_63_end_mask_0, x = normed_61_cast_fp16)[name = string("normed_63_cast_fp16")]; - tensor var_3381_to_fp16 = const()[name = string("op_3381_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82588928)))]; - tensor attn_output_29_cast_fp16 = mul(x = normed_63_cast_fp16, y = var_3381_to_fp16)[name = string("attn_output_29_cast_fp16")]; - tensor hidden_states_43_cast_fp16 = add(x = hidden_states_33_cast_fp16, y = attn_output_29_cast_fp16)[name = string("hidden_states_43_cast_fp16")]; - int32 var_3394 = const()[name = string("op_3394"), val = int32(-1)]; - fp16 const_106_promoted_to_fp16 = const()[name = string("const_106_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_3396_cast_fp16 = mul(x = hidden_states_43_cast_fp16, y = const_106_promoted_to_fp16)[name = string("op_3396_cast_fp16")]; - bool input_53_interleave_0 = const()[name = string("input_53_interleave_0"), val = bool(false)]; - tensor input_53_cast_fp16 = concat(axis = var_3394, interleave = input_53_interleave_0, values = (hidden_states_43_cast_fp16, var_3396_cast_fp16))[name = string("input_53_cast_fp16")]; - tensor normed_65_axes_0 = const()[name = string("normed_65_axes_0"), val = tensor([-1])]; - fp16 var_3391_to_fp16 = const()[name = string("op_3391_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_65_cast_fp16 = layer_norm(axes = normed_65_axes_0, epsilon = var_3391_to_fp16, x = input_53_cast_fp16)[name = string("normed_65_cast_fp16")]; - tensor normed_67_begin_0 = const()[name = string("normed_67_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_67_end_0 = const()[name = string("normed_67_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_67_end_mask_0 = const()[name = string("normed_67_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_67_cast_fp16 = slice_by_index(begin = normed_67_begin_0, end = normed_67_end_0, end_mask = normed_67_end_mask_0, x = normed_65_cast_fp16)[name = string("normed_67_cast_fp16")]; - tensor var_3410_to_fp16 = const()[name = string("op_3410_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82591296)))]; - tensor x_45_cast_fp16 = mul(x = normed_67_cast_fp16, y = var_3410_to_fp16)[name = string("x_45_cast_fp16")]; - tensor var_3422 = const()[name = string("op_3422"), val = tensor([0, 2, 1])]; - tensor input_55_axes_0 = const()[name = string("input_55_axes_0"), val = tensor([2])]; - tensor var_3423_cast_fp16 = transpose(perm = var_3422, x = x_45_cast_fp16)[name = string("transpose_139")]; - tensor input_55_cast_fp16 = expand_dims(axes = input_55_axes_0, x = var_3423_cast_fp16)[name = string("input_55_cast_fp16")]; - string x_47_pad_type_0 = const()[name = string("x_47_pad_type_0"), val = string("valid")]; - tensor x_47_strides_0 = const()[name = string("x_47_strides_0"), val = tensor([1, 1])]; - tensor x_47_pad_0 = const()[name = string("x_47_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_47_dilations_0 = const()[name = string("x_47_dilations_0"), val = tensor([1, 1])]; - int32 x_47_groups_0 = const()[name = string("x_47_groups_0"), val = int32(1)]; - tensor model_model_layers_2_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(585299072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(591271104))))[name = string("model_model_layers_2_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_47_cast_fp16 = conv(dilations = x_47_dilations_0, groups = x_47_groups_0, pad = x_47_pad_0, pad_type = x_47_pad_type_0, strides = x_47_strides_0, weight = model_model_layers_2_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_55_cast_fp16)[name = string("x_47_cast_fp16")]; - string b_5_pad_type_0 = const()[name = string("b_5_pad_type_0"), val = string("valid")]; - tensor b_5_strides_0 = const()[name = string("b_5_strides_0"), val = tensor([1, 1])]; - tensor b_5_pad_0 = const()[name = string("b_5_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_5_dilations_0 = const()[name = string("b_5_dilations_0"), val = tensor([1, 1])]; - int32 b_5_groups_0 = const()[name = string("b_5_groups_0"), val = int32(1)]; - tensor model_model_layers_2_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(591381760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597353792))))[name = string("model_model_layers_2_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_5_cast_fp16 = conv(dilations = b_5_dilations_0, groups = b_5_groups_0, pad = b_5_pad_0, pad_type = b_5_pad_type_0, strides = b_5_strides_0, weight = model_model_layers_2_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_55_cast_fp16)[name = string("b_5_cast_fp16")]; - string var_3448_mode_0 = const()[name = string("op_3448_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_3448_cast_fp16 = gelu(mode = var_3448_mode_0, x = x_47_cast_fp16)[name = string("op_3448_cast_fp16")]; - tensor input_57_cast_fp16 = mul(x = var_3448_cast_fp16, y = b_5_cast_fp16)[name = string("input_57_cast_fp16")]; - string e_5_pad_type_0 = const()[name = string("e_5_pad_type_0"), val = string("valid")]; - tensor e_5_strides_0 = const()[name = string("e_5_strides_0"), val = tensor([1, 1])]; - tensor e_5_pad_0 = const()[name = string("e_5_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_5_dilations_0 = const()[name = string("e_5_dilations_0"), val = tensor([1, 1])]; - int32 e_5_groups_0 = const()[name = string("e_5_groups_0"), val = int32(1)]; - tensor model_model_layers_2_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94759040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100731072))))[name = string("model_model_layers_2_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_5_cast_fp16 = conv(dilations = e_5_dilations_0, groups = e_5_groups_0, pad = e_5_pad_0, pad_type = e_5_pad_type_0, strides = e_5_strides_0, weight = model_model_layers_2_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_57_cast_fp16)[name = string("e_5_cast_fp16")]; - tensor var_3456_axes_0 = const()[name = string("op_3456_axes_0"), val = tensor([2])]; - tensor var_3456_cast_fp16 = squeeze(axes = var_3456_axes_0, x = e_5_cast_fp16)[name = string("op_3456_cast_fp16")]; - tensor var_3457 = const()[name = string("op_3457"), val = tensor([0, 2, 1])]; - int32 var_3468 = const()[name = string("op_3468"), val = int32(-1)]; - fp16 const_110_promoted_to_fp16 = const()[name = string("const_110_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_45_cast_fp16 = transpose(perm = var_3457, x = var_3456_cast_fp16)[name = string("transpose_138")]; - tensor var_3470_cast_fp16 = mul(x = hidden_states_45_cast_fp16, y = const_110_promoted_to_fp16)[name = string("op_3470_cast_fp16")]; - bool input_59_interleave_0 = const()[name = string("input_59_interleave_0"), val = bool(false)]; - tensor input_59_cast_fp16 = concat(axis = var_3468, interleave = input_59_interleave_0, values = (hidden_states_45_cast_fp16, var_3470_cast_fp16))[name = string("input_59_cast_fp16")]; - tensor normed_69_axes_0 = const()[name = string("normed_69_axes_0"), val = tensor([-1])]; - fp16 var_3465_to_fp16 = const()[name = string("op_3465_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_69_cast_fp16 = layer_norm(axes = normed_69_axes_0, epsilon = var_3465_to_fp16, x = input_59_cast_fp16)[name = string("normed_69_cast_fp16")]; - tensor normed_71_begin_0 = const()[name = string("normed_71_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_71_end_0 = const()[name = string("normed_71_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_71_end_mask_0 = const()[name = string("normed_71_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_71_cast_fp16 = slice_by_index(begin = normed_71_begin_0, end = normed_71_end_0, end_mask = normed_71_end_mask_0, x = normed_69_cast_fp16)[name = string("normed_71_cast_fp16")]; - tensor var_3484_to_fp16 = const()[name = string("op_3484_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100749568)))]; - tensor hidden_states_47_cast_fp16 = mul(x = normed_71_cast_fp16, y = var_3484_to_fp16)[name = string("hidden_states_47_cast_fp16")]; - tensor hidden_states_49_cast_fp16 = add(x = hidden_states_43_cast_fp16, y = hidden_states_47_cast_fp16)[name = string("hidden_states_49_cast_fp16")]; - int32 var_3535 = const()[name = string("op_3535"), val = int32(-1)]; - fp16 const_114_promoted_to_fp16 = const()[name = string("const_114_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_3537_cast_fp16 = mul(x = hidden_states_49_cast_fp16, y = const_114_promoted_to_fp16)[name = string("op_3537_cast_fp16")]; - bool input_61_interleave_0 = const()[name = string("input_61_interleave_0"), val = bool(false)]; - tensor input_61_cast_fp16 = concat(axis = var_3535, interleave = input_61_interleave_0, values = (hidden_states_49_cast_fp16, var_3537_cast_fp16))[name = string("input_61_cast_fp16")]; - tensor normed_73_axes_0 = const()[name = string("normed_73_axes_0"), val = tensor([-1])]; - fp16 var_3532_to_fp16 = const()[name = string("op_3532_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_73_cast_fp16 = layer_norm(axes = normed_73_axes_0, epsilon = var_3532_to_fp16, x = input_61_cast_fp16)[name = string("normed_73_cast_fp16")]; - tensor normed_75_begin_0 = const()[name = string("normed_75_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_75_end_0 = const()[name = string("normed_75_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_75_end_mask_0 = const()[name = string("normed_75_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_75_cast_fp16 = slice_by_index(begin = normed_75_begin_0, end = normed_75_end_0, end_mask = normed_75_end_mask_0, x = normed_73_cast_fp16)[name = string("normed_75_cast_fp16")]; - tensor var_3551_to_fp16 = const()[name = string("op_3551_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100751936)))]; - tensor hidden_states_51_cast_fp16 = mul(x = normed_75_cast_fp16, y = var_3551_to_fp16)[name = string("hidden_states_51_cast_fp16")]; - tensor var_3556 = const()[name = string("op_3556"), val = tensor([0, 2, 1])]; - tensor var_3559_axes_0 = const()[name = string("op_3559_axes_0"), val = tensor([2])]; - tensor var_3557_cast_fp16 = transpose(perm = var_3556, x = hidden_states_51_cast_fp16)[name = string("transpose_137")]; - tensor var_3559_cast_fp16 = expand_dims(axes = var_3559_axes_0, x = var_3557_cast_fp16)[name = string("op_3559_cast_fp16")]; - string var_3575_pad_type_0 = const()[name = string("op_3575_pad_type_0"), val = string("valid")]; - tensor var_3575_strides_0 = const()[name = string("op_3575_strides_0"), val = tensor([1, 1])]; - tensor var_3575_pad_0 = const()[name = string("op_3575_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_3575_dilations_0 = const()[name = string("op_3575_dilations_0"), val = tensor([1, 1])]; - int32 var_3575_groups_0 = const()[name = string("op_3575_groups_0"), val = int32(1)]; - tensor var_3575 = conv(dilations = var_3575_dilations_0, groups = var_3575_groups_0, pad = var_3575_pad_0, pad_type = var_3575_pad_type_0, strides = var_3575_strides_0, weight = model_model_layers_3_self_attn_q_proj_weight_palettized, x = var_3559_cast_fp16)[name = string("op_3575")]; - tensor var_3580 = const()[name = string("op_3580"), val = tensor([1, 4, 1, 256])]; - tensor var_3581 = reshape(shape = var_3580, x = var_3575)[name = string("op_3581")]; - string var_3597_pad_type_0 = const()[name = string("op_3597_pad_type_0"), val = string("valid")]; - tensor var_3597_strides_0 = const()[name = string("op_3597_strides_0"), val = tensor([1, 1])]; - tensor var_3597_pad_0 = const()[name = string("op_3597_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_3597_dilations_0 = const()[name = string("op_3597_dilations_0"), val = tensor([1, 1])]; - int32 var_3597_groups_0 = const()[name = string("op_3597_groups_0"), val = int32(1)]; - tensor var_3597 = conv(dilations = var_3597_dilations_0, groups = var_3597_groups_0, pad = var_3597_pad_0, pad_type = var_3597_pad_type_0, strides = var_3597_strides_0, weight = model_model_layers_3_self_attn_k_proj_weight_palettized, x = var_3559_cast_fp16)[name = string("op_3597")]; - tensor var_3602 = const()[name = string("op_3602"), val = tensor([1, 1, 1, 256])]; - tensor var_3603 = reshape(shape = var_3602, x = var_3597)[name = string("op_3603")]; - string var_3619_pad_type_0 = const()[name = string("op_3619_pad_type_0"), val = string("valid")]; - tensor var_3619_strides_0 = const()[name = string("op_3619_strides_0"), val = tensor([1, 1])]; - tensor var_3619_pad_0 = const()[name = string("op_3619_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_3619_dilations_0 = const()[name = string("op_3619_dilations_0"), val = tensor([1, 1])]; - int32 var_3619_groups_0 = const()[name = string("op_3619_groups_0"), val = int32(1)]; - tensor var_3619 = conv(dilations = var_3619_dilations_0, groups = var_3619_groups_0, pad = var_3619_pad_0, pad_type = var_3619_pad_type_0, strides = var_3619_strides_0, weight = model_model_layers_3_self_attn_v_proj_weight_palettized, x = var_3559_cast_fp16)[name = string("op_3619")]; - tensor var_3624 = const()[name = string("op_3624"), val = tensor([1, 1, 1, 256])]; - tensor var_3625 = reshape(shape = var_3624, x = var_3619)[name = string("op_3625")]; - int32 var_3640 = const()[name = string("op_3640"), val = int32(-1)]; - fp16 const_118_promoted = const()[name = string("const_118_promoted"), val = fp16(-0x1p+0)]; - tensor var_3642 = mul(x = var_3581, y = const_118_promoted)[name = string("op_3642")]; - bool input_65_interleave_0 = const()[name = string("input_65_interleave_0"), val = bool(false)]; - tensor input_65 = concat(axis = var_3640, interleave = input_65_interleave_0, values = (var_3581, var_3642))[name = string("input_65")]; - tensor normed_77_axes_0 = const()[name = string("normed_77_axes_0"), val = tensor([-1])]; - fp16 var_3637_to_fp16 = const()[name = string("op_3637_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_77_cast_fp16 = layer_norm(axes = normed_77_axes_0, epsilon = var_3637_to_fp16, x = input_65)[name = string("normed_77_cast_fp16")]; - tensor normed_79_begin_0 = const()[name = string("normed_79_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_79_end_0 = const()[name = string("normed_79_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_79_end_mask_0 = const()[name = string("normed_79_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_79 = slice_by_index(begin = normed_79_begin_0, end = normed_79_end_0, end_mask = normed_79_end_mask_0, x = normed_77_cast_fp16)[name = string("normed_79")]; - tensor var_3656_to_fp16 = const()[name = string("op_3656_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100754304)))]; - tensor q_7_cast_fp16 = mul(x = normed_79, y = var_3656_to_fp16)[name = string("q_7_cast_fp16")]; - int32 var_3667 = const()[name = string("op_3667"), val = int32(-1)]; - fp16 const_122_promoted = const()[name = string("const_122_promoted"), val = fp16(-0x1p+0)]; - tensor var_3669 = mul(x = var_3603, y = const_122_promoted)[name = string("op_3669")]; - bool input_67_interleave_0 = const()[name = string("input_67_interleave_0"), val = bool(false)]; - tensor input_67 = concat(axis = var_3667, interleave = input_67_interleave_0, values = (var_3603, var_3669))[name = string("input_67")]; - tensor normed_81_axes_0 = const()[name = string("normed_81_axes_0"), val = tensor([-1])]; - fp16 var_3664_to_fp16 = const()[name = string("op_3664_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_81_cast_fp16 = layer_norm(axes = normed_81_axes_0, epsilon = var_3664_to_fp16, x = input_67)[name = string("normed_81_cast_fp16")]; - tensor normed_83_begin_0 = const()[name = string("normed_83_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_83_end_0 = const()[name = string("normed_83_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_83_end_mask_0 = const()[name = string("normed_83_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_83 = slice_by_index(begin = normed_83_begin_0, end = normed_83_end_0, end_mask = normed_83_end_mask_0, x = normed_81_cast_fp16)[name = string("normed_83")]; - tensor var_3683_to_fp16 = const()[name = string("op_3683_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100754880)))]; - tensor k_7_cast_fp16 = mul(x = normed_83, y = var_3683_to_fp16)[name = string("k_7_cast_fp16")]; - tensor var_3685_cast_fp16 = mul(x = q_7_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3685_cast_fp16")]; - tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_13_cast_fp16 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = q_7_cast_fp16)[name = string("x1_13_cast_fp16")]; - tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_13_cast_fp16 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = q_7_cast_fp16)[name = string("x2_13_cast_fp16")]; - fp16 const_128_promoted_to_fp16 = const()[name = string("const_128_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_3706_cast_fp16 = mul(x = x2_13_cast_fp16, y = const_128_promoted_to_fp16)[name = string("op_3706_cast_fp16")]; - int32 var_3708 = const()[name = string("op_3708"), val = int32(-1)]; - bool var_3709_interleave_0 = const()[name = string("op_3709_interleave_0"), val = bool(false)]; - tensor var_3709_cast_fp16 = concat(axis = var_3708, interleave = var_3709_interleave_0, values = (var_3706_cast_fp16, x1_13_cast_fp16))[name = string("op_3709_cast_fp16")]; - tensor var_3710_cast_fp16 = mul(x = var_3709_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3710_cast_fp16")]; - tensor query_states_13_cast_fp16 = add(x = var_3685_cast_fp16, y = var_3710_cast_fp16)[name = string("query_states_13_cast_fp16")]; - tensor var_3713_cast_fp16 = mul(x = k_7_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3713_cast_fp16")]; - tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_15_cast_fp16 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = k_7_cast_fp16)[name = string("x1_15_cast_fp16")]; - tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_15_cast_fp16 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = k_7_cast_fp16)[name = string("x2_15_cast_fp16")]; - fp16 const_131_promoted_to_fp16 = const()[name = string("const_131_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_3734_cast_fp16 = mul(x = x2_15_cast_fp16, y = const_131_promoted_to_fp16)[name = string("op_3734_cast_fp16")]; - int32 var_3736 = const()[name = string("op_3736"), val = int32(-1)]; - bool var_3737_interleave_0 = const()[name = string("op_3737_interleave_0"), val = bool(false)]; - tensor var_3737_cast_fp16 = concat(axis = var_3736, interleave = var_3737_interleave_0, values = (var_3734_cast_fp16, x1_15_cast_fp16))[name = string("op_3737_cast_fp16")]; - tensor var_3738_cast_fp16 = mul(x = var_3737_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3738_cast_fp16")]; - tensor key_states_13_cast_fp16 = add(x = var_3713_cast_fp16, y = var_3738_cast_fp16)[name = string("key_states_13_cast_fp16")]; - tensor key_slice_7_begin_0 = const()[name = string("key_slice_7_begin_0"), val = tensor([3, 0, 0, 0])]; - tensor key_slice_7_end_0 = const()[name = string("key_slice_7_end_0"), val = tensor([4, 1, 512, 256])]; - tensor key_slice_7_end_mask_0 = const()[name = string("key_slice_7_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_7_cast_fp16 = slice_by_index(begin = key_slice_7_begin_0, end = key_slice_7_end_0, end_mask = key_slice_7_end_mask_0, x = coreml_update_state_57)[name = string("key_slice_7_cast_fp16")]; - tensor key_tail_7_begin_0 = const()[name = string("key_tail_7_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor key_tail_7_end_0 = const()[name = string("key_tail_7_end_0"), val = tensor([1, 1, 512, 256])]; - tensor key_tail_7_cast_fp16 = slice_by_index(begin = key_tail_7_begin_0, end = key_tail_7_end_0, x = key_slice_7_cast_fp16)[name = string("key_tail_7_cast_fp16")]; - int32 var_3751 = const()[name = string("op_3751"), val = int32(2)]; - bool shifted_key_7_interleave_0 = const()[name = string("shifted_key_7_interleave_0"), val = bool(false)]; - tensor shifted_key_7_cast_fp16 = concat(axis = var_3751, interleave = shifted_key_7_interleave_0, values = (key_tail_7_cast_fp16, key_states_13_cast_fp16))[name = string("shifted_key_7_cast_fp16")]; - tensor concat_12 = const()[name = string("concat_12"), val = tensor([3, 0, 0, 0])]; - tensor concat_13 = const()[name = string("concat_13"), val = tensor([4, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_12, begin_mask = model_model_kv_cache_local_internal_tensor_assign_7_begin_mask_0, end = concat_13, end_mask = model_model_kv_cache_local_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_7_stride_0, update = shifted_key_7_cast_fp16, x = coreml_update_state_57)[name = string("model_model_kv_cache_local_internal_tensor_assign_7_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_58_write_state")]; - tensor coreml_update_state_58 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_58")]; - tensor value_slice_7_begin_0 = const()[name = string("value_slice_7_begin_0"), val = tensor([25, 0, 0, 0])]; - tensor value_slice_7_end_0 = const()[name = string("value_slice_7_end_0"), val = tensor([26, 1, 512, 256])]; - tensor value_slice_7_end_mask_0 = const()[name = string("value_slice_7_end_mask_0"), val = tensor([false, true, true, true])]; - tensor value_slice_7_cast_fp16 = slice_by_index(begin = value_slice_7_begin_0, end = value_slice_7_end_0, end_mask = value_slice_7_end_mask_0, x = coreml_update_state_58)[name = string("value_slice_7_cast_fp16")]; - tensor value_tail_7_begin_0 = const()[name = string("value_tail_7_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor value_tail_7_end_0 = const()[name = string("value_tail_7_end_0"), val = tensor([1, 1, 512, 256])]; - tensor value_tail_7_cast_fp16 = slice_by_index(begin = value_tail_7_begin_0, end = value_tail_7_end_0, x = value_slice_7_cast_fp16)[name = string("value_tail_7_cast_fp16")]; - int32 var_3785 = const()[name = string("op_3785"), val = int32(2)]; - bool shifted_value_7_interleave_0 = const()[name = string("shifted_value_7_interleave_0"), val = bool(false)]; - tensor shifted_value_7_cast_fp16 = concat(axis = var_3785, interleave = shifted_value_7_interleave_0, values = (value_tail_7_cast_fp16, var_3625))[name = string("shifted_value_7_cast_fp16")]; - tensor concat_14 = const()[name = string("concat_14"), val = tensor([25, 0, 0, 0])]; - tensor concat_15 = const()[name = string("concat_15"), val = tensor([26, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_14, begin_mask = model_model_kv_cache_local_internal_tensor_assign_8_begin_mask_0, end = concat_15, end_mask = model_model_kv_cache_local_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_8_stride_0, update = shifted_value_7_cast_fp16, x = coreml_update_state_58)[name = string("model_model_kv_cache_local_internal_tensor_assign_8_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_59_write_state")]; - tensor coreml_update_state_59 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_59")]; - tensor var_3813_begin_0 = const()[name = string("op_3813_begin_0"), val = tensor([3, 0, 0, 0])]; - tensor var_3813_end_0 = const()[name = string("op_3813_end_0"), val = tensor([4, 1, 512, 256])]; - tensor var_3813_end_mask_0 = const()[name = string("op_3813_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_3813_cast_fp16 = slice_by_index(begin = var_3813_begin_0, end = var_3813_end_0, end_mask = var_3813_end_mask_0, x = coreml_update_state_59)[name = string("op_3813_cast_fp16")]; - tensor var_3820_begin_0 = const()[name = string("op_3820_begin_0"), val = tensor([25, 0, 0, 0])]; - tensor var_3820_end_0 = const()[name = string("op_3820_end_0"), val = tensor([26, 1, 512, 256])]; - tensor var_3820_end_mask_0 = const()[name = string("op_3820_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_3820_cast_fp16 = slice_by_index(begin = var_3820_begin_0, end = var_3820_end_0, end_mask = var_3820_end_mask_0, x = coreml_update_state_59)[name = string("op_3820_cast_fp16")]; - tensor var_3857 = const()[name = string("op_3857"), val = tensor([1, 4, 1, 1])]; - tensor x_53_cast_fp16 = tile(reps = var_3857, x = var_3813_cast_fp16)[name = string("x_53_cast_fp16")]; - tensor var_3877 = const()[name = string("op_3877"), val = tensor([1, 4, 1, 1])]; - tensor x_59_cast_fp16 = tile(reps = var_3877, x = var_3820_cast_fp16)[name = string("x_59_cast_fp16")]; - bool var_3904_transpose_x_1 = const()[name = string("op_3904_transpose_x_1"), val = bool(false)]; - bool var_3904_transpose_y_1 = const()[name = string("op_3904_transpose_y_1"), val = bool(true)]; - tensor var_3904 = matmul(transpose_x = var_3904_transpose_x_1, transpose_y = var_3904_transpose_y_1, x = query_states_13_cast_fp16, y = x_53_cast_fp16)[name = string("op_3904")]; - fp16 var_3905_to_fp16 = const()[name = string("op_3905_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_19_cast_fp16 = mul(x = var_3904, y = var_3905_to_fp16)[name = string("attn_weights_19_cast_fp16")]; - tensor attn_weights_21_cast_fp16 = add(x = attn_weights_19_cast_fp16, y = var_2059)[name = string("attn_weights_21_cast_fp16")]; - int32 var_3940 = const()[name = string("op_3940"), val = int32(-1)]; - tensor attn_weights_23_cast_fp16 = softmax(axis = var_3940, x = attn_weights_21_cast_fp16)[name = string("attn_weights_23_cast_fp16")]; - bool attn_output_31_transpose_x_0 = const()[name = string("attn_output_31_transpose_x_0"), val = bool(false)]; - bool attn_output_31_transpose_y_0 = const()[name = string("attn_output_31_transpose_y_0"), val = bool(false)]; - tensor attn_output_31_cast_fp16 = matmul(transpose_x = attn_output_31_transpose_x_0, transpose_y = attn_output_31_transpose_y_0, x = attn_weights_23_cast_fp16, y = x_59_cast_fp16)[name = string("attn_output_31_cast_fp16")]; - tensor var_3951_perm_0 = const()[name = string("op_3951_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_3955 = const()[name = string("op_3955"), val = tensor([1, 1, 1024])]; - tensor var_3951_cast_fp16 = transpose(perm = var_3951_perm_0, x = attn_output_31_cast_fp16)[name = string("transpose_136")]; - tensor attn_output_35_cast_fp16 = reshape(shape = var_3955, x = var_3951_cast_fp16)[name = string("attn_output_35_cast_fp16")]; - tensor var_3960 = const()[name = string("op_3960"), val = tensor([0, 2, 1])]; - string var_3976_pad_type_0 = const()[name = string("op_3976_pad_type_0"), val = string("valid")]; - int32 var_3976_groups_0 = const()[name = string("op_3976_groups_0"), val = int32(1)]; - tensor var_3976_strides_0 = const()[name = string("op_3976_strides_0"), val = tensor([1])]; - tensor var_3976_pad_0 = const()[name = string("op_3976_pad_0"), val = tensor([0, 0])]; - tensor var_3976_dilations_0 = const()[name = string("op_3976_dilations_0"), val = tensor([1])]; - tensor squeeze_3_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100755456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101640256))))[name = string("squeeze_3_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_3961_cast_fp16 = transpose(perm = var_3960, x = attn_output_35_cast_fp16)[name = string("transpose_135")]; - tensor var_3976_cast_fp16 = conv(dilations = var_3976_dilations_0, groups = var_3976_groups_0, pad = var_3976_pad_0, pad_type = var_3976_pad_type_0, strides = var_3976_strides_0, weight = squeeze_3_cast_fp16_to_fp32_to_fp16_palettized, x = var_3961_cast_fp16)[name = string("op_3976_cast_fp16")]; - tensor var_3980 = const()[name = string("op_3980"), val = tensor([0, 2, 1])]; - int32 var_3991 = const()[name = string("op_3991"), val = int32(-1)]; - fp16 const_140_promoted_to_fp16 = const()[name = string("const_140_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_57_cast_fp16 = transpose(perm = var_3980, x = var_3976_cast_fp16)[name = string("transpose_134")]; - tensor var_3993_cast_fp16 = mul(x = hidden_states_57_cast_fp16, y = const_140_promoted_to_fp16)[name = string("op_3993_cast_fp16")]; - bool input_71_interleave_0 = const()[name = string("input_71_interleave_0"), val = bool(false)]; - tensor input_71_cast_fp16 = concat(axis = var_3991, interleave = input_71_interleave_0, values = (hidden_states_57_cast_fp16, var_3993_cast_fp16))[name = string("input_71_cast_fp16")]; - tensor normed_85_axes_0 = const()[name = string("normed_85_axes_0"), val = tensor([-1])]; - fp16 var_3988_to_fp16 = const()[name = string("op_3988_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_85_cast_fp16 = layer_norm(axes = normed_85_axes_0, epsilon = var_3988_to_fp16, x = input_71_cast_fp16)[name = string("normed_85_cast_fp16")]; - tensor normed_87_begin_0 = const()[name = string("normed_87_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_87_end_0 = const()[name = string("normed_87_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_87_end_mask_0 = const()[name = string("normed_87_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_87_cast_fp16 = slice_by_index(begin = normed_87_begin_0, end = normed_87_end_0, end_mask = normed_87_end_mask_0, x = normed_85_cast_fp16)[name = string("normed_87_cast_fp16")]; - tensor var_4007_to_fp16 = const()[name = string("op_4007_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101658752)))]; - tensor attn_output_39_cast_fp16 = mul(x = normed_87_cast_fp16, y = var_4007_to_fp16)[name = string("attn_output_39_cast_fp16")]; - tensor hidden_states_59_cast_fp16 = add(x = hidden_states_49_cast_fp16, y = attn_output_39_cast_fp16)[name = string("hidden_states_59_cast_fp16")]; - int32 var_4020 = const()[name = string("op_4020"), val = int32(-1)]; - fp16 const_144_promoted_to_fp16 = const()[name = string("const_144_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_4022_cast_fp16 = mul(x = hidden_states_59_cast_fp16, y = const_144_promoted_to_fp16)[name = string("op_4022_cast_fp16")]; - bool input_73_interleave_0 = const()[name = string("input_73_interleave_0"), val = bool(false)]; - tensor input_73_cast_fp16 = concat(axis = var_4020, interleave = input_73_interleave_0, values = (hidden_states_59_cast_fp16, var_4022_cast_fp16))[name = string("input_73_cast_fp16")]; - tensor normed_89_axes_0 = const()[name = string("normed_89_axes_0"), val = tensor([-1])]; - fp16 var_4017_to_fp16 = const()[name = string("op_4017_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_89_cast_fp16 = layer_norm(axes = normed_89_axes_0, epsilon = var_4017_to_fp16, x = input_73_cast_fp16)[name = string("normed_89_cast_fp16")]; - tensor normed_91_begin_0 = const()[name = string("normed_91_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_91_end_0 = const()[name = string("normed_91_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_91_end_mask_0 = const()[name = string("normed_91_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_91_cast_fp16 = slice_by_index(begin = normed_91_begin_0, end = normed_91_end_0, end_mask = normed_91_end_mask_0, x = normed_89_cast_fp16)[name = string("normed_91_cast_fp16")]; - tensor var_4036_to_fp16 = const()[name = string("op_4036_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101661120)))]; - tensor x_61_cast_fp16 = mul(x = normed_91_cast_fp16, y = var_4036_to_fp16)[name = string("x_61_cast_fp16")]; - tensor var_4048 = const()[name = string("op_4048"), val = tensor([0, 2, 1])]; - tensor input_75_axes_0 = const()[name = string("input_75_axes_0"), val = tensor([2])]; - tensor var_4049_cast_fp16 = transpose(perm = var_4048, x = x_61_cast_fp16)[name = string("transpose_133")]; - tensor input_75_cast_fp16 = expand_dims(axes = input_75_axes_0, x = var_4049_cast_fp16)[name = string("input_75_cast_fp16")]; - string x_63_pad_type_0 = const()[name = string("x_63_pad_type_0"), val = string("valid")]; - tensor x_63_strides_0 = const()[name = string("x_63_strides_0"), val = tensor([1, 1])]; - tensor x_63_pad_0 = const()[name = string("x_63_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_63_dilations_0 = const()[name = string("x_63_dilations_0"), val = tensor([1, 1])]; - int32 x_63_groups_0 = const()[name = string("x_63_groups_0"), val = int32(1)]; - tensor model_model_layers_3_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597464448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(603436480))))[name = string("model_model_layers_3_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_63_cast_fp16 = conv(dilations = x_63_dilations_0, groups = x_63_groups_0, pad = x_63_pad_0, pad_type = x_63_pad_type_0, strides = x_63_strides_0, weight = model_model_layers_3_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_75_cast_fp16)[name = string("x_63_cast_fp16")]; - string b_7_pad_type_0 = const()[name = string("b_7_pad_type_0"), val = string("valid")]; - tensor b_7_strides_0 = const()[name = string("b_7_strides_0"), val = tensor([1, 1])]; - tensor b_7_pad_0 = const()[name = string("b_7_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_7_dilations_0 = const()[name = string("b_7_dilations_0"), val = tensor([1, 1])]; - int32 b_7_groups_0 = const()[name = string("b_7_groups_0"), val = int32(1)]; - tensor model_model_layers_3_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(603547136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(609519168))))[name = string("model_model_layers_3_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_7_cast_fp16 = conv(dilations = b_7_dilations_0, groups = b_7_groups_0, pad = b_7_pad_0, pad_type = b_7_pad_type_0, strides = b_7_strides_0, weight = model_model_layers_3_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_75_cast_fp16)[name = string("b_7_cast_fp16")]; - string var_4074_mode_0 = const()[name = string("op_4074_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_4074_cast_fp16 = gelu(mode = var_4074_mode_0, x = x_63_cast_fp16)[name = string("op_4074_cast_fp16")]; - tensor input_77_cast_fp16 = mul(x = var_4074_cast_fp16, y = b_7_cast_fp16)[name = string("input_77_cast_fp16")]; - string e_7_pad_type_0 = const()[name = string("e_7_pad_type_0"), val = string("valid")]; - tensor e_7_strides_0 = const()[name = string("e_7_strides_0"), val = tensor([1, 1])]; - tensor e_7_pad_0 = const()[name = string("e_7_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_7_dilations_0 = const()[name = string("e_7_dilations_0"), val = tensor([1, 1])]; - int32 e_7_groups_0 = const()[name = string("e_7_groups_0"), val = int32(1)]; - tensor model_model_layers_3_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113828864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119800896))))[name = string("model_model_layers_3_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_7_cast_fp16 = conv(dilations = e_7_dilations_0, groups = e_7_groups_0, pad = e_7_pad_0, pad_type = e_7_pad_type_0, strides = e_7_strides_0, weight = model_model_layers_3_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_77_cast_fp16)[name = string("e_7_cast_fp16")]; - tensor var_4082_axes_0 = const()[name = string("op_4082_axes_0"), val = tensor([2])]; - tensor var_4082_cast_fp16 = squeeze(axes = var_4082_axes_0, x = e_7_cast_fp16)[name = string("op_4082_cast_fp16")]; - tensor var_4083 = const()[name = string("op_4083"), val = tensor([0, 2, 1])]; - int32 var_4094 = const()[name = string("op_4094"), val = int32(-1)]; - fp16 const_148_promoted_to_fp16 = const()[name = string("const_148_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_61_cast_fp16 = transpose(perm = var_4083, x = var_4082_cast_fp16)[name = string("transpose_132")]; - tensor var_4096_cast_fp16 = mul(x = hidden_states_61_cast_fp16, y = const_148_promoted_to_fp16)[name = string("op_4096_cast_fp16")]; - bool input_79_interleave_0 = const()[name = string("input_79_interleave_0"), val = bool(false)]; - tensor input_79_cast_fp16 = concat(axis = var_4094, interleave = input_79_interleave_0, values = (hidden_states_61_cast_fp16, var_4096_cast_fp16))[name = string("input_79_cast_fp16")]; - tensor normed_93_axes_0 = const()[name = string("normed_93_axes_0"), val = tensor([-1])]; - fp16 var_4091_to_fp16 = const()[name = string("op_4091_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_93_cast_fp16 = layer_norm(axes = normed_93_axes_0, epsilon = var_4091_to_fp16, x = input_79_cast_fp16)[name = string("normed_93_cast_fp16")]; - tensor normed_95_begin_0 = const()[name = string("normed_95_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_95_end_0 = const()[name = string("normed_95_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_95_end_mask_0 = const()[name = string("normed_95_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_95_cast_fp16 = slice_by_index(begin = normed_95_begin_0, end = normed_95_end_0, end_mask = normed_95_end_mask_0, x = normed_93_cast_fp16)[name = string("normed_95_cast_fp16")]; - tensor var_4110_to_fp16 = const()[name = string("op_4110_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119819392)))]; - tensor hidden_states_63_cast_fp16 = mul(x = normed_95_cast_fp16, y = var_4110_to_fp16)[name = string("hidden_states_63_cast_fp16")]; - tensor hidden_states_65_cast_fp16 = add(x = hidden_states_59_cast_fp16, y = hidden_states_63_cast_fp16)[name = string("hidden_states_65_cast_fp16")]; - int32 var_4161 = const()[name = string("op_4161"), val = int32(-1)]; - fp16 const_152_promoted_to_fp16 = const()[name = string("const_152_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_4163_cast_fp16 = mul(x = hidden_states_65_cast_fp16, y = const_152_promoted_to_fp16)[name = string("op_4163_cast_fp16")]; - bool input_81_interleave_0 = const()[name = string("input_81_interleave_0"), val = bool(false)]; - tensor input_81_cast_fp16 = concat(axis = var_4161, interleave = input_81_interleave_0, values = (hidden_states_65_cast_fp16, var_4163_cast_fp16))[name = string("input_81_cast_fp16")]; - tensor normed_97_axes_0 = const()[name = string("normed_97_axes_0"), val = tensor([-1])]; - fp16 var_4158_to_fp16 = const()[name = string("op_4158_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_97_cast_fp16 = layer_norm(axes = normed_97_axes_0, epsilon = var_4158_to_fp16, x = input_81_cast_fp16)[name = string("normed_97_cast_fp16")]; - tensor normed_99_begin_0 = const()[name = string("normed_99_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_99_end_0 = const()[name = string("normed_99_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_99_end_mask_0 = const()[name = string("normed_99_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_99_cast_fp16 = slice_by_index(begin = normed_99_begin_0, end = normed_99_end_0, end_mask = normed_99_end_mask_0, x = normed_97_cast_fp16)[name = string("normed_99_cast_fp16")]; - tensor var_4177_to_fp16 = const()[name = string("op_4177_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119821760)))]; - tensor hidden_states_67_cast_fp16 = mul(x = normed_99_cast_fp16, y = var_4177_to_fp16)[name = string("hidden_states_67_cast_fp16")]; - tensor var_4182 = const()[name = string("op_4182"), val = tensor([0, 2, 1])]; - tensor var_4185_axes_0 = const()[name = string("op_4185_axes_0"), val = tensor([2])]; - tensor var_4183_cast_fp16 = transpose(perm = var_4182, x = hidden_states_67_cast_fp16)[name = string("transpose_131")]; - tensor var_4185_cast_fp16 = expand_dims(axes = var_4185_axes_0, x = var_4183_cast_fp16)[name = string("op_4185_cast_fp16")]; - string var_4201_pad_type_0 = const()[name = string("op_4201_pad_type_0"), val = string("valid")]; - tensor var_4201_strides_0 = const()[name = string("op_4201_strides_0"), val = tensor([1, 1])]; - tensor var_4201_pad_0 = const()[name = string("op_4201_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_4201_dilations_0 = const()[name = string("op_4201_dilations_0"), val = tensor([1, 1])]; - int32 var_4201_groups_0 = const()[name = string("op_4201_groups_0"), val = int32(1)]; - tensor var_4201 = conv(dilations = var_4201_dilations_0, groups = var_4201_groups_0, pad = var_4201_pad_0, pad_type = var_4201_pad_type_0, strides = var_4201_strides_0, weight = model_model_layers_4_self_attn_q_proj_weight_palettized, x = var_4185_cast_fp16)[name = string("op_4201")]; - tensor var_4206 = const()[name = string("op_4206"), val = tensor([1, 4, 1, 256])]; - tensor var_4207 = reshape(shape = var_4206, x = var_4201)[name = string("op_4207")]; - string var_4223_pad_type_0 = const()[name = string("op_4223_pad_type_0"), val = string("valid")]; - tensor var_4223_strides_0 = const()[name = string("op_4223_strides_0"), val = tensor([1, 1])]; - tensor var_4223_pad_0 = const()[name = string("op_4223_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_4223_dilations_0 = const()[name = string("op_4223_dilations_0"), val = tensor([1, 1])]; - int32 var_4223_groups_0 = const()[name = string("op_4223_groups_0"), val = int32(1)]; - tensor var_4223 = conv(dilations = var_4223_dilations_0, groups = var_4223_groups_0, pad = var_4223_pad_0, pad_type = var_4223_pad_type_0, strides = var_4223_strides_0, weight = model_model_layers_4_self_attn_k_proj_weight_palettized, x = var_4185_cast_fp16)[name = string("op_4223")]; - tensor var_4228 = const()[name = string("op_4228"), val = tensor([1, 1, 1, 256])]; - tensor var_4229 = reshape(shape = var_4228, x = var_4223)[name = string("op_4229")]; - string var_4245_pad_type_0 = const()[name = string("op_4245_pad_type_0"), val = string("valid")]; - tensor var_4245_strides_0 = const()[name = string("op_4245_strides_0"), val = tensor([1, 1])]; - tensor var_4245_pad_0 = const()[name = string("op_4245_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_4245_dilations_0 = const()[name = string("op_4245_dilations_0"), val = tensor([1, 1])]; - int32 var_4245_groups_0 = const()[name = string("op_4245_groups_0"), val = int32(1)]; - tensor var_4245 = conv(dilations = var_4245_dilations_0, groups = var_4245_groups_0, pad = var_4245_pad_0, pad_type = var_4245_pad_type_0, strides = var_4245_strides_0, weight = model_model_layers_4_self_attn_v_proj_weight_palettized, x = var_4185_cast_fp16)[name = string("op_4245")]; - tensor var_4250 = const()[name = string("op_4250"), val = tensor([1, 1, 1, 256])]; - tensor var_4251 = reshape(shape = var_4250, x = var_4245)[name = string("op_4251")]; - int32 var_4266 = const()[name = string("op_4266"), val = int32(-1)]; - fp16 const_156_promoted = const()[name = string("const_156_promoted"), val = fp16(-0x1p+0)]; - tensor var_4268 = mul(x = var_4207, y = const_156_promoted)[name = string("op_4268")]; - bool input_85_interleave_0 = const()[name = string("input_85_interleave_0"), val = bool(false)]; - tensor input_85 = concat(axis = var_4266, interleave = input_85_interleave_0, values = (var_4207, var_4268))[name = string("input_85")]; - tensor normed_101_axes_0 = const()[name = string("normed_101_axes_0"), val = tensor([-1])]; - fp16 var_4263_to_fp16 = const()[name = string("op_4263_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_101_cast_fp16 = layer_norm(axes = normed_101_axes_0, epsilon = var_4263_to_fp16, x = input_85)[name = string("normed_101_cast_fp16")]; - tensor normed_103_begin_0 = const()[name = string("normed_103_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_103_end_0 = const()[name = string("normed_103_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_103_end_mask_0 = const()[name = string("normed_103_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_103 = slice_by_index(begin = normed_103_begin_0, end = normed_103_end_0, end_mask = normed_103_end_mask_0, x = normed_101_cast_fp16)[name = string("normed_103")]; - tensor var_4282_to_fp16 = const()[name = string("op_4282_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119824128)))]; - tensor q_9_cast_fp16 = mul(x = normed_103, y = var_4282_to_fp16)[name = string("q_9_cast_fp16")]; - int32 var_4293 = const()[name = string("op_4293"), val = int32(-1)]; - fp16 const_160_promoted = const()[name = string("const_160_promoted"), val = fp16(-0x1p+0)]; - tensor var_4295 = mul(x = var_4229, y = const_160_promoted)[name = string("op_4295")]; - bool input_87_interleave_0 = const()[name = string("input_87_interleave_0"), val = bool(false)]; - tensor input_87 = concat(axis = var_4293, interleave = input_87_interleave_0, values = (var_4229, var_4295))[name = string("input_87")]; - tensor normed_105_axes_0 = const()[name = string("normed_105_axes_0"), val = tensor([-1])]; - fp16 var_4290_to_fp16 = const()[name = string("op_4290_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_105_cast_fp16 = layer_norm(axes = normed_105_axes_0, epsilon = var_4290_to_fp16, x = input_87)[name = string("normed_105_cast_fp16")]; - tensor normed_107_begin_0 = const()[name = string("normed_107_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_107_end_0 = const()[name = string("normed_107_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_107_end_mask_0 = const()[name = string("normed_107_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_107 = slice_by_index(begin = normed_107_begin_0, end = normed_107_end_0, end_mask = normed_107_end_mask_0, x = normed_105_cast_fp16)[name = string("normed_107")]; - tensor var_4309_to_fp16 = const()[name = string("op_4309_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119824704)))]; - tensor k_9_cast_fp16 = mul(x = normed_107, y = var_4309_to_fp16)[name = string("k_9_cast_fp16")]; - tensor var_4311_cast_fp16 = mul(x = q_9_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4311_cast_fp16")]; - tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_17_cast_fp16 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = q_9_cast_fp16)[name = string("x1_17_cast_fp16")]; - tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_17_cast_fp16 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = q_9_cast_fp16)[name = string("x2_17_cast_fp16")]; - fp16 const_166_promoted_to_fp16 = const()[name = string("const_166_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_4332_cast_fp16 = mul(x = x2_17_cast_fp16, y = const_166_promoted_to_fp16)[name = string("op_4332_cast_fp16")]; - int32 var_4334 = const()[name = string("op_4334"), val = int32(-1)]; - bool var_4335_interleave_0 = const()[name = string("op_4335_interleave_0"), val = bool(false)]; - tensor var_4335_cast_fp16 = concat(axis = var_4334, interleave = var_4335_interleave_0, values = (var_4332_cast_fp16, x1_17_cast_fp16))[name = string("op_4335_cast_fp16")]; - tensor var_4336_cast_fp16 = mul(x = var_4335_cast_fp16, y = sin_1_cast_fp16)[name = string("op_4336_cast_fp16")]; - tensor query_states_17_cast_fp16 = add(x = var_4311_cast_fp16, y = var_4336_cast_fp16)[name = string("query_states_17_cast_fp16")]; - tensor var_4339_cast_fp16 = mul(x = k_9_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4339_cast_fp16")]; - tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_19_cast_fp16 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = k_9_cast_fp16)[name = string("x1_19_cast_fp16")]; - tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_19_cast_fp16 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = k_9_cast_fp16)[name = string("x2_19_cast_fp16")]; - fp16 const_169_promoted_to_fp16 = const()[name = string("const_169_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_4360_cast_fp16 = mul(x = x2_19_cast_fp16, y = const_169_promoted_to_fp16)[name = string("op_4360_cast_fp16")]; - int32 var_4362 = const()[name = string("op_4362"), val = int32(-1)]; - bool var_4363_interleave_0 = const()[name = string("op_4363_interleave_0"), val = bool(false)]; - tensor var_4363_cast_fp16 = concat(axis = var_4362, interleave = var_4363_interleave_0, values = (var_4360_cast_fp16, x1_19_cast_fp16))[name = string("op_4363_cast_fp16")]; - tensor var_4364_cast_fp16 = mul(x = var_4363_cast_fp16, y = sin_1_cast_fp16)[name = string("op_4364_cast_fp16")]; - tensor key_states_17_cast_fp16 = add(x = var_4339_cast_fp16, y = var_4364_cast_fp16)[name = string("key_states_17_cast_fp16")]; - tensor key_slice_9_begin_0 = const()[name = string("key_slice_9_begin_0"), val = tensor([4, 0, 0, 0])]; - tensor key_slice_9_end_0 = const()[name = string("key_slice_9_end_0"), val = tensor([5, 1, 512, 256])]; - tensor key_slice_9_end_mask_0 = const()[name = string("key_slice_9_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_9_cast_fp16 = slice_by_index(begin = key_slice_9_begin_0, end = key_slice_9_end_0, end_mask = key_slice_9_end_mask_0, x = coreml_update_state_59)[name = string("key_slice_9_cast_fp16")]; - tensor key_tail_9_begin_0 = const()[name = string("key_tail_9_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor key_tail_9_end_0 = const()[name = string("key_tail_9_end_0"), val = tensor([1, 1, 512, 256])]; - tensor key_tail_9_cast_fp16 = slice_by_index(begin = key_tail_9_begin_0, end = key_tail_9_end_0, x = key_slice_9_cast_fp16)[name = string("key_tail_9_cast_fp16")]; - int32 var_4377 = const()[name = string("op_4377"), val = int32(2)]; - bool shifted_key_9_interleave_0 = const()[name = string("shifted_key_9_interleave_0"), val = bool(false)]; - tensor shifted_key_9_cast_fp16 = concat(axis = var_4377, interleave = shifted_key_9_interleave_0, values = (key_tail_9_cast_fp16, key_states_17_cast_fp16))[name = string("shifted_key_9_cast_fp16")]; - tensor concat_16 = const()[name = string("concat_16"), val = tensor([4, 0, 0, 0])]; - tensor concat_17 = const()[name = string("concat_17"), val = tensor([5, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_9_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_9_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_9_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_16, begin_mask = model_model_kv_cache_local_internal_tensor_assign_9_begin_mask_0, end = concat_17, end_mask = model_model_kv_cache_local_internal_tensor_assign_9_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_9_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_9_stride_0, update = shifted_key_9_cast_fp16, x = coreml_update_state_59)[name = string("model_model_kv_cache_local_internal_tensor_assign_9_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_9_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_60_write_state")]; - tensor coreml_update_state_60 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_60")]; - tensor value_slice_9_begin_0 = const()[name = string("value_slice_9_begin_0"), val = tensor([26, 0, 0, 0])]; - tensor value_slice_9_end_0 = const()[name = string("value_slice_9_end_0"), val = tensor([27, 1, 512, 256])]; - tensor value_slice_9_end_mask_0 = const()[name = string("value_slice_9_end_mask_0"), val = tensor([false, true, true, true])]; - tensor value_slice_9_cast_fp16 = slice_by_index(begin = value_slice_9_begin_0, end = value_slice_9_end_0, end_mask = value_slice_9_end_mask_0, x = coreml_update_state_60)[name = string("value_slice_9_cast_fp16")]; - tensor value_tail_9_begin_0 = const()[name = string("value_tail_9_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor value_tail_9_end_0 = const()[name = string("value_tail_9_end_0"), val = tensor([1, 1, 512, 256])]; - tensor value_tail_9_cast_fp16 = slice_by_index(begin = value_tail_9_begin_0, end = value_tail_9_end_0, x = value_slice_9_cast_fp16)[name = string("value_tail_9_cast_fp16")]; - int32 var_4411 = const()[name = string("op_4411"), val = int32(2)]; - bool shifted_value_9_interleave_0 = const()[name = string("shifted_value_9_interleave_0"), val = bool(false)]; - tensor shifted_value_9_cast_fp16 = concat(axis = var_4411, interleave = shifted_value_9_interleave_0, values = (value_tail_9_cast_fp16, var_4251))[name = string("shifted_value_9_cast_fp16")]; - tensor concat_18 = const()[name = string("concat_18"), val = tensor([26, 0, 0, 0])]; - tensor concat_19 = const()[name = string("concat_19"), val = tensor([27, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_10_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_10_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_10_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_18, begin_mask = model_model_kv_cache_local_internal_tensor_assign_10_begin_mask_0, end = concat_19, end_mask = model_model_kv_cache_local_internal_tensor_assign_10_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_10_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_10_stride_0, update = shifted_value_9_cast_fp16, x = coreml_update_state_60)[name = string("model_model_kv_cache_local_internal_tensor_assign_10_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_10_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_61_write_state")]; - tensor coreml_update_state_61 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_61")]; - tensor var_4439_begin_0 = const()[name = string("op_4439_begin_0"), val = tensor([4, 0, 0, 0])]; - tensor var_4439_end_0 = const()[name = string("op_4439_end_0"), val = tensor([5, 1, 512, 256])]; - tensor var_4439_end_mask_0 = const()[name = string("op_4439_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_4439_cast_fp16 = slice_by_index(begin = var_4439_begin_0, end = var_4439_end_0, end_mask = var_4439_end_mask_0, x = coreml_update_state_61)[name = string("op_4439_cast_fp16")]; - tensor var_4446_begin_0 = const()[name = string("op_4446_begin_0"), val = tensor([26, 0, 0, 0])]; - tensor var_4446_end_0 = const()[name = string("op_4446_end_0"), val = tensor([27, 1, 512, 256])]; - tensor var_4446_end_mask_0 = const()[name = string("op_4446_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_4446_cast_fp16 = slice_by_index(begin = var_4446_begin_0, end = var_4446_end_0, end_mask = var_4446_end_mask_0, x = coreml_update_state_61)[name = string("op_4446_cast_fp16")]; - tensor var_4483 = const()[name = string("op_4483"), val = tensor([1, 4, 1, 1])]; - tensor x_69_cast_fp16 = tile(reps = var_4483, x = var_4439_cast_fp16)[name = string("x_69_cast_fp16")]; - tensor var_4503 = const()[name = string("op_4503"), val = tensor([1, 4, 1, 1])]; - tensor x_75_cast_fp16 = tile(reps = var_4503, x = var_4446_cast_fp16)[name = string("x_75_cast_fp16")]; - bool var_4530_transpose_x_1 = const()[name = string("op_4530_transpose_x_1"), val = bool(false)]; - bool var_4530_transpose_y_1 = const()[name = string("op_4530_transpose_y_1"), val = bool(true)]; - tensor var_4530 = matmul(transpose_x = var_4530_transpose_x_1, transpose_y = var_4530_transpose_y_1, x = query_states_17_cast_fp16, y = x_69_cast_fp16)[name = string("op_4530")]; - fp16 var_4531_to_fp16 = const()[name = string("op_4531_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_25_cast_fp16 = mul(x = var_4530, y = var_4531_to_fp16)[name = string("attn_weights_25_cast_fp16")]; - tensor attn_weights_27_cast_fp16 = add(x = attn_weights_25_cast_fp16, y = var_2059)[name = string("attn_weights_27_cast_fp16")]; - int32 var_4566 = const()[name = string("op_4566"), val = int32(-1)]; - tensor attn_weights_29_cast_fp16 = softmax(axis = var_4566, x = attn_weights_27_cast_fp16)[name = string("attn_weights_29_cast_fp16")]; - bool attn_output_41_transpose_x_0 = const()[name = string("attn_output_41_transpose_x_0"), val = bool(false)]; - bool attn_output_41_transpose_y_0 = const()[name = string("attn_output_41_transpose_y_0"), val = bool(false)]; - tensor attn_output_41_cast_fp16 = matmul(transpose_x = attn_output_41_transpose_x_0, transpose_y = attn_output_41_transpose_y_0, x = attn_weights_29_cast_fp16, y = x_75_cast_fp16)[name = string("attn_output_41_cast_fp16")]; - tensor var_4577_perm_0 = const()[name = string("op_4577_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_4581 = const()[name = string("op_4581"), val = tensor([1, 1, 1024])]; - tensor var_4577_cast_fp16 = transpose(perm = var_4577_perm_0, x = attn_output_41_cast_fp16)[name = string("transpose_130")]; - tensor attn_output_45_cast_fp16 = reshape(shape = var_4581, x = var_4577_cast_fp16)[name = string("attn_output_45_cast_fp16")]; - tensor var_4586 = const()[name = string("op_4586"), val = tensor([0, 2, 1])]; - string var_4602_pad_type_0 = const()[name = string("op_4602_pad_type_0"), val = string("valid")]; - int32 var_4602_groups_0 = const()[name = string("op_4602_groups_0"), val = int32(1)]; - tensor var_4602_strides_0 = const()[name = string("op_4602_strides_0"), val = tensor([1])]; - tensor var_4602_pad_0 = const()[name = string("op_4602_pad_0"), val = tensor([0, 0])]; - tensor var_4602_dilations_0 = const()[name = string("op_4602_dilations_0"), val = tensor([1])]; - tensor squeeze_4_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119825280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120710080))))[name = string("squeeze_4_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_4587_cast_fp16 = transpose(perm = var_4586, x = attn_output_45_cast_fp16)[name = string("transpose_129")]; - tensor var_4602_cast_fp16 = conv(dilations = var_4602_dilations_0, groups = var_4602_groups_0, pad = var_4602_pad_0, pad_type = var_4602_pad_type_0, strides = var_4602_strides_0, weight = squeeze_4_cast_fp16_to_fp32_to_fp16_palettized, x = var_4587_cast_fp16)[name = string("op_4602_cast_fp16")]; - tensor var_4606 = const()[name = string("op_4606"), val = tensor([0, 2, 1])]; - int32 var_4617 = const()[name = string("op_4617"), val = int32(-1)]; - fp16 const_178_promoted_to_fp16 = const()[name = string("const_178_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_73_cast_fp16 = transpose(perm = var_4606, x = var_4602_cast_fp16)[name = string("transpose_128")]; - tensor var_4619_cast_fp16 = mul(x = hidden_states_73_cast_fp16, y = const_178_promoted_to_fp16)[name = string("op_4619_cast_fp16")]; - bool input_91_interleave_0 = const()[name = string("input_91_interleave_0"), val = bool(false)]; - tensor input_91_cast_fp16 = concat(axis = var_4617, interleave = input_91_interleave_0, values = (hidden_states_73_cast_fp16, var_4619_cast_fp16))[name = string("input_91_cast_fp16")]; - tensor normed_109_axes_0 = const()[name = string("normed_109_axes_0"), val = tensor([-1])]; - fp16 var_4614_to_fp16 = const()[name = string("op_4614_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_109_cast_fp16 = layer_norm(axes = normed_109_axes_0, epsilon = var_4614_to_fp16, x = input_91_cast_fp16)[name = string("normed_109_cast_fp16")]; - tensor normed_111_begin_0 = const()[name = string("normed_111_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_111_end_0 = const()[name = string("normed_111_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_111_end_mask_0 = const()[name = string("normed_111_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_111_cast_fp16 = slice_by_index(begin = normed_111_begin_0, end = normed_111_end_0, end_mask = normed_111_end_mask_0, x = normed_109_cast_fp16)[name = string("normed_111_cast_fp16")]; - tensor var_4633_to_fp16 = const()[name = string("op_4633_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120728576)))]; - tensor attn_output_49_cast_fp16 = mul(x = normed_111_cast_fp16, y = var_4633_to_fp16)[name = string("attn_output_49_cast_fp16")]; - tensor hidden_states_75_cast_fp16 = add(x = hidden_states_65_cast_fp16, y = attn_output_49_cast_fp16)[name = string("hidden_states_75_cast_fp16")]; - int32 var_4646 = const()[name = string("op_4646"), val = int32(-1)]; - fp16 const_182_promoted_to_fp16 = const()[name = string("const_182_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_4648_cast_fp16 = mul(x = hidden_states_75_cast_fp16, y = const_182_promoted_to_fp16)[name = string("op_4648_cast_fp16")]; - bool input_93_interleave_0 = const()[name = string("input_93_interleave_0"), val = bool(false)]; - tensor input_93_cast_fp16 = concat(axis = var_4646, interleave = input_93_interleave_0, values = (hidden_states_75_cast_fp16, var_4648_cast_fp16))[name = string("input_93_cast_fp16")]; - tensor normed_113_axes_0 = const()[name = string("normed_113_axes_0"), val = tensor([-1])]; - fp16 var_4643_to_fp16 = const()[name = string("op_4643_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_113_cast_fp16 = layer_norm(axes = normed_113_axes_0, epsilon = var_4643_to_fp16, x = input_93_cast_fp16)[name = string("normed_113_cast_fp16")]; - tensor normed_115_begin_0 = const()[name = string("normed_115_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_115_end_0 = const()[name = string("normed_115_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_115_end_mask_0 = const()[name = string("normed_115_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_115_cast_fp16 = slice_by_index(begin = normed_115_begin_0, end = normed_115_end_0, end_mask = normed_115_end_mask_0, x = normed_113_cast_fp16)[name = string("normed_115_cast_fp16")]; - tensor var_4662_to_fp16 = const()[name = string("op_4662_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120730944)))]; - tensor x_77_cast_fp16 = mul(x = normed_115_cast_fp16, y = var_4662_to_fp16)[name = string("x_77_cast_fp16")]; - tensor var_4674 = const()[name = string("op_4674"), val = tensor([0, 2, 1])]; - tensor input_95_axes_0 = const()[name = string("input_95_axes_0"), val = tensor([2])]; - tensor var_4675_cast_fp16 = transpose(perm = var_4674, x = x_77_cast_fp16)[name = string("transpose_127")]; - tensor input_95_cast_fp16 = expand_dims(axes = input_95_axes_0, x = var_4675_cast_fp16)[name = string("input_95_cast_fp16")]; - string x_79_pad_type_0 = const()[name = string("x_79_pad_type_0"), val = string("valid")]; - tensor x_79_strides_0 = const()[name = string("x_79_strides_0"), val = tensor([1, 1])]; - tensor x_79_pad_0 = const()[name = string("x_79_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_79_dilations_0 = const()[name = string("x_79_dilations_0"), val = tensor([1, 1])]; - int32 x_79_groups_0 = const()[name = string("x_79_groups_0"), val = int32(1)]; - tensor model_model_layers_4_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(609629824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(615601856))))[name = string("model_model_layers_4_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_79_cast_fp16 = conv(dilations = x_79_dilations_0, groups = x_79_groups_0, pad = x_79_pad_0, pad_type = x_79_pad_type_0, strides = x_79_strides_0, weight = model_model_layers_4_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_95_cast_fp16)[name = string("x_79_cast_fp16")]; - string b_9_pad_type_0 = const()[name = string("b_9_pad_type_0"), val = string("valid")]; - tensor b_9_strides_0 = const()[name = string("b_9_strides_0"), val = tensor([1, 1])]; - tensor b_9_pad_0 = const()[name = string("b_9_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_9_dilations_0 = const()[name = string("b_9_dilations_0"), val = tensor([1, 1])]; - int32 b_9_groups_0 = const()[name = string("b_9_groups_0"), val = int32(1)]; - tensor model_model_layers_4_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(615712512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(621684544))))[name = string("model_model_layers_4_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_9_cast_fp16 = conv(dilations = b_9_dilations_0, groups = b_9_groups_0, pad = b_9_pad_0, pad_type = b_9_pad_type_0, strides = b_9_strides_0, weight = model_model_layers_4_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_95_cast_fp16)[name = string("b_9_cast_fp16")]; - string var_4700_mode_0 = const()[name = string("op_4700_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_4700_cast_fp16 = gelu(mode = var_4700_mode_0, x = x_79_cast_fp16)[name = string("op_4700_cast_fp16")]; - tensor input_97_cast_fp16 = mul(x = var_4700_cast_fp16, y = b_9_cast_fp16)[name = string("input_97_cast_fp16")]; - string e_9_pad_type_0 = const()[name = string("e_9_pad_type_0"), val = string("valid")]; - tensor e_9_strides_0 = const()[name = string("e_9_strides_0"), val = tensor([1, 1])]; - tensor e_9_pad_0 = const()[name = string("e_9_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_9_dilations_0 = const()[name = string("e_9_dilations_0"), val = tensor([1, 1])]; - int32 e_9_groups_0 = const()[name = string("e_9_groups_0"), val = int32(1)]; - tensor model_model_layers_4_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132898688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138870720))))[name = string("model_model_layers_4_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_9_cast_fp16 = conv(dilations = e_9_dilations_0, groups = e_9_groups_0, pad = e_9_pad_0, pad_type = e_9_pad_type_0, strides = e_9_strides_0, weight = model_model_layers_4_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_97_cast_fp16)[name = string("e_9_cast_fp16")]; - tensor var_4708_axes_0 = const()[name = string("op_4708_axes_0"), val = tensor([2])]; - tensor var_4708_cast_fp16 = squeeze(axes = var_4708_axes_0, x = e_9_cast_fp16)[name = string("op_4708_cast_fp16")]; - tensor var_4709 = const()[name = string("op_4709"), val = tensor([0, 2, 1])]; - int32 var_4720 = const()[name = string("op_4720"), val = int32(-1)]; - fp16 const_186_promoted_to_fp16 = const()[name = string("const_186_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_77_cast_fp16 = transpose(perm = var_4709, x = var_4708_cast_fp16)[name = string("transpose_126")]; - tensor var_4722_cast_fp16 = mul(x = hidden_states_77_cast_fp16, y = const_186_promoted_to_fp16)[name = string("op_4722_cast_fp16")]; - bool input_99_interleave_0 = const()[name = string("input_99_interleave_0"), val = bool(false)]; - tensor input_99_cast_fp16 = concat(axis = var_4720, interleave = input_99_interleave_0, values = (hidden_states_77_cast_fp16, var_4722_cast_fp16))[name = string("input_99_cast_fp16")]; - tensor normed_117_axes_0 = const()[name = string("normed_117_axes_0"), val = tensor([-1])]; - fp16 var_4717_to_fp16 = const()[name = string("op_4717_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_117_cast_fp16 = layer_norm(axes = normed_117_axes_0, epsilon = var_4717_to_fp16, x = input_99_cast_fp16)[name = string("normed_117_cast_fp16")]; - tensor normed_119_begin_0 = const()[name = string("normed_119_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_119_end_0 = const()[name = string("normed_119_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_119_end_mask_0 = const()[name = string("normed_119_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_119_cast_fp16 = slice_by_index(begin = normed_119_begin_0, end = normed_119_end_0, end_mask = normed_119_end_mask_0, x = normed_117_cast_fp16)[name = string("normed_119_cast_fp16")]; - tensor var_4736_to_fp16 = const()[name = string("op_4736_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138889216)))]; - tensor hidden_states_79_cast_fp16 = mul(x = normed_119_cast_fp16, y = var_4736_to_fp16)[name = string("hidden_states_79_cast_fp16")]; - tensor hidden_states_81_cast_fp16 = add(x = hidden_states_75_cast_fp16, y = hidden_states_79_cast_fp16)[name = string("hidden_states_81_cast_fp16")]; - int32 var_4748_axis_0 = const()[name = string("op_4748_axis_0"), val = int32(1)]; - int32 var_4748_batch_dims_0 = const()[name = string("op_4748_batch_dims_0"), val = int32(0)]; - bool var_4748_validate_indices_0 = const()[name = string("op_4748_validate_indices_0"), val = bool(false)]; - tensor var_4740_to_fp16 = const()[name = string("op_4740_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138891584)))]; - tensor var_4748_cast_fp16_cast_uint16 = gather(axis = var_4748_axis_0, batch_dims = var_4748_batch_dims_0, indices = current_pos_to_uint16, validate_indices = var_4748_validate_indices_0, x = var_4740_to_fp16)[name = string("op_4748_cast_fp16_cast_uint16")]; - tensor var_4753 = const()[name = string("op_4753"), val = tensor([1, 1, 1, -1])]; - tensor sin_21_cast_fp16 = reshape(shape = var_4753, x = var_4748_cast_fp16_cast_uint16)[name = string("sin_21_cast_fp16")]; - int32 var_4763_axis_0 = const()[name = string("op_4763_axis_0"), val = int32(1)]; - int32 var_4763_batch_dims_0 = const()[name = string("op_4763_batch_dims_0"), val = int32(0)]; - bool var_4763_validate_indices_0 = const()[name = string("op_4763_validate_indices_0"), val = bool(false)]; - tensor var_4755_to_fp16 = const()[name = string("op_4755_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143085952)))]; - tensor var_4763_cast_fp16_cast_uint16 = gather(axis = var_4763_axis_0, batch_dims = var_4763_batch_dims_0, indices = current_pos_to_uint16, validate_indices = var_4763_validate_indices_0, x = var_4755_to_fp16)[name = string("op_4763_cast_fp16_cast_uint16")]; - tensor var_4768 = const()[name = string("op_4768"), val = tensor([1, 1, 1, -1])]; - tensor cos_21_cast_fp16 = reshape(shape = var_4768, x = var_4763_cast_fp16_cast_uint16)[name = string("cos_21_cast_fp16")]; - int32 var_4789 = const()[name = string("op_4789"), val = int32(-1)]; - fp16 const_190_promoted_to_fp16 = const()[name = string("const_190_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_4791_cast_fp16 = mul(x = hidden_states_81_cast_fp16, y = const_190_promoted_to_fp16)[name = string("op_4791_cast_fp16")]; - bool input_101_interleave_0 = const()[name = string("input_101_interleave_0"), val = bool(false)]; - tensor input_101_cast_fp16 = concat(axis = var_4789, interleave = input_101_interleave_0, values = (hidden_states_81_cast_fp16, var_4791_cast_fp16))[name = string("input_101_cast_fp16")]; - tensor normed_121_axes_0 = const()[name = string("normed_121_axes_0"), val = tensor([-1])]; - fp16 var_4786_to_fp16 = const()[name = string("op_4786_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_121_cast_fp16 = layer_norm(axes = normed_121_axes_0, epsilon = var_4786_to_fp16, x = input_101_cast_fp16)[name = string("normed_121_cast_fp16")]; - tensor normed_123_begin_0 = const()[name = string("normed_123_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_123_end_0 = const()[name = string("normed_123_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_123_end_mask_0 = const()[name = string("normed_123_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_123_cast_fp16 = slice_by_index(begin = normed_123_begin_0, end = normed_123_end_0, end_mask = normed_123_end_mask_0, x = normed_121_cast_fp16)[name = string("normed_123_cast_fp16")]; - tensor var_4805_to_fp16 = const()[name = string("op_4805_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147280320)))]; - tensor hidden_states_83_cast_fp16 = mul(x = normed_123_cast_fp16, y = var_4805_to_fp16)[name = string("hidden_states_83_cast_fp16")]; - tensor var_4810 = const()[name = string("op_4810"), val = tensor([0, 2, 1])]; - tensor var_4813_axes_0 = const()[name = string("op_4813_axes_0"), val = tensor([2])]; - tensor var_4811_cast_fp16 = transpose(perm = var_4810, x = hidden_states_83_cast_fp16)[name = string("transpose_125")]; - tensor var_4813_cast_fp16 = expand_dims(axes = var_4813_axes_0, x = var_4811_cast_fp16)[name = string("op_4813_cast_fp16")]; - string var_4829_pad_type_0 = const()[name = string("op_4829_pad_type_0"), val = string("valid")]; - tensor var_4829_strides_0 = const()[name = string("op_4829_strides_0"), val = tensor([1, 1])]; - tensor var_4829_pad_0 = const()[name = string("op_4829_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_4829_dilations_0 = const()[name = string("op_4829_dilations_0"), val = tensor([1, 1])]; - int32 var_4829_groups_0 = const()[name = string("op_4829_groups_0"), val = int32(1)]; - tensor var_4829 = conv(dilations = var_4829_dilations_0, groups = var_4829_groups_0, pad = var_4829_pad_0, pad_type = var_4829_pad_type_0, strides = var_4829_strides_0, weight = model_model_layers_5_self_attn_q_proj_weight_palettized, x = var_4813_cast_fp16)[name = string("op_4829")]; - tensor var_4834 = const()[name = string("op_4834"), val = tensor([1, 4, 1, 256])]; - tensor var_4835 = reshape(shape = var_4834, x = var_4829)[name = string("op_4835")]; - string var_4851_pad_type_0 = const()[name = string("op_4851_pad_type_0"), val = string("valid")]; - tensor var_4851_strides_0 = const()[name = string("op_4851_strides_0"), val = tensor([1, 1])]; - tensor var_4851_pad_0 = const()[name = string("op_4851_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_4851_dilations_0 = const()[name = string("op_4851_dilations_0"), val = tensor([1, 1])]; - int32 var_4851_groups_0 = const()[name = string("op_4851_groups_0"), val = int32(1)]; - tensor var_4851 = conv(dilations = var_4851_dilations_0, groups = var_4851_groups_0, pad = var_4851_pad_0, pad_type = var_4851_pad_type_0, strides = var_4851_strides_0, weight = model_model_layers_5_self_attn_k_proj_weight_palettized, x = var_4813_cast_fp16)[name = string("op_4851")]; - tensor var_4856 = const()[name = string("op_4856"), val = tensor([1, 1, 1, 256])]; - tensor var_4857 = reshape(shape = var_4856, x = var_4851)[name = string("op_4857")]; - string var_4873_pad_type_0 = const()[name = string("op_4873_pad_type_0"), val = string("valid")]; - tensor var_4873_strides_0 = const()[name = string("op_4873_strides_0"), val = tensor([1, 1])]; - tensor var_4873_pad_0 = const()[name = string("op_4873_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_4873_dilations_0 = const()[name = string("op_4873_dilations_0"), val = tensor([1, 1])]; - int32 var_4873_groups_0 = const()[name = string("op_4873_groups_0"), val = int32(1)]; - tensor var_4873 = conv(dilations = var_4873_dilations_0, groups = var_4873_groups_0, pad = var_4873_pad_0, pad_type = var_4873_pad_type_0, strides = var_4873_strides_0, weight = model_model_layers_5_self_attn_v_proj_weight_palettized, x = var_4813_cast_fp16)[name = string("op_4873")]; - tensor var_4878 = const()[name = string("op_4878"), val = tensor([1, 1, 1, 256])]; - tensor var_4879 = reshape(shape = var_4878, x = var_4873)[name = string("op_4879")]; - int32 var_4894 = const()[name = string("op_4894"), val = int32(-1)]; - fp16 const_194_promoted = const()[name = string("const_194_promoted"), val = fp16(-0x1p+0)]; - tensor var_4896 = mul(x = var_4835, y = const_194_promoted)[name = string("op_4896")]; - bool input_105_interleave_0 = const()[name = string("input_105_interleave_0"), val = bool(false)]; - tensor input_105 = concat(axis = var_4894, interleave = input_105_interleave_0, values = (var_4835, var_4896))[name = string("input_105")]; - tensor normed_125_axes_0 = const()[name = string("normed_125_axes_0"), val = tensor([-1])]; - fp16 var_4891_to_fp16 = const()[name = string("op_4891_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_125_cast_fp16 = layer_norm(axes = normed_125_axes_0, epsilon = var_4891_to_fp16, x = input_105)[name = string("normed_125_cast_fp16")]; - tensor normed_127_begin_0 = const()[name = string("normed_127_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_127_end_0 = const()[name = string("normed_127_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_127_end_mask_0 = const()[name = string("normed_127_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_127 = slice_by_index(begin = normed_127_begin_0, end = normed_127_end_0, end_mask = normed_127_end_mask_0, x = normed_125_cast_fp16)[name = string("normed_127")]; - tensor var_4910_to_fp16 = const()[name = string("op_4910_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147282688)))]; - tensor q_11_cast_fp16 = mul(x = normed_127, y = var_4910_to_fp16)[name = string("q_11_cast_fp16")]; - int32 var_4921 = const()[name = string("op_4921"), val = int32(-1)]; - fp16 const_198_promoted = const()[name = string("const_198_promoted"), val = fp16(-0x1p+0)]; - tensor var_4923 = mul(x = var_4857, y = const_198_promoted)[name = string("op_4923")]; - bool input_107_interleave_0 = const()[name = string("input_107_interleave_0"), val = bool(false)]; - tensor input_107 = concat(axis = var_4921, interleave = input_107_interleave_0, values = (var_4857, var_4923))[name = string("input_107")]; - tensor normed_129_axes_0 = const()[name = string("normed_129_axes_0"), val = tensor([-1])]; - fp16 var_4918_to_fp16 = const()[name = string("op_4918_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_129_cast_fp16 = layer_norm(axes = normed_129_axes_0, epsilon = var_4918_to_fp16, x = input_107)[name = string("normed_129_cast_fp16")]; - tensor normed_131_begin_0 = const()[name = string("normed_131_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_131_end_0 = const()[name = string("normed_131_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_131_end_mask_0 = const()[name = string("normed_131_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_131 = slice_by_index(begin = normed_131_begin_0, end = normed_131_end_0, end_mask = normed_131_end_mask_0, x = normed_129_cast_fp16)[name = string("normed_131")]; - tensor var_4937_to_fp16 = const()[name = string("op_4937_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147283264)))]; - tensor k_11_cast_fp16 = mul(x = normed_131, y = var_4937_to_fp16)[name = string("k_11_cast_fp16")]; - tensor var_4939_cast_fp16 = mul(x = q_11_cast_fp16, y = cos_21_cast_fp16)[name = string("op_4939_cast_fp16")]; - tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_21_cast_fp16 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = q_11_cast_fp16)[name = string("x1_21_cast_fp16")]; - tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_21_cast_fp16 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = q_11_cast_fp16)[name = string("x2_21_cast_fp16")]; - fp16 const_204_promoted_to_fp16 = const()[name = string("const_204_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_4960_cast_fp16 = mul(x = x2_21_cast_fp16, y = const_204_promoted_to_fp16)[name = string("op_4960_cast_fp16")]; - int32 var_4962 = const()[name = string("op_4962"), val = int32(-1)]; - bool var_4963_interleave_0 = const()[name = string("op_4963_interleave_0"), val = bool(false)]; - tensor var_4963_cast_fp16 = concat(axis = var_4962, interleave = var_4963_interleave_0, values = (var_4960_cast_fp16, x1_21_cast_fp16))[name = string("op_4963_cast_fp16")]; - tensor var_4964_cast_fp16 = mul(x = var_4963_cast_fp16, y = sin_21_cast_fp16)[name = string("op_4964_cast_fp16")]; - tensor query_states_21_cast_fp16 = add(x = var_4939_cast_fp16, y = var_4964_cast_fp16)[name = string("query_states_21_cast_fp16")]; - tensor var_4967_cast_fp16 = mul(x = k_11_cast_fp16, y = cos_21_cast_fp16)[name = string("op_4967_cast_fp16")]; - tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_23_cast_fp16 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = k_11_cast_fp16)[name = string("x1_23_cast_fp16")]; - tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_23_cast_fp16 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = k_11_cast_fp16)[name = string("x2_23_cast_fp16")]; - fp16 const_207_promoted_to_fp16 = const()[name = string("const_207_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_4988_cast_fp16 = mul(x = x2_23_cast_fp16, y = const_207_promoted_to_fp16)[name = string("op_4988_cast_fp16")]; - int32 var_4990 = const()[name = string("op_4990"), val = int32(-1)]; - bool var_4991_interleave_0 = const()[name = string("op_4991_interleave_0"), val = bool(false)]; - tensor var_4991_cast_fp16 = concat(axis = var_4990, interleave = var_4991_interleave_0, values = (var_4988_cast_fp16, x1_23_cast_fp16))[name = string("op_4991_cast_fp16")]; - tensor var_4992_cast_fp16 = mul(x = var_4991_cast_fp16, y = sin_21_cast_fp16)[name = string("op_4992_cast_fp16")]; - tensor key_states_21_cast_fp16 = add(x = var_4967_cast_fp16, y = var_4992_cast_fp16)[name = string("key_states_21_cast_fp16")]; - int32 var_4996 = const()[name = string("op_4996"), val = int32(1)]; - tensor var_4997 = add(x = current_pos, y = var_4996)[name = string("op_4997")]; - tensor read_state_1 = read_state(input = model_model_kv_cache_global)[name = string("read_state_1")]; - tensor expand_dims_50 = const()[name = string("expand_dims_50"), val = tensor([0])]; - tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([0])]; - tensor expand_dims_53 = const()[name = string("expand_dims_53"), val = tensor([0])]; - tensor expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor([1])]; - int32 concat_22_axis_0 = const()[name = string("concat_22_axis_0"), val = int32(0)]; - bool concat_22_interleave_0 = const()[name = string("concat_22_interleave_0"), val = bool(false)]; - tensor concat_22 = concat(axis = concat_22_axis_0, interleave = concat_22_interleave_0, values = (expand_dims_50, expand_dims_51, current_pos, expand_dims_53))[name = string("concat_22")]; - tensor concat_23_values1_0 = const()[name = string("concat_23_values1_0"), val = tensor([0])]; - tensor concat_23_values3_0 = const()[name = string("concat_23_values3_0"), val = tensor([0])]; - int32 concat_23_axis_0 = const()[name = string("concat_23_axis_0"), val = int32(0)]; - bool concat_23_interleave_0 = const()[name = string("concat_23_interleave_0"), val = bool(false)]; - tensor concat_23 = concat(axis = concat_23_axis_0, interleave = concat_23_interleave_0, values = (expand_dims_54, concat_23_values1_0, var_4997, concat_23_values3_0))[name = string("concat_23")]; - tensor model_model_kv_cache_global_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_global_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_global_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_22, begin_mask = model_model_kv_cache_global_internal_tensor_assign_1_begin_mask_0, end = concat_23, end_mask = model_model_kv_cache_global_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_1_stride_0, update = key_states_21_cast_fp16, x = read_state_1)[name = string("model_model_kv_cache_global_internal_tensor_assign_1_cast_fp16")]; - write_state(data = model_model_kv_cache_global_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_62_write_state")]; - tensor coreml_update_state_62 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_62")]; - tensor expand_dims_56 = const()[name = string("expand_dims_56"), val = tensor([4])]; - tensor expand_dims_57 = const()[name = string("expand_dims_57"), val = tensor([0])]; - tensor expand_dims_59 = const()[name = string("expand_dims_59"), val = tensor([0])]; - tensor expand_dims_60 = const()[name = string("expand_dims_60"), val = tensor([5])]; - int32 concat_26_axis_0 = const()[name = string("concat_26_axis_0"), val = int32(0)]; - bool concat_26_interleave_0 = const()[name = string("concat_26_interleave_0"), val = bool(false)]; - tensor concat_26 = concat(axis = concat_26_axis_0, interleave = concat_26_interleave_0, values = (expand_dims_56, expand_dims_57, current_pos, expand_dims_59))[name = string("concat_26")]; - tensor concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor([0])]; - tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; - int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; - bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; - tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (expand_dims_60, concat_27_values1_0, var_4997, concat_27_values3_0))[name = string("concat_27")]; - tensor model_model_kv_cache_global_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_global_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_global_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_26, begin_mask = model_model_kv_cache_global_internal_tensor_assign_2_begin_mask_0, end = concat_27, end_mask = model_model_kv_cache_global_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_2_stride_0, update = var_4879, x = coreml_update_state_62)[name = string("model_model_kv_cache_global_internal_tensor_assign_2_cast_fp16")]; - write_state(data = model_model_kv_cache_global_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_63_write_state")]; - tensor coreml_update_state_63 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_63")]; - tensor var_5047_begin_0 = const()[name = string("op_5047_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_5047_end_0 = const()[name = string("op_5047_end_0"), val = tensor([1, 1, 4096, 256])]; - tensor var_5047_end_mask_0 = const()[name = string("op_5047_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_5047_cast_fp16 = slice_by_index(begin = var_5047_begin_0, end = var_5047_end_0, end_mask = var_5047_end_mask_0, x = coreml_update_state_63)[name = string("op_5047_cast_fp16")]; - tensor var_5054_begin_0 = const()[name = string("op_5054_begin_0"), val = tensor([4, 0, 0, 0])]; - tensor var_5054_end_0 = const()[name = string("op_5054_end_0"), val = tensor([5, 1, 4096, 256])]; - tensor var_5054_end_mask_0 = const()[name = string("op_5054_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_5054_cast_fp16 = slice_by_index(begin = var_5054_begin_0, end = var_5054_end_0, end_mask = var_5054_end_mask_0, x = coreml_update_state_63)[name = string("op_5054_cast_fp16")]; - tensor var_5091 = const()[name = string("op_5091"), val = tensor([1, 4, 1, 1])]; - tensor x_85_cast_fp16 = tile(reps = var_5091, x = var_5047_cast_fp16)[name = string("x_85_cast_fp16")]; - tensor var_5111 = const()[name = string("op_5111"), val = tensor([1, 4, 1, 1])]; - tensor x_91_cast_fp16 = tile(reps = var_5111, x = var_5054_cast_fp16)[name = string("x_91_cast_fp16")]; - bool var_5138_transpose_x_1 = const()[name = string("op_5138_transpose_x_1"), val = bool(false)]; - bool var_5138_transpose_y_1 = const()[name = string("op_5138_transpose_y_1"), val = bool(true)]; - tensor var_5138 = matmul(transpose_x = var_5138_transpose_x_1, transpose_y = var_5138_transpose_y_1, x = query_states_21_cast_fp16, y = x_85_cast_fp16)[name = string("op_5138")]; - fp16 var_5139_to_fp16 = const()[name = string("op_5139_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_31_cast_fp16 = mul(x = var_5138, y = var_5139_to_fp16)[name = string("attn_weights_31_cast_fp16")]; - tensor attn_weights_33_cast_fp16 = add(x = attn_weights_31_cast_fp16, y = causal_mask)[name = string("attn_weights_33_cast_fp16")]; - int32 var_5174 = const()[name = string("op_5174"), val = int32(-1)]; - tensor attn_weights_35_cast_fp16 = softmax(axis = var_5174, x = attn_weights_33_cast_fp16)[name = string("attn_weights_35_cast_fp16")]; - bool attn_output_51_transpose_x_0 = const()[name = string("attn_output_51_transpose_x_0"), val = bool(false)]; - bool attn_output_51_transpose_y_0 = const()[name = string("attn_output_51_transpose_y_0"), val = bool(false)]; - tensor attn_output_51_cast_fp16 = matmul(transpose_x = attn_output_51_transpose_x_0, transpose_y = attn_output_51_transpose_y_0, x = attn_weights_35_cast_fp16, y = x_91_cast_fp16)[name = string("attn_output_51_cast_fp16")]; - tensor var_5185_perm_0 = const()[name = string("op_5185_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_5189 = const()[name = string("op_5189"), val = tensor([1, 1, 1024])]; - tensor var_5185_cast_fp16 = transpose(perm = var_5185_perm_0, x = attn_output_51_cast_fp16)[name = string("transpose_124")]; - tensor attn_output_55_cast_fp16 = reshape(shape = var_5189, x = var_5185_cast_fp16)[name = string("attn_output_55_cast_fp16")]; - tensor var_5194 = const()[name = string("op_5194"), val = tensor([0, 2, 1])]; - string var_5210_pad_type_0 = const()[name = string("op_5210_pad_type_0"), val = string("valid")]; - int32 var_5210_groups_0 = const()[name = string("op_5210_groups_0"), val = int32(1)]; - tensor var_5210_strides_0 = const()[name = string("op_5210_strides_0"), val = tensor([1])]; - tensor var_5210_pad_0 = const()[name = string("op_5210_pad_0"), val = tensor([0, 0])]; - tensor var_5210_dilations_0 = const()[name = string("op_5210_dilations_0"), val = tensor([1])]; - tensor squeeze_5_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147283840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148168640))))[name = string("squeeze_5_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_5195_cast_fp16 = transpose(perm = var_5194, x = attn_output_55_cast_fp16)[name = string("transpose_123")]; - tensor var_5210_cast_fp16 = conv(dilations = var_5210_dilations_0, groups = var_5210_groups_0, pad = var_5210_pad_0, pad_type = var_5210_pad_type_0, strides = var_5210_strides_0, weight = squeeze_5_cast_fp16_to_fp32_to_fp16_palettized, x = var_5195_cast_fp16)[name = string("op_5210_cast_fp16")]; - tensor var_5214 = const()[name = string("op_5214"), val = tensor([0, 2, 1])]; - int32 var_5225 = const()[name = string("op_5225"), val = int32(-1)]; - fp16 const_216_promoted_to_fp16 = const()[name = string("const_216_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_89_cast_fp16 = transpose(perm = var_5214, x = var_5210_cast_fp16)[name = string("transpose_122")]; - tensor var_5227_cast_fp16 = mul(x = hidden_states_89_cast_fp16, y = const_216_promoted_to_fp16)[name = string("op_5227_cast_fp16")]; - bool input_111_interleave_0 = const()[name = string("input_111_interleave_0"), val = bool(false)]; - tensor input_111_cast_fp16 = concat(axis = var_5225, interleave = input_111_interleave_0, values = (hidden_states_89_cast_fp16, var_5227_cast_fp16))[name = string("input_111_cast_fp16")]; - tensor normed_133_axes_0 = const()[name = string("normed_133_axes_0"), val = tensor([-1])]; - fp16 var_5222_to_fp16 = const()[name = string("op_5222_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_133_cast_fp16 = layer_norm(axes = normed_133_axes_0, epsilon = var_5222_to_fp16, x = input_111_cast_fp16)[name = string("normed_133_cast_fp16")]; - tensor normed_135_begin_0 = const()[name = string("normed_135_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_135_end_0 = const()[name = string("normed_135_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_135_end_mask_0 = const()[name = string("normed_135_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_135_cast_fp16 = slice_by_index(begin = normed_135_begin_0, end = normed_135_end_0, end_mask = normed_135_end_mask_0, x = normed_133_cast_fp16)[name = string("normed_135_cast_fp16")]; - tensor var_5241_to_fp16 = const()[name = string("op_5241_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148187136)))]; - tensor attn_output_59_cast_fp16 = mul(x = normed_135_cast_fp16, y = var_5241_to_fp16)[name = string("attn_output_59_cast_fp16")]; - tensor hidden_states_91_cast_fp16 = add(x = hidden_states_81_cast_fp16, y = attn_output_59_cast_fp16)[name = string("hidden_states_91_cast_fp16")]; - int32 var_5254 = const()[name = string("op_5254"), val = int32(-1)]; - fp16 const_220_promoted_to_fp16 = const()[name = string("const_220_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_5256_cast_fp16 = mul(x = hidden_states_91_cast_fp16, y = const_220_promoted_to_fp16)[name = string("op_5256_cast_fp16")]; - bool input_113_interleave_0 = const()[name = string("input_113_interleave_0"), val = bool(false)]; - tensor input_113_cast_fp16 = concat(axis = var_5254, interleave = input_113_interleave_0, values = (hidden_states_91_cast_fp16, var_5256_cast_fp16))[name = string("input_113_cast_fp16")]; - tensor normed_137_axes_0 = const()[name = string("normed_137_axes_0"), val = tensor([-1])]; - fp16 var_5251_to_fp16 = const()[name = string("op_5251_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_137_cast_fp16 = layer_norm(axes = normed_137_axes_0, epsilon = var_5251_to_fp16, x = input_113_cast_fp16)[name = string("normed_137_cast_fp16")]; - tensor normed_139_begin_0 = const()[name = string("normed_139_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_139_end_0 = const()[name = string("normed_139_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_139_end_mask_0 = const()[name = string("normed_139_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_139_cast_fp16 = slice_by_index(begin = normed_139_begin_0, end = normed_139_end_0, end_mask = normed_139_end_mask_0, x = normed_137_cast_fp16)[name = string("normed_139_cast_fp16")]; - tensor var_5270_to_fp16 = const()[name = string("op_5270_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148189504)))]; - tensor x_93_cast_fp16 = mul(x = normed_139_cast_fp16, y = var_5270_to_fp16)[name = string("x_93_cast_fp16")]; - tensor var_5282 = const()[name = string("op_5282"), val = tensor([0, 2, 1])]; - tensor input_115_axes_0 = const()[name = string("input_115_axes_0"), val = tensor([2])]; - tensor var_5283_cast_fp16 = transpose(perm = var_5282, x = x_93_cast_fp16)[name = string("transpose_121")]; - tensor input_115_cast_fp16 = expand_dims(axes = input_115_axes_0, x = var_5283_cast_fp16)[name = string("input_115_cast_fp16")]; - string x_95_pad_type_0 = const()[name = string("x_95_pad_type_0"), val = string("valid")]; - tensor x_95_strides_0 = const()[name = string("x_95_strides_0"), val = tensor([1, 1])]; - tensor x_95_pad_0 = const()[name = string("x_95_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_95_dilations_0 = const()[name = string("x_95_dilations_0"), val = tensor([1, 1])]; - int32 x_95_groups_0 = const()[name = string("x_95_groups_0"), val = int32(1)]; - tensor model_model_layers_5_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(621795200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(627767232))))[name = string("model_model_layers_5_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_95_cast_fp16 = conv(dilations = x_95_dilations_0, groups = x_95_groups_0, pad = x_95_pad_0, pad_type = x_95_pad_type_0, strides = x_95_strides_0, weight = model_model_layers_5_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_115_cast_fp16)[name = string("x_95_cast_fp16")]; - string b_11_pad_type_0 = const()[name = string("b_11_pad_type_0"), val = string("valid")]; - tensor b_11_strides_0 = const()[name = string("b_11_strides_0"), val = tensor([1, 1])]; - tensor b_11_pad_0 = const()[name = string("b_11_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_11_dilations_0 = const()[name = string("b_11_dilations_0"), val = tensor([1, 1])]; - int32 b_11_groups_0 = const()[name = string("b_11_groups_0"), val = int32(1)]; - tensor model_model_layers_5_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(627877888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(633849920))))[name = string("model_model_layers_5_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_11_cast_fp16 = conv(dilations = b_11_dilations_0, groups = b_11_groups_0, pad = b_11_pad_0, pad_type = b_11_pad_type_0, strides = b_11_strides_0, weight = model_model_layers_5_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_115_cast_fp16)[name = string("b_11_cast_fp16")]; - string var_5308_mode_0 = const()[name = string("op_5308_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_5308_cast_fp16 = gelu(mode = var_5308_mode_0, x = x_95_cast_fp16)[name = string("op_5308_cast_fp16")]; - tensor input_117_cast_fp16 = mul(x = var_5308_cast_fp16, y = b_11_cast_fp16)[name = string("input_117_cast_fp16")]; - string e_11_pad_type_0 = const()[name = string("e_11_pad_type_0"), val = string("valid")]; - tensor e_11_strides_0 = const()[name = string("e_11_strides_0"), val = tensor([1, 1])]; - tensor e_11_pad_0 = const()[name = string("e_11_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_11_dilations_0 = const()[name = string("e_11_dilations_0"), val = tensor([1, 1])]; - int32 e_11_groups_0 = const()[name = string("e_11_groups_0"), val = int32(1)]; - tensor model_model_layers_5_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160357248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166329280))))[name = string("model_model_layers_5_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_11_cast_fp16 = conv(dilations = e_11_dilations_0, groups = e_11_groups_0, pad = e_11_pad_0, pad_type = e_11_pad_type_0, strides = e_11_strides_0, weight = model_model_layers_5_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_117_cast_fp16)[name = string("e_11_cast_fp16")]; - tensor var_5316_axes_0 = const()[name = string("op_5316_axes_0"), val = tensor([2])]; - tensor var_5316_cast_fp16 = squeeze(axes = var_5316_axes_0, x = e_11_cast_fp16)[name = string("op_5316_cast_fp16")]; - tensor var_5317 = const()[name = string("op_5317"), val = tensor([0, 2, 1])]; - int32 var_5328 = const()[name = string("op_5328"), val = int32(-1)]; - fp16 const_224_promoted_to_fp16 = const()[name = string("const_224_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_93_cast_fp16 = transpose(perm = var_5317, x = var_5316_cast_fp16)[name = string("transpose_120")]; - tensor var_5330_cast_fp16 = mul(x = hidden_states_93_cast_fp16, y = const_224_promoted_to_fp16)[name = string("op_5330_cast_fp16")]; - bool input_119_interleave_0 = const()[name = string("input_119_interleave_0"), val = bool(false)]; - tensor input_119_cast_fp16 = concat(axis = var_5328, interleave = input_119_interleave_0, values = (hidden_states_93_cast_fp16, var_5330_cast_fp16))[name = string("input_119_cast_fp16")]; - tensor normed_141_axes_0 = const()[name = string("normed_141_axes_0"), val = tensor([-1])]; - fp16 var_5325_to_fp16 = const()[name = string("op_5325_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_141_cast_fp16 = layer_norm(axes = normed_141_axes_0, epsilon = var_5325_to_fp16, x = input_119_cast_fp16)[name = string("normed_141_cast_fp16")]; - tensor normed_143_begin_0 = const()[name = string("normed_143_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_143_end_0 = const()[name = string("normed_143_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_143_end_mask_0 = const()[name = string("normed_143_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_143_cast_fp16 = slice_by_index(begin = normed_143_begin_0, end = normed_143_end_0, end_mask = normed_143_end_mask_0, x = normed_141_cast_fp16)[name = string("normed_143_cast_fp16")]; - tensor var_5344_to_fp16 = const()[name = string("op_5344_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166347776)))]; - tensor hidden_states_95_cast_fp16 = mul(x = normed_143_cast_fp16, y = var_5344_to_fp16)[name = string("hidden_states_95_cast_fp16")]; - tensor hidden_states_97_cast_fp16 = add(x = hidden_states_91_cast_fp16, y = hidden_states_95_cast_fp16)[name = string("hidden_states_97_cast_fp16")]; - int32 var_5395 = const()[name = string("op_5395"), val = int32(-1)]; - fp16 const_228_promoted_to_fp16 = const()[name = string("const_228_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_5397_cast_fp16 = mul(x = hidden_states_97_cast_fp16, y = const_228_promoted_to_fp16)[name = string("op_5397_cast_fp16")]; - bool input_121_interleave_0 = const()[name = string("input_121_interleave_0"), val = bool(false)]; - tensor input_121_cast_fp16 = concat(axis = var_5395, interleave = input_121_interleave_0, values = (hidden_states_97_cast_fp16, var_5397_cast_fp16))[name = string("input_121_cast_fp16")]; - tensor normed_145_axes_0 = const()[name = string("normed_145_axes_0"), val = tensor([-1])]; - fp16 var_5392_to_fp16 = const()[name = string("op_5392_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_145_cast_fp16 = layer_norm(axes = normed_145_axes_0, epsilon = var_5392_to_fp16, x = input_121_cast_fp16)[name = string("normed_145_cast_fp16")]; - tensor normed_147_begin_0 = const()[name = string("normed_147_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_147_end_0 = const()[name = string("normed_147_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_147_end_mask_0 = const()[name = string("normed_147_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_147_cast_fp16 = slice_by_index(begin = normed_147_begin_0, end = normed_147_end_0, end_mask = normed_147_end_mask_0, x = normed_145_cast_fp16)[name = string("normed_147_cast_fp16")]; - tensor var_5411_to_fp16 = const()[name = string("op_5411_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166350144)))]; - tensor hidden_states_99_cast_fp16 = mul(x = normed_147_cast_fp16, y = var_5411_to_fp16)[name = string("hidden_states_99_cast_fp16")]; - tensor var_5416 = const()[name = string("op_5416"), val = tensor([0, 2, 1])]; - tensor var_5419_axes_0 = const()[name = string("op_5419_axes_0"), val = tensor([2])]; - tensor var_5417_cast_fp16 = transpose(perm = var_5416, x = hidden_states_99_cast_fp16)[name = string("transpose_119")]; - tensor var_5419_cast_fp16 = expand_dims(axes = var_5419_axes_0, x = var_5417_cast_fp16)[name = string("op_5419_cast_fp16")]; - string var_5435_pad_type_0 = const()[name = string("op_5435_pad_type_0"), val = string("valid")]; - tensor var_5435_strides_0 = const()[name = string("op_5435_strides_0"), val = tensor([1, 1])]; - tensor var_5435_pad_0 = const()[name = string("op_5435_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_5435_dilations_0 = const()[name = string("op_5435_dilations_0"), val = tensor([1, 1])]; - int32 var_5435_groups_0 = const()[name = string("op_5435_groups_0"), val = int32(1)]; - tensor var_5435 = conv(dilations = var_5435_dilations_0, groups = var_5435_groups_0, pad = var_5435_pad_0, pad_type = var_5435_pad_type_0, strides = var_5435_strides_0, weight = model_model_layers_6_self_attn_q_proj_weight_palettized, x = var_5419_cast_fp16)[name = string("op_5435")]; - tensor var_5440 = const()[name = string("op_5440"), val = tensor([1, 4, 1, 256])]; - tensor var_5441 = reshape(shape = var_5440, x = var_5435)[name = string("op_5441")]; - string var_5457_pad_type_0 = const()[name = string("op_5457_pad_type_0"), val = string("valid")]; - tensor var_5457_strides_0 = const()[name = string("op_5457_strides_0"), val = tensor([1, 1])]; - tensor var_5457_pad_0 = const()[name = string("op_5457_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_5457_dilations_0 = const()[name = string("op_5457_dilations_0"), val = tensor([1, 1])]; - int32 var_5457_groups_0 = const()[name = string("op_5457_groups_0"), val = int32(1)]; - tensor var_5457 = conv(dilations = var_5457_dilations_0, groups = var_5457_groups_0, pad = var_5457_pad_0, pad_type = var_5457_pad_type_0, strides = var_5457_strides_0, weight = model_model_layers_6_self_attn_k_proj_weight_palettized, x = var_5419_cast_fp16)[name = string("op_5457")]; - tensor var_5462 = const()[name = string("op_5462"), val = tensor([1, 1, 1, 256])]; - tensor var_5463 = reshape(shape = var_5462, x = var_5457)[name = string("op_5463")]; - string var_5479_pad_type_0 = const()[name = string("op_5479_pad_type_0"), val = string("valid")]; - tensor var_5479_strides_0 = const()[name = string("op_5479_strides_0"), val = tensor([1, 1])]; - tensor var_5479_pad_0 = const()[name = string("op_5479_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_5479_dilations_0 = const()[name = string("op_5479_dilations_0"), val = tensor([1, 1])]; - int32 var_5479_groups_0 = const()[name = string("op_5479_groups_0"), val = int32(1)]; - tensor var_5479 = conv(dilations = var_5479_dilations_0, groups = var_5479_groups_0, pad = var_5479_pad_0, pad_type = var_5479_pad_type_0, strides = var_5479_strides_0, weight = model_model_layers_6_self_attn_v_proj_weight_palettized, x = var_5419_cast_fp16)[name = string("op_5479")]; - tensor var_5484 = const()[name = string("op_5484"), val = tensor([1, 1, 1, 256])]; - tensor var_5485 = reshape(shape = var_5484, x = var_5479)[name = string("op_5485")]; - int32 var_5500 = const()[name = string("op_5500"), val = int32(-1)]; - fp16 const_232_promoted = const()[name = string("const_232_promoted"), val = fp16(-0x1p+0)]; - tensor var_5502 = mul(x = var_5441, y = const_232_promoted)[name = string("op_5502")]; - bool input_125_interleave_0 = const()[name = string("input_125_interleave_0"), val = bool(false)]; - tensor input_125 = concat(axis = var_5500, interleave = input_125_interleave_0, values = (var_5441, var_5502))[name = string("input_125")]; - tensor normed_149_axes_0 = const()[name = string("normed_149_axes_0"), val = tensor([-1])]; - fp16 var_5497_to_fp16 = const()[name = string("op_5497_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_149_cast_fp16 = layer_norm(axes = normed_149_axes_0, epsilon = var_5497_to_fp16, x = input_125)[name = string("normed_149_cast_fp16")]; - tensor normed_151_begin_0 = const()[name = string("normed_151_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_151_end_0 = const()[name = string("normed_151_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_151_end_mask_0 = const()[name = string("normed_151_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_151 = slice_by_index(begin = normed_151_begin_0, end = normed_151_end_0, end_mask = normed_151_end_mask_0, x = normed_149_cast_fp16)[name = string("normed_151")]; - tensor var_5516_to_fp16 = const()[name = string("op_5516_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166352512)))]; - tensor q_13_cast_fp16 = mul(x = normed_151, y = var_5516_to_fp16)[name = string("q_13_cast_fp16")]; - int32 var_5527 = const()[name = string("op_5527"), val = int32(-1)]; - fp16 const_236_promoted = const()[name = string("const_236_promoted"), val = fp16(-0x1p+0)]; - tensor var_5529 = mul(x = var_5463, y = const_236_promoted)[name = string("op_5529")]; - bool input_127_interleave_0 = const()[name = string("input_127_interleave_0"), val = bool(false)]; - tensor input_127 = concat(axis = var_5527, interleave = input_127_interleave_0, values = (var_5463, var_5529))[name = string("input_127")]; - tensor normed_153_axes_0 = const()[name = string("normed_153_axes_0"), val = tensor([-1])]; - fp16 var_5524_to_fp16 = const()[name = string("op_5524_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_153_cast_fp16 = layer_norm(axes = normed_153_axes_0, epsilon = var_5524_to_fp16, x = input_127)[name = string("normed_153_cast_fp16")]; - tensor normed_155_begin_0 = const()[name = string("normed_155_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_155_end_0 = const()[name = string("normed_155_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_155_end_mask_0 = const()[name = string("normed_155_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_155 = slice_by_index(begin = normed_155_begin_0, end = normed_155_end_0, end_mask = normed_155_end_mask_0, x = normed_153_cast_fp16)[name = string("normed_155")]; - tensor var_5543_to_fp16 = const()[name = string("op_5543_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166353088)))]; - tensor k_13_cast_fp16 = mul(x = normed_155, y = var_5543_to_fp16)[name = string("k_13_cast_fp16")]; - tensor var_5545_cast_fp16 = mul(x = q_13_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5545_cast_fp16")]; - tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_25_cast_fp16 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = q_13_cast_fp16)[name = string("x1_25_cast_fp16")]; - tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_25_cast_fp16 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = q_13_cast_fp16)[name = string("x2_25_cast_fp16")]; - fp16 const_242_promoted_to_fp16 = const()[name = string("const_242_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_5566_cast_fp16 = mul(x = x2_25_cast_fp16, y = const_242_promoted_to_fp16)[name = string("op_5566_cast_fp16")]; - int32 var_5568 = const()[name = string("op_5568"), val = int32(-1)]; - bool var_5569_interleave_0 = const()[name = string("op_5569_interleave_0"), val = bool(false)]; - tensor var_5569_cast_fp16 = concat(axis = var_5568, interleave = var_5569_interleave_0, values = (var_5566_cast_fp16, x1_25_cast_fp16))[name = string("op_5569_cast_fp16")]; - tensor var_5570_cast_fp16 = mul(x = var_5569_cast_fp16, y = sin_1_cast_fp16)[name = string("op_5570_cast_fp16")]; - tensor query_states_25_cast_fp16 = add(x = var_5545_cast_fp16, y = var_5570_cast_fp16)[name = string("query_states_25_cast_fp16")]; - tensor var_5573_cast_fp16 = mul(x = k_13_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5573_cast_fp16")]; - tensor x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_27_cast_fp16 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = k_13_cast_fp16)[name = string("x1_27_cast_fp16")]; - tensor x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_27_cast_fp16 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = k_13_cast_fp16)[name = string("x2_27_cast_fp16")]; - fp16 const_245_promoted_to_fp16 = const()[name = string("const_245_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_5594_cast_fp16 = mul(x = x2_27_cast_fp16, y = const_245_promoted_to_fp16)[name = string("op_5594_cast_fp16")]; - int32 var_5596 = const()[name = string("op_5596"), val = int32(-1)]; - bool var_5597_interleave_0 = const()[name = string("op_5597_interleave_0"), val = bool(false)]; - tensor var_5597_cast_fp16 = concat(axis = var_5596, interleave = var_5597_interleave_0, values = (var_5594_cast_fp16, x1_27_cast_fp16))[name = string("op_5597_cast_fp16")]; - tensor var_5598_cast_fp16 = mul(x = var_5597_cast_fp16, y = sin_1_cast_fp16)[name = string("op_5598_cast_fp16")]; - tensor key_states_25_cast_fp16 = add(x = var_5573_cast_fp16, y = var_5598_cast_fp16)[name = string("key_states_25_cast_fp16")]; - tensor key_slice_11_begin_0 = const()[name = string("key_slice_11_begin_0"), val = tensor([5, 0, 0, 0])]; - tensor key_slice_11_end_0 = const()[name = string("key_slice_11_end_0"), val = tensor([6, 1, 512, 256])]; - tensor key_slice_11_end_mask_0 = const()[name = string("key_slice_11_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_11_cast_fp16 = slice_by_index(begin = key_slice_11_begin_0, end = key_slice_11_end_0, end_mask = key_slice_11_end_mask_0, x = coreml_update_state_61)[name = string("key_slice_11_cast_fp16")]; - tensor key_tail_11_begin_0 = const()[name = string("key_tail_11_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor key_tail_11_end_0 = const()[name = string("key_tail_11_end_0"), val = tensor([1, 1, 512, 256])]; - tensor key_tail_11_cast_fp16 = slice_by_index(begin = key_tail_11_begin_0, end = key_tail_11_end_0, x = key_slice_11_cast_fp16)[name = string("key_tail_11_cast_fp16")]; - int32 var_5611 = const()[name = string("op_5611"), val = int32(2)]; - bool shifted_key_11_interleave_0 = const()[name = string("shifted_key_11_interleave_0"), val = bool(false)]; - tensor shifted_key_11_cast_fp16 = concat(axis = var_5611, interleave = shifted_key_11_interleave_0, values = (key_tail_11_cast_fp16, key_states_25_cast_fp16))[name = string("shifted_key_11_cast_fp16")]; - tensor concat_28 = const()[name = string("concat_28"), val = tensor([5, 0, 0, 0])]; - tensor concat_29 = const()[name = string("concat_29"), val = tensor([6, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_11_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_11_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_11_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_28, begin_mask = model_model_kv_cache_local_internal_tensor_assign_11_begin_mask_0, end = concat_29, end_mask = model_model_kv_cache_local_internal_tensor_assign_11_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_11_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_11_stride_0, update = shifted_key_11_cast_fp16, x = coreml_update_state_61)[name = string("model_model_kv_cache_local_internal_tensor_assign_11_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_11_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_64_write_state")]; - tensor coreml_update_state_64 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_64")]; - tensor value_slice_11_begin_0 = const()[name = string("value_slice_11_begin_0"), val = tensor([27, 0, 0, 0])]; - tensor value_slice_11_end_0 = const()[name = string("value_slice_11_end_0"), val = tensor([28, 1, 512, 256])]; - tensor value_slice_11_end_mask_0 = const()[name = string("value_slice_11_end_mask_0"), val = tensor([false, true, true, true])]; - tensor value_slice_11_cast_fp16 = slice_by_index(begin = value_slice_11_begin_0, end = value_slice_11_end_0, end_mask = value_slice_11_end_mask_0, x = coreml_update_state_64)[name = string("value_slice_11_cast_fp16")]; - tensor value_tail_11_begin_0 = const()[name = string("value_tail_11_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor value_tail_11_end_0 = const()[name = string("value_tail_11_end_0"), val = tensor([1, 1, 512, 256])]; - tensor value_tail_11_cast_fp16 = slice_by_index(begin = value_tail_11_begin_0, end = value_tail_11_end_0, x = value_slice_11_cast_fp16)[name = string("value_tail_11_cast_fp16")]; - int32 var_5645 = const()[name = string("op_5645"), val = int32(2)]; - bool shifted_value_11_interleave_0 = const()[name = string("shifted_value_11_interleave_0"), val = bool(false)]; - tensor shifted_value_11_cast_fp16 = concat(axis = var_5645, interleave = shifted_value_11_interleave_0, values = (value_tail_11_cast_fp16, var_5485))[name = string("shifted_value_11_cast_fp16")]; - tensor concat_30 = const()[name = string("concat_30"), val = tensor([27, 0, 0, 0])]; - tensor concat_31 = const()[name = string("concat_31"), val = tensor([28, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_12_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_12_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_12_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_30, begin_mask = model_model_kv_cache_local_internal_tensor_assign_12_begin_mask_0, end = concat_31, end_mask = model_model_kv_cache_local_internal_tensor_assign_12_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_12_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_12_stride_0, update = shifted_value_11_cast_fp16, x = coreml_update_state_64)[name = string("model_model_kv_cache_local_internal_tensor_assign_12_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_12_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_65_write_state")]; - tensor coreml_update_state_65 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_65")]; - tensor var_5673_begin_0 = const()[name = string("op_5673_begin_0"), val = tensor([5, 0, 0, 0])]; - tensor var_5673_end_0 = const()[name = string("op_5673_end_0"), val = tensor([6, 1, 512, 256])]; - tensor var_5673_end_mask_0 = const()[name = string("op_5673_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_5673_cast_fp16 = slice_by_index(begin = var_5673_begin_0, end = var_5673_end_0, end_mask = var_5673_end_mask_0, x = coreml_update_state_65)[name = string("op_5673_cast_fp16")]; - tensor var_5680_begin_0 = const()[name = string("op_5680_begin_0"), val = tensor([27, 0, 0, 0])]; - tensor var_5680_end_0 = const()[name = string("op_5680_end_0"), val = tensor([28, 1, 512, 256])]; - tensor var_5680_end_mask_0 = const()[name = string("op_5680_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_5680_cast_fp16 = slice_by_index(begin = var_5680_begin_0, end = var_5680_end_0, end_mask = var_5680_end_mask_0, x = coreml_update_state_65)[name = string("op_5680_cast_fp16")]; - tensor var_5717 = const()[name = string("op_5717"), val = tensor([1, 4, 1, 1])]; - tensor x_101_cast_fp16 = tile(reps = var_5717, x = var_5673_cast_fp16)[name = string("x_101_cast_fp16")]; - tensor var_5737 = const()[name = string("op_5737"), val = tensor([1, 4, 1, 1])]; - tensor x_107_cast_fp16 = tile(reps = var_5737, x = var_5680_cast_fp16)[name = string("x_107_cast_fp16")]; - bool var_5764_transpose_x_1 = const()[name = string("op_5764_transpose_x_1"), val = bool(false)]; - bool var_5764_transpose_y_1 = const()[name = string("op_5764_transpose_y_1"), val = bool(true)]; - tensor var_5764 = matmul(transpose_x = var_5764_transpose_x_1, transpose_y = var_5764_transpose_y_1, x = query_states_25_cast_fp16, y = x_101_cast_fp16)[name = string("op_5764")]; - fp16 var_5765_to_fp16 = const()[name = string("op_5765_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_37_cast_fp16 = mul(x = var_5764, y = var_5765_to_fp16)[name = string("attn_weights_37_cast_fp16")]; - tensor attn_weights_39_cast_fp16 = add(x = attn_weights_37_cast_fp16, y = var_2059)[name = string("attn_weights_39_cast_fp16")]; - int32 var_5800 = const()[name = string("op_5800"), val = int32(-1)]; - tensor attn_weights_41_cast_fp16 = softmax(axis = var_5800, x = attn_weights_39_cast_fp16)[name = string("attn_weights_41_cast_fp16")]; - bool attn_output_61_transpose_x_0 = const()[name = string("attn_output_61_transpose_x_0"), val = bool(false)]; - bool attn_output_61_transpose_y_0 = const()[name = string("attn_output_61_transpose_y_0"), val = bool(false)]; - tensor attn_output_61_cast_fp16 = matmul(transpose_x = attn_output_61_transpose_x_0, transpose_y = attn_output_61_transpose_y_0, x = attn_weights_41_cast_fp16, y = x_107_cast_fp16)[name = string("attn_output_61_cast_fp16")]; - tensor var_5811_perm_0 = const()[name = string("op_5811_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_5815 = const()[name = string("op_5815"), val = tensor([1, 1, 1024])]; - tensor var_5811_cast_fp16 = transpose(perm = var_5811_perm_0, x = attn_output_61_cast_fp16)[name = string("transpose_118")]; - tensor attn_output_65_cast_fp16 = reshape(shape = var_5815, x = var_5811_cast_fp16)[name = string("attn_output_65_cast_fp16")]; - tensor var_5820 = const()[name = string("op_5820"), val = tensor([0, 2, 1])]; - string var_5836_pad_type_0 = const()[name = string("op_5836_pad_type_0"), val = string("valid")]; - int32 var_5836_groups_0 = const()[name = string("op_5836_groups_0"), val = int32(1)]; - tensor var_5836_strides_0 = const()[name = string("op_5836_strides_0"), val = tensor([1])]; - tensor var_5836_pad_0 = const()[name = string("op_5836_pad_0"), val = tensor([0, 0])]; - tensor var_5836_dilations_0 = const()[name = string("op_5836_dilations_0"), val = tensor([1])]; - tensor squeeze_6_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166353664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167238464))))[name = string("squeeze_6_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_5821_cast_fp16 = transpose(perm = var_5820, x = attn_output_65_cast_fp16)[name = string("transpose_117")]; - tensor var_5836_cast_fp16 = conv(dilations = var_5836_dilations_0, groups = var_5836_groups_0, pad = var_5836_pad_0, pad_type = var_5836_pad_type_0, strides = var_5836_strides_0, weight = squeeze_6_cast_fp16_to_fp32_to_fp16_palettized, x = var_5821_cast_fp16)[name = string("op_5836_cast_fp16")]; - tensor var_5840 = const()[name = string("op_5840"), val = tensor([0, 2, 1])]; - int32 var_5851 = const()[name = string("op_5851"), val = int32(-1)]; - fp16 const_254_promoted_to_fp16 = const()[name = string("const_254_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_105_cast_fp16 = transpose(perm = var_5840, x = var_5836_cast_fp16)[name = string("transpose_116")]; - tensor var_5853_cast_fp16 = mul(x = hidden_states_105_cast_fp16, y = const_254_promoted_to_fp16)[name = string("op_5853_cast_fp16")]; - bool input_131_interleave_0 = const()[name = string("input_131_interleave_0"), val = bool(false)]; - tensor input_131_cast_fp16 = concat(axis = var_5851, interleave = input_131_interleave_0, values = (hidden_states_105_cast_fp16, var_5853_cast_fp16))[name = string("input_131_cast_fp16")]; - tensor normed_157_axes_0 = const()[name = string("normed_157_axes_0"), val = tensor([-1])]; - fp16 var_5848_to_fp16 = const()[name = string("op_5848_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_157_cast_fp16 = layer_norm(axes = normed_157_axes_0, epsilon = var_5848_to_fp16, x = input_131_cast_fp16)[name = string("normed_157_cast_fp16")]; - tensor normed_159_begin_0 = const()[name = string("normed_159_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_159_end_0 = const()[name = string("normed_159_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_159_end_mask_0 = const()[name = string("normed_159_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_159_cast_fp16 = slice_by_index(begin = normed_159_begin_0, end = normed_159_end_0, end_mask = normed_159_end_mask_0, x = normed_157_cast_fp16)[name = string("normed_159_cast_fp16")]; - tensor var_5867_to_fp16 = const()[name = string("op_5867_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167256960)))]; - tensor attn_output_69_cast_fp16 = mul(x = normed_159_cast_fp16, y = var_5867_to_fp16)[name = string("attn_output_69_cast_fp16")]; - tensor hidden_states_107_cast_fp16 = add(x = hidden_states_97_cast_fp16, y = attn_output_69_cast_fp16)[name = string("hidden_states_107_cast_fp16")]; - int32 var_5880 = const()[name = string("op_5880"), val = int32(-1)]; - fp16 const_258_promoted_to_fp16 = const()[name = string("const_258_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_5882_cast_fp16 = mul(x = hidden_states_107_cast_fp16, y = const_258_promoted_to_fp16)[name = string("op_5882_cast_fp16")]; - bool input_133_interleave_0 = const()[name = string("input_133_interleave_0"), val = bool(false)]; - tensor input_133_cast_fp16 = concat(axis = var_5880, interleave = input_133_interleave_0, values = (hidden_states_107_cast_fp16, var_5882_cast_fp16))[name = string("input_133_cast_fp16")]; - tensor normed_161_axes_0 = const()[name = string("normed_161_axes_0"), val = tensor([-1])]; - fp16 var_5877_to_fp16 = const()[name = string("op_5877_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_161_cast_fp16 = layer_norm(axes = normed_161_axes_0, epsilon = var_5877_to_fp16, x = input_133_cast_fp16)[name = string("normed_161_cast_fp16")]; - tensor normed_163_begin_0 = const()[name = string("normed_163_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_163_end_0 = const()[name = string("normed_163_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_163_end_mask_0 = const()[name = string("normed_163_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_163_cast_fp16 = slice_by_index(begin = normed_163_begin_0, end = normed_163_end_0, end_mask = normed_163_end_mask_0, x = normed_161_cast_fp16)[name = string("normed_163_cast_fp16")]; - tensor var_5896_to_fp16 = const()[name = string("op_5896_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167259328)))]; - tensor x_109_cast_fp16 = mul(x = normed_163_cast_fp16, y = var_5896_to_fp16)[name = string("x_109_cast_fp16")]; - tensor var_5908 = const()[name = string("op_5908"), val = tensor([0, 2, 1])]; - tensor input_135_axes_0 = const()[name = string("input_135_axes_0"), val = tensor([2])]; - tensor var_5909_cast_fp16 = transpose(perm = var_5908, x = x_109_cast_fp16)[name = string("transpose_115")]; - tensor input_135_cast_fp16 = expand_dims(axes = input_135_axes_0, x = var_5909_cast_fp16)[name = string("input_135_cast_fp16")]; - string x_111_pad_type_0 = const()[name = string("x_111_pad_type_0"), val = string("valid")]; - tensor x_111_strides_0 = const()[name = string("x_111_strides_0"), val = tensor([1, 1])]; - tensor x_111_pad_0 = const()[name = string("x_111_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_111_dilations_0 = const()[name = string("x_111_dilations_0"), val = tensor([1, 1])]; - int32 x_111_groups_0 = const()[name = string("x_111_groups_0"), val = int32(1)]; - tensor model_model_layers_6_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(633960576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639932608))))[name = string("model_model_layers_6_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_111_cast_fp16 = conv(dilations = x_111_dilations_0, groups = x_111_groups_0, pad = x_111_pad_0, pad_type = x_111_pad_type_0, strides = x_111_strides_0, weight = model_model_layers_6_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_135_cast_fp16)[name = string("x_111_cast_fp16")]; - string b_13_pad_type_0 = const()[name = string("b_13_pad_type_0"), val = string("valid")]; - tensor b_13_strides_0 = const()[name = string("b_13_strides_0"), val = tensor([1, 1])]; - tensor b_13_pad_0 = const()[name = string("b_13_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_13_dilations_0 = const()[name = string("b_13_dilations_0"), val = tensor([1, 1])]; - int32 b_13_groups_0 = const()[name = string("b_13_groups_0"), val = int32(1)]; - tensor model_model_layers_6_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(640043264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(646015296))))[name = string("model_model_layers_6_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_13_cast_fp16 = conv(dilations = b_13_dilations_0, groups = b_13_groups_0, pad = b_13_pad_0, pad_type = b_13_pad_type_0, strides = b_13_strides_0, weight = model_model_layers_6_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_135_cast_fp16)[name = string("b_13_cast_fp16")]; - string var_5934_mode_0 = const()[name = string("op_5934_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_5934_cast_fp16 = gelu(mode = var_5934_mode_0, x = x_111_cast_fp16)[name = string("op_5934_cast_fp16")]; - tensor input_137_cast_fp16 = mul(x = var_5934_cast_fp16, y = b_13_cast_fp16)[name = string("input_137_cast_fp16")]; - string e_13_pad_type_0 = const()[name = string("e_13_pad_type_0"), val = string("valid")]; - tensor e_13_strides_0 = const()[name = string("e_13_strides_0"), val = tensor([1, 1])]; - tensor e_13_pad_0 = const()[name = string("e_13_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_13_dilations_0 = const()[name = string("e_13_dilations_0"), val = tensor([1, 1])]; - int32 e_13_groups_0 = const()[name = string("e_13_groups_0"), val = int32(1)]; - tensor model_model_layers_6_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179427072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185399104))))[name = string("model_model_layers_6_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_13_cast_fp16 = conv(dilations = e_13_dilations_0, groups = e_13_groups_0, pad = e_13_pad_0, pad_type = e_13_pad_type_0, strides = e_13_strides_0, weight = model_model_layers_6_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_137_cast_fp16)[name = string("e_13_cast_fp16")]; - tensor var_5942_axes_0 = const()[name = string("op_5942_axes_0"), val = tensor([2])]; - tensor var_5942_cast_fp16 = squeeze(axes = var_5942_axes_0, x = e_13_cast_fp16)[name = string("op_5942_cast_fp16")]; - tensor var_5943 = const()[name = string("op_5943"), val = tensor([0, 2, 1])]; - int32 var_5954 = const()[name = string("op_5954"), val = int32(-1)]; - fp16 const_262_promoted_to_fp16 = const()[name = string("const_262_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_109_cast_fp16 = transpose(perm = var_5943, x = var_5942_cast_fp16)[name = string("transpose_114")]; - tensor var_5956_cast_fp16 = mul(x = hidden_states_109_cast_fp16, y = const_262_promoted_to_fp16)[name = string("op_5956_cast_fp16")]; - bool input_139_interleave_0 = const()[name = string("input_139_interleave_0"), val = bool(false)]; - tensor input_139_cast_fp16 = concat(axis = var_5954, interleave = input_139_interleave_0, values = (hidden_states_109_cast_fp16, var_5956_cast_fp16))[name = string("input_139_cast_fp16")]; - tensor normed_165_axes_0 = const()[name = string("normed_165_axes_0"), val = tensor([-1])]; - fp16 var_5951_to_fp16 = const()[name = string("op_5951_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_165_cast_fp16 = layer_norm(axes = normed_165_axes_0, epsilon = var_5951_to_fp16, x = input_139_cast_fp16)[name = string("normed_165_cast_fp16")]; - tensor normed_167_begin_0 = const()[name = string("normed_167_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_167_end_0 = const()[name = string("normed_167_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_167_end_mask_0 = const()[name = string("normed_167_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_167_cast_fp16 = slice_by_index(begin = normed_167_begin_0, end = normed_167_end_0, end_mask = normed_167_end_mask_0, x = normed_165_cast_fp16)[name = string("normed_167_cast_fp16")]; - tensor var_5970_to_fp16 = const()[name = string("op_5970_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185417600)))]; - tensor hidden_states_111_cast_fp16 = mul(x = normed_167_cast_fp16, y = var_5970_to_fp16)[name = string("hidden_states_111_cast_fp16")]; - tensor hidden_states_113_cast_fp16 = add(x = hidden_states_107_cast_fp16, y = hidden_states_111_cast_fp16)[name = string("hidden_states_113_cast_fp16")]; - int32 var_6021 = const()[name = string("op_6021"), val = int32(-1)]; - fp16 const_266_promoted_to_fp16 = const()[name = string("const_266_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_6023_cast_fp16 = mul(x = hidden_states_113_cast_fp16, y = const_266_promoted_to_fp16)[name = string("op_6023_cast_fp16")]; - bool input_141_interleave_0 = const()[name = string("input_141_interleave_0"), val = bool(false)]; - tensor input_141_cast_fp16 = concat(axis = var_6021, interleave = input_141_interleave_0, values = (hidden_states_113_cast_fp16, var_6023_cast_fp16))[name = string("input_141_cast_fp16")]; - tensor normed_169_axes_0 = const()[name = string("normed_169_axes_0"), val = tensor([-1])]; - fp16 var_6018_to_fp16 = const()[name = string("op_6018_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_169_cast_fp16 = layer_norm(axes = normed_169_axes_0, epsilon = var_6018_to_fp16, x = input_141_cast_fp16)[name = string("normed_169_cast_fp16")]; - tensor normed_171_begin_0 = const()[name = string("normed_171_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_171_end_0 = const()[name = string("normed_171_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_171_end_mask_0 = const()[name = string("normed_171_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_171_cast_fp16 = slice_by_index(begin = normed_171_begin_0, end = normed_171_end_0, end_mask = normed_171_end_mask_0, x = normed_169_cast_fp16)[name = string("normed_171_cast_fp16")]; - tensor var_6037_to_fp16 = const()[name = string("op_6037_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185419968)))]; - tensor hidden_states_115_cast_fp16 = mul(x = normed_171_cast_fp16, y = var_6037_to_fp16)[name = string("hidden_states_115_cast_fp16")]; - tensor var_6042 = const()[name = string("op_6042"), val = tensor([0, 2, 1])]; - tensor var_6045_axes_0 = const()[name = string("op_6045_axes_0"), val = tensor([2])]; - tensor var_6043_cast_fp16 = transpose(perm = var_6042, x = hidden_states_115_cast_fp16)[name = string("transpose_113")]; - tensor var_6045_cast_fp16 = expand_dims(axes = var_6045_axes_0, x = var_6043_cast_fp16)[name = string("op_6045_cast_fp16")]; - string var_6061_pad_type_0 = const()[name = string("op_6061_pad_type_0"), val = string("valid")]; - tensor var_6061_strides_0 = const()[name = string("op_6061_strides_0"), val = tensor([1, 1])]; - tensor var_6061_pad_0 = const()[name = string("op_6061_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_6061_dilations_0 = const()[name = string("op_6061_dilations_0"), val = tensor([1, 1])]; - int32 var_6061_groups_0 = const()[name = string("op_6061_groups_0"), val = int32(1)]; - tensor var_6061 = conv(dilations = var_6061_dilations_0, groups = var_6061_groups_0, pad = var_6061_pad_0, pad_type = var_6061_pad_type_0, strides = var_6061_strides_0, weight = model_model_layers_7_self_attn_q_proj_weight_palettized, x = var_6045_cast_fp16)[name = string("op_6061")]; - tensor var_6066 = const()[name = string("op_6066"), val = tensor([1, 4, 1, 256])]; - tensor var_6067 = reshape(shape = var_6066, x = var_6061)[name = string("op_6067")]; - string var_6083_pad_type_0 = const()[name = string("op_6083_pad_type_0"), val = string("valid")]; - tensor var_6083_strides_0 = const()[name = string("op_6083_strides_0"), val = tensor([1, 1])]; - tensor var_6083_pad_0 = const()[name = string("op_6083_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_6083_dilations_0 = const()[name = string("op_6083_dilations_0"), val = tensor([1, 1])]; - int32 var_6083_groups_0 = const()[name = string("op_6083_groups_0"), val = int32(1)]; - tensor var_6083 = conv(dilations = var_6083_dilations_0, groups = var_6083_groups_0, pad = var_6083_pad_0, pad_type = var_6083_pad_type_0, strides = var_6083_strides_0, weight = model_model_layers_7_self_attn_k_proj_weight_palettized, x = var_6045_cast_fp16)[name = string("op_6083")]; - tensor var_6088 = const()[name = string("op_6088"), val = tensor([1, 1, 1, 256])]; - tensor var_6089 = reshape(shape = var_6088, x = var_6083)[name = string("op_6089")]; - string var_6105_pad_type_0 = const()[name = string("op_6105_pad_type_0"), val = string("valid")]; - tensor var_6105_strides_0 = const()[name = string("op_6105_strides_0"), val = tensor([1, 1])]; - tensor var_6105_pad_0 = const()[name = string("op_6105_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_6105_dilations_0 = const()[name = string("op_6105_dilations_0"), val = tensor([1, 1])]; - int32 var_6105_groups_0 = const()[name = string("op_6105_groups_0"), val = int32(1)]; - tensor var_6105 = conv(dilations = var_6105_dilations_0, groups = var_6105_groups_0, pad = var_6105_pad_0, pad_type = var_6105_pad_type_0, strides = var_6105_strides_0, weight = model_model_layers_7_self_attn_v_proj_weight_palettized, x = var_6045_cast_fp16)[name = string("op_6105")]; - tensor var_6110 = const()[name = string("op_6110"), val = tensor([1, 1, 1, 256])]; - tensor var_6111 = reshape(shape = var_6110, x = var_6105)[name = string("op_6111")]; - int32 var_6126 = const()[name = string("op_6126"), val = int32(-1)]; - fp16 const_270_promoted = const()[name = string("const_270_promoted"), val = fp16(-0x1p+0)]; - tensor var_6128 = mul(x = var_6067, y = const_270_promoted)[name = string("op_6128")]; - bool input_145_interleave_0 = const()[name = string("input_145_interleave_0"), val = bool(false)]; - tensor input_145 = concat(axis = var_6126, interleave = input_145_interleave_0, values = (var_6067, var_6128))[name = string("input_145")]; - tensor normed_173_axes_0 = const()[name = string("normed_173_axes_0"), val = tensor([-1])]; - fp16 var_6123_to_fp16 = const()[name = string("op_6123_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_173_cast_fp16 = layer_norm(axes = normed_173_axes_0, epsilon = var_6123_to_fp16, x = input_145)[name = string("normed_173_cast_fp16")]; - tensor normed_175_begin_0 = const()[name = string("normed_175_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_175_end_0 = const()[name = string("normed_175_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_175_end_mask_0 = const()[name = string("normed_175_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_175 = slice_by_index(begin = normed_175_begin_0, end = normed_175_end_0, end_mask = normed_175_end_mask_0, x = normed_173_cast_fp16)[name = string("normed_175")]; - tensor var_6142_to_fp16 = const()[name = string("op_6142_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185422336)))]; - tensor q_15_cast_fp16 = mul(x = normed_175, y = var_6142_to_fp16)[name = string("q_15_cast_fp16")]; - int32 var_6153 = const()[name = string("op_6153"), val = int32(-1)]; - fp16 const_274_promoted = const()[name = string("const_274_promoted"), val = fp16(-0x1p+0)]; - tensor var_6155 = mul(x = var_6089, y = const_274_promoted)[name = string("op_6155")]; - bool input_147_interleave_0 = const()[name = string("input_147_interleave_0"), val = bool(false)]; - tensor input_147 = concat(axis = var_6153, interleave = input_147_interleave_0, values = (var_6089, var_6155))[name = string("input_147")]; - tensor normed_177_axes_0 = const()[name = string("normed_177_axes_0"), val = tensor([-1])]; - fp16 var_6150_to_fp16 = const()[name = string("op_6150_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_177_cast_fp16 = layer_norm(axes = normed_177_axes_0, epsilon = var_6150_to_fp16, x = input_147)[name = string("normed_177_cast_fp16")]; - tensor normed_179_begin_0 = const()[name = string("normed_179_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_179_end_0 = const()[name = string("normed_179_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_179_end_mask_0 = const()[name = string("normed_179_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_179 = slice_by_index(begin = normed_179_begin_0, end = normed_179_end_0, end_mask = normed_179_end_mask_0, x = normed_177_cast_fp16)[name = string("normed_179")]; - tensor var_6169_to_fp16 = const()[name = string("op_6169_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185422912)))]; - tensor k_15_cast_fp16 = mul(x = normed_179, y = var_6169_to_fp16)[name = string("k_15_cast_fp16")]; - tensor var_6171_cast_fp16 = mul(x = q_15_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6171_cast_fp16")]; - tensor x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_29_cast_fp16 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = q_15_cast_fp16)[name = string("x1_29_cast_fp16")]; - tensor x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_29_cast_fp16 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = q_15_cast_fp16)[name = string("x2_29_cast_fp16")]; - fp16 const_280_promoted_to_fp16 = const()[name = string("const_280_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_6192_cast_fp16 = mul(x = x2_29_cast_fp16, y = const_280_promoted_to_fp16)[name = string("op_6192_cast_fp16")]; - int32 var_6194 = const()[name = string("op_6194"), val = int32(-1)]; - bool var_6195_interleave_0 = const()[name = string("op_6195_interleave_0"), val = bool(false)]; - tensor var_6195_cast_fp16 = concat(axis = var_6194, interleave = var_6195_interleave_0, values = (var_6192_cast_fp16, x1_29_cast_fp16))[name = string("op_6195_cast_fp16")]; - tensor var_6196_cast_fp16 = mul(x = var_6195_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6196_cast_fp16")]; - tensor query_states_29_cast_fp16 = add(x = var_6171_cast_fp16, y = var_6196_cast_fp16)[name = string("query_states_29_cast_fp16")]; - tensor var_6199_cast_fp16 = mul(x = k_15_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6199_cast_fp16")]; - tensor x1_31_begin_0 = const()[name = string("x1_31_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_31_end_0 = const()[name = string("x1_31_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_31_end_mask_0 = const()[name = string("x1_31_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_31_cast_fp16 = slice_by_index(begin = x1_31_begin_0, end = x1_31_end_0, end_mask = x1_31_end_mask_0, x = k_15_cast_fp16)[name = string("x1_31_cast_fp16")]; - tensor x2_31_begin_0 = const()[name = string("x2_31_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_31_end_0 = const()[name = string("x2_31_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_31_end_mask_0 = const()[name = string("x2_31_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_31_cast_fp16 = slice_by_index(begin = x2_31_begin_0, end = x2_31_end_0, end_mask = x2_31_end_mask_0, x = k_15_cast_fp16)[name = string("x2_31_cast_fp16")]; - fp16 const_283_promoted_to_fp16 = const()[name = string("const_283_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_6220_cast_fp16 = mul(x = x2_31_cast_fp16, y = const_283_promoted_to_fp16)[name = string("op_6220_cast_fp16")]; - int32 var_6222 = const()[name = string("op_6222"), val = int32(-1)]; - bool var_6223_interleave_0 = const()[name = string("op_6223_interleave_0"), val = bool(false)]; - tensor var_6223_cast_fp16 = concat(axis = var_6222, interleave = var_6223_interleave_0, values = (var_6220_cast_fp16, x1_31_cast_fp16))[name = string("op_6223_cast_fp16")]; - tensor var_6224_cast_fp16 = mul(x = var_6223_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6224_cast_fp16")]; - tensor key_states_29_cast_fp16 = add(x = var_6199_cast_fp16, y = var_6224_cast_fp16)[name = string("key_states_29_cast_fp16")]; - tensor key_slice_13_begin_0 = const()[name = string("key_slice_13_begin_0"), val = tensor([6, 0, 0, 0])]; - tensor key_slice_13_end_0 = const()[name = string("key_slice_13_end_0"), val = tensor([7, 1, 512, 256])]; - tensor key_slice_13_end_mask_0 = const()[name = string("key_slice_13_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_13_cast_fp16 = slice_by_index(begin = key_slice_13_begin_0, end = key_slice_13_end_0, end_mask = key_slice_13_end_mask_0, x = coreml_update_state_65)[name = string("key_slice_13_cast_fp16")]; - tensor key_tail_13_begin_0 = const()[name = string("key_tail_13_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor key_tail_13_end_0 = const()[name = string("key_tail_13_end_0"), val = tensor([1, 1, 512, 256])]; - tensor key_tail_13_cast_fp16 = slice_by_index(begin = key_tail_13_begin_0, end = key_tail_13_end_0, x = key_slice_13_cast_fp16)[name = string("key_tail_13_cast_fp16")]; - int32 var_6237 = const()[name = string("op_6237"), val = int32(2)]; - bool shifted_key_13_interleave_0 = const()[name = string("shifted_key_13_interleave_0"), val = bool(false)]; - tensor shifted_key_13_cast_fp16 = concat(axis = var_6237, interleave = shifted_key_13_interleave_0, values = (key_tail_13_cast_fp16, key_states_29_cast_fp16))[name = string("shifted_key_13_cast_fp16")]; - tensor concat_32 = const()[name = string("concat_32"), val = tensor([6, 0, 0, 0])]; - tensor concat_33 = const()[name = string("concat_33"), val = tensor([7, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_13_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_13_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_13_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_32, begin_mask = model_model_kv_cache_local_internal_tensor_assign_13_begin_mask_0, end = concat_33, end_mask = model_model_kv_cache_local_internal_tensor_assign_13_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_13_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_13_stride_0, update = shifted_key_13_cast_fp16, x = coreml_update_state_65)[name = string("model_model_kv_cache_local_internal_tensor_assign_13_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_13_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_66_write_state")]; - tensor coreml_update_state_66 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_66")]; - tensor value_slice_13_begin_0 = const()[name = string("value_slice_13_begin_0"), val = tensor([28, 0, 0, 0])]; - tensor value_slice_13_end_0 = const()[name = string("value_slice_13_end_0"), val = tensor([29, 1, 512, 256])]; - tensor value_slice_13_end_mask_0 = const()[name = string("value_slice_13_end_mask_0"), val = tensor([false, true, true, true])]; - tensor value_slice_13_cast_fp16 = slice_by_index(begin = value_slice_13_begin_0, end = value_slice_13_end_0, end_mask = value_slice_13_end_mask_0, x = coreml_update_state_66)[name = string("value_slice_13_cast_fp16")]; - tensor value_tail_13_begin_0 = const()[name = string("value_tail_13_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor value_tail_13_end_0 = const()[name = string("value_tail_13_end_0"), val = tensor([1, 1, 512, 256])]; - tensor value_tail_13_cast_fp16 = slice_by_index(begin = value_tail_13_begin_0, end = value_tail_13_end_0, x = value_slice_13_cast_fp16)[name = string("value_tail_13_cast_fp16")]; - int32 var_6271 = const()[name = string("op_6271"), val = int32(2)]; - bool shifted_value_13_interleave_0 = const()[name = string("shifted_value_13_interleave_0"), val = bool(false)]; - tensor shifted_value_13_cast_fp16 = concat(axis = var_6271, interleave = shifted_value_13_interleave_0, values = (value_tail_13_cast_fp16, var_6111))[name = string("shifted_value_13_cast_fp16")]; - tensor concat_34 = const()[name = string("concat_34"), val = tensor([28, 0, 0, 0])]; - tensor concat_35 = const()[name = string("concat_35"), val = tensor([29, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_14_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_14_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_14_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_34, begin_mask = model_model_kv_cache_local_internal_tensor_assign_14_begin_mask_0, end = concat_35, end_mask = model_model_kv_cache_local_internal_tensor_assign_14_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_14_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_14_stride_0, update = shifted_value_13_cast_fp16, x = coreml_update_state_66)[name = string("model_model_kv_cache_local_internal_tensor_assign_14_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_14_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_67_write_state")]; - tensor coreml_update_state_67 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_67")]; - tensor var_6299_begin_0 = const()[name = string("op_6299_begin_0"), val = tensor([6, 0, 0, 0])]; - tensor var_6299_end_0 = const()[name = string("op_6299_end_0"), val = tensor([7, 1, 512, 256])]; - tensor var_6299_end_mask_0 = const()[name = string("op_6299_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_6299_cast_fp16 = slice_by_index(begin = var_6299_begin_0, end = var_6299_end_0, end_mask = var_6299_end_mask_0, x = coreml_update_state_67)[name = string("op_6299_cast_fp16")]; - tensor var_6306_begin_0 = const()[name = string("op_6306_begin_0"), val = tensor([28, 0, 0, 0])]; - tensor var_6306_end_0 = const()[name = string("op_6306_end_0"), val = tensor([29, 1, 512, 256])]; - tensor var_6306_end_mask_0 = const()[name = string("op_6306_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_6306_cast_fp16 = slice_by_index(begin = var_6306_begin_0, end = var_6306_end_0, end_mask = var_6306_end_mask_0, x = coreml_update_state_67)[name = string("op_6306_cast_fp16")]; - tensor var_6343 = const()[name = string("op_6343"), val = tensor([1, 4, 1, 1])]; - tensor x_117_cast_fp16 = tile(reps = var_6343, x = var_6299_cast_fp16)[name = string("x_117_cast_fp16")]; - tensor var_6363 = const()[name = string("op_6363"), val = tensor([1, 4, 1, 1])]; - tensor x_123_cast_fp16 = tile(reps = var_6363, x = var_6306_cast_fp16)[name = string("x_123_cast_fp16")]; - bool var_6390_transpose_x_1 = const()[name = string("op_6390_transpose_x_1"), val = bool(false)]; - bool var_6390_transpose_y_1 = const()[name = string("op_6390_transpose_y_1"), val = bool(true)]; - tensor var_6390 = matmul(transpose_x = var_6390_transpose_x_1, transpose_y = var_6390_transpose_y_1, x = query_states_29_cast_fp16, y = x_117_cast_fp16)[name = string("op_6390")]; - fp16 var_6391_to_fp16 = const()[name = string("op_6391_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_43_cast_fp16 = mul(x = var_6390, y = var_6391_to_fp16)[name = string("attn_weights_43_cast_fp16")]; - tensor attn_weights_45_cast_fp16 = add(x = attn_weights_43_cast_fp16, y = var_2059)[name = string("attn_weights_45_cast_fp16")]; - int32 var_6426 = const()[name = string("op_6426"), val = int32(-1)]; - tensor attn_weights_47_cast_fp16 = softmax(axis = var_6426, x = attn_weights_45_cast_fp16)[name = string("attn_weights_47_cast_fp16")]; - bool attn_output_71_transpose_x_0 = const()[name = string("attn_output_71_transpose_x_0"), val = bool(false)]; - bool attn_output_71_transpose_y_0 = const()[name = string("attn_output_71_transpose_y_0"), val = bool(false)]; - tensor attn_output_71_cast_fp16 = matmul(transpose_x = attn_output_71_transpose_x_0, transpose_y = attn_output_71_transpose_y_0, x = attn_weights_47_cast_fp16, y = x_123_cast_fp16)[name = string("attn_output_71_cast_fp16")]; - tensor var_6437_perm_0 = const()[name = string("op_6437_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_6441 = const()[name = string("op_6441"), val = tensor([1, 1, 1024])]; - tensor var_6437_cast_fp16 = transpose(perm = var_6437_perm_0, x = attn_output_71_cast_fp16)[name = string("transpose_112")]; - tensor attn_output_75_cast_fp16 = reshape(shape = var_6441, x = var_6437_cast_fp16)[name = string("attn_output_75_cast_fp16")]; - tensor var_6446 = const()[name = string("op_6446"), val = tensor([0, 2, 1])]; - string var_6462_pad_type_0 = const()[name = string("op_6462_pad_type_0"), val = string("valid")]; - int32 var_6462_groups_0 = const()[name = string("op_6462_groups_0"), val = int32(1)]; - tensor var_6462_strides_0 = const()[name = string("op_6462_strides_0"), val = tensor([1])]; - tensor var_6462_pad_0 = const()[name = string("op_6462_pad_0"), val = tensor([0, 0])]; - tensor var_6462_dilations_0 = const()[name = string("op_6462_dilations_0"), val = tensor([1])]; - tensor squeeze_7_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185423488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186308288))))[name = string("squeeze_7_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_6447_cast_fp16 = transpose(perm = var_6446, x = attn_output_75_cast_fp16)[name = string("transpose_111")]; - tensor var_6462_cast_fp16 = conv(dilations = var_6462_dilations_0, groups = var_6462_groups_0, pad = var_6462_pad_0, pad_type = var_6462_pad_type_0, strides = var_6462_strides_0, weight = squeeze_7_cast_fp16_to_fp32_to_fp16_palettized, x = var_6447_cast_fp16)[name = string("op_6462_cast_fp16")]; - tensor var_6466 = const()[name = string("op_6466"), val = tensor([0, 2, 1])]; - int32 var_6477 = const()[name = string("op_6477"), val = int32(-1)]; - fp16 const_292_promoted_to_fp16 = const()[name = string("const_292_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_121_cast_fp16 = transpose(perm = var_6466, x = var_6462_cast_fp16)[name = string("transpose_110")]; - tensor var_6479_cast_fp16 = mul(x = hidden_states_121_cast_fp16, y = const_292_promoted_to_fp16)[name = string("op_6479_cast_fp16")]; - bool input_151_interleave_0 = const()[name = string("input_151_interleave_0"), val = bool(false)]; - tensor input_151_cast_fp16 = concat(axis = var_6477, interleave = input_151_interleave_0, values = (hidden_states_121_cast_fp16, var_6479_cast_fp16))[name = string("input_151_cast_fp16")]; - tensor normed_181_axes_0 = const()[name = string("normed_181_axes_0"), val = tensor([-1])]; - fp16 var_6474_to_fp16 = const()[name = string("op_6474_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_181_cast_fp16 = layer_norm(axes = normed_181_axes_0, epsilon = var_6474_to_fp16, x = input_151_cast_fp16)[name = string("normed_181_cast_fp16")]; - tensor normed_183_begin_0 = const()[name = string("normed_183_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_183_end_0 = const()[name = string("normed_183_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_183_end_mask_0 = const()[name = string("normed_183_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_183_cast_fp16 = slice_by_index(begin = normed_183_begin_0, end = normed_183_end_0, end_mask = normed_183_end_mask_0, x = normed_181_cast_fp16)[name = string("normed_183_cast_fp16")]; - tensor var_6493_to_fp16 = const()[name = string("op_6493_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186326784)))]; - tensor attn_output_79_cast_fp16 = mul(x = normed_183_cast_fp16, y = var_6493_to_fp16)[name = string("attn_output_79_cast_fp16")]; - tensor hidden_states_123_cast_fp16 = add(x = hidden_states_113_cast_fp16, y = attn_output_79_cast_fp16)[name = string("hidden_states_123_cast_fp16")]; - int32 var_6506 = const()[name = string("op_6506"), val = int32(-1)]; - fp16 const_296_promoted_to_fp16 = const()[name = string("const_296_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_6508_cast_fp16 = mul(x = hidden_states_123_cast_fp16, y = const_296_promoted_to_fp16)[name = string("op_6508_cast_fp16")]; - bool input_153_interleave_0 = const()[name = string("input_153_interleave_0"), val = bool(false)]; - tensor input_153_cast_fp16 = concat(axis = var_6506, interleave = input_153_interleave_0, values = (hidden_states_123_cast_fp16, var_6508_cast_fp16))[name = string("input_153_cast_fp16")]; - tensor normed_185_axes_0 = const()[name = string("normed_185_axes_0"), val = tensor([-1])]; - fp16 var_6503_to_fp16 = const()[name = string("op_6503_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_185_cast_fp16 = layer_norm(axes = normed_185_axes_0, epsilon = var_6503_to_fp16, x = input_153_cast_fp16)[name = string("normed_185_cast_fp16")]; - tensor normed_187_begin_0 = const()[name = string("normed_187_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_187_end_0 = const()[name = string("normed_187_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_187_end_mask_0 = const()[name = string("normed_187_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_187_cast_fp16 = slice_by_index(begin = normed_187_begin_0, end = normed_187_end_0, end_mask = normed_187_end_mask_0, x = normed_185_cast_fp16)[name = string("normed_187_cast_fp16")]; - tensor var_6522_to_fp16 = const()[name = string("op_6522_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186329152)))]; - tensor x_125_cast_fp16 = mul(x = normed_187_cast_fp16, y = var_6522_to_fp16)[name = string("x_125_cast_fp16")]; - tensor var_6534 = const()[name = string("op_6534"), val = tensor([0, 2, 1])]; - tensor input_155_axes_0 = const()[name = string("input_155_axes_0"), val = tensor([2])]; - tensor var_6535_cast_fp16 = transpose(perm = var_6534, x = x_125_cast_fp16)[name = string("transpose_109")]; - tensor input_155_cast_fp16 = expand_dims(axes = input_155_axes_0, x = var_6535_cast_fp16)[name = string("input_155_cast_fp16")]; - string x_127_pad_type_0 = const()[name = string("x_127_pad_type_0"), val = string("valid")]; - tensor x_127_strides_0 = const()[name = string("x_127_strides_0"), val = tensor([1, 1])]; - tensor x_127_pad_0 = const()[name = string("x_127_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_127_dilations_0 = const()[name = string("x_127_dilations_0"), val = tensor([1, 1])]; - int32 x_127_groups_0 = const()[name = string("x_127_groups_0"), val = int32(1)]; - tensor model_model_layers_7_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(646125952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(652097984))))[name = string("model_model_layers_7_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_127_cast_fp16 = conv(dilations = x_127_dilations_0, groups = x_127_groups_0, pad = x_127_pad_0, pad_type = x_127_pad_type_0, strides = x_127_strides_0, weight = model_model_layers_7_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_155_cast_fp16)[name = string("x_127_cast_fp16")]; - string b_15_pad_type_0 = const()[name = string("b_15_pad_type_0"), val = string("valid")]; - tensor b_15_strides_0 = const()[name = string("b_15_strides_0"), val = tensor([1, 1])]; - tensor b_15_pad_0 = const()[name = string("b_15_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_15_dilations_0 = const()[name = string("b_15_dilations_0"), val = tensor([1, 1])]; - int32 b_15_groups_0 = const()[name = string("b_15_groups_0"), val = int32(1)]; - tensor model_model_layers_7_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(652208640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(658180672))))[name = string("model_model_layers_7_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_15_cast_fp16 = conv(dilations = b_15_dilations_0, groups = b_15_groups_0, pad = b_15_pad_0, pad_type = b_15_pad_type_0, strides = b_15_strides_0, weight = model_model_layers_7_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_155_cast_fp16)[name = string("b_15_cast_fp16")]; - string var_6560_mode_0 = const()[name = string("op_6560_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_6560_cast_fp16 = gelu(mode = var_6560_mode_0, x = x_127_cast_fp16)[name = string("op_6560_cast_fp16")]; - tensor input_157_cast_fp16 = mul(x = var_6560_cast_fp16, y = b_15_cast_fp16)[name = string("input_157_cast_fp16")]; - string e_15_pad_type_0 = const()[name = string("e_15_pad_type_0"), val = string("valid")]; - tensor e_15_strides_0 = const()[name = string("e_15_strides_0"), val = tensor([1, 1])]; - tensor e_15_pad_0 = const()[name = string("e_15_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_15_dilations_0 = const()[name = string("e_15_dilations_0"), val = tensor([1, 1])]; - int32 e_15_groups_0 = const()[name = string("e_15_groups_0"), val = int32(1)]; - tensor model_model_layers_7_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198496896))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204468928))))[name = string("model_model_layers_7_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_15_cast_fp16 = conv(dilations = e_15_dilations_0, groups = e_15_groups_0, pad = e_15_pad_0, pad_type = e_15_pad_type_0, strides = e_15_strides_0, weight = model_model_layers_7_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_157_cast_fp16)[name = string("e_15_cast_fp16")]; - tensor var_6568_axes_0 = const()[name = string("op_6568_axes_0"), val = tensor([2])]; - tensor var_6568_cast_fp16 = squeeze(axes = var_6568_axes_0, x = e_15_cast_fp16)[name = string("op_6568_cast_fp16")]; - tensor var_6569 = const()[name = string("op_6569"), val = tensor([0, 2, 1])]; - int32 var_6580 = const()[name = string("op_6580"), val = int32(-1)]; - fp16 const_300_promoted_to_fp16 = const()[name = string("const_300_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_125_cast_fp16 = transpose(perm = var_6569, x = var_6568_cast_fp16)[name = string("transpose_108")]; - tensor var_6582_cast_fp16 = mul(x = hidden_states_125_cast_fp16, y = const_300_promoted_to_fp16)[name = string("op_6582_cast_fp16")]; - bool input_159_interleave_0 = const()[name = string("input_159_interleave_0"), val = bool(false)]; - tensor input_159_cast_fp16 = concat(axis = var_6580, interleave = input_159_interleave_0, values = (hidden_states_125_cast_fp16, var_6582_cast_fp16))[name = string("input_159_cast_fp16")]; - tensor normed_189_axes_0 = const()[name = string("normed_189_axes_0"), val = tensor([-1])]; - fp16 var_6577_to_fp16 = const()[name = string("op_6577_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_189_cast_fp16 = layer_norm(axes = normed_189_axes_0, epsilon = var_6577_to_fp16, x = input_159_cast_fp16)[name = string("normed_189_cast_fp16")]; - tensor normed_191_begin_0 = const()[name = string("normed_191_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_191_end_0 = const()[name = string("normed_191_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_191_end_mask_0 = const()[name = string("normed_191_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_191_cast_fp16 = slice_by_index(begin = normed_191_begin_0, end = normed_191_end_0, end_mask = normed_191_end_mask_0, x = normed_189_cast_fp16)[name = string("normed_191_cast_fp16")]; - tensor var_6596_to_fp16 = const()[name = string("op_6596_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204487424)))]; - tensor hidden_states_127_cast_fp16 = mul(x = normed_191_cast_fp16, y = var_6596_to_fp16)[name = string("hidden_states_127_cast_fp16")]; - tensor hidden_states_129_cast_fp16 = add(x = hidden_states_123_cast_fp16, y = hidden_states_127_cast_fp16)[name = string("hidden_states_129_cast_fp16")]; - int32 var_6647 = const()[name = string("op_6647"), val = int32(-1)]; - fp16 const_304_promoted_to_fp16 = const()[name = string("const_304_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_6649_cast_fp16 = mul(x = hidden_states_129_cast_fp16, y = const_304_promoted_to_fp16)[name = string("op_6649_cast_fp16")]; - bool input_161_interleave_0 = const()[name = string("input_161_interleave_0"), val = bool(false)]; - tensor input_161_cast_fp16 = concat(axis = var_6647, interleave = input_161_interleave_0, values = (hidden_states_129_cast_fp16, var_6649_cast_fp16))[name = string("input_161_cast_fp16")]; - tensor normed_193_axes_0 = const()[name = string("normed_193_axes_0"), val = tensor([-1])]; - fp16 var_6644_to_fp16 = const()[name = string("op_6644_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_193_cast_fp16 = layer_norm(axes = normed_193_axes_0, epsilon = var_6644_to_fp16, x = input_161_cast_fp16)[name = string("normed_193_cast_fp16")]; - tensor normed_195_begin_0 = const()[name = string("normed_195_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_195_end_0 = const()[name = string("normed_195_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_195_end_mask_0 = const()[name = string("normed_195_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_195_cast_fp16 = slice_by_index(begin = normed_195_begin_0, end = normed_195_end_0, end_mask = normed_195_end_mask_0, x = normed_193_cast_fp16)[name = string("normed_195_cast_fp16")]; - tensor var_6663_to_fp16 = const()[name = string("op_6663_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204489792)))]; - tensor hidden_states_131_cast_fp16 = mul(x = normed_195_cast_fp16, y = var_6663_to_fp16)[name = string("hidden_states_131_cast_fp16")]; - tensor var_6668 = const()[name = string("op_6668"), val = tensor([0, 2, 1])]; - tensor var_6671_axes_0 = const()[name = string("op_6671_axes_0"), val = tensor([2])]; - tensor var_6669_cast_fp16 = transpose(perm = var_6668, x = hidden_states_131_cast_fp16)[name = string("transpose_107")]; - tensor var_6671_cast_fp16 = expand_dims(axes = var_6671_axes_0, x = var_6669_cast_fp16)[name = string("op_6671_cast_fp16")]; - string var_6687_pad_type_0 = const()[name = string("op_6687_pad_type_0"), val = string("valid")]; - tensor var_6687_strides_0 = const()[name = string("op_6687_strides_0"), val = tensor([1, 1])]; - tensor var_6687_pad_0 = const()[name = string("op_6687_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_6687_dilations_0 = const()[name = string("op_6687_dilations_0"), val = tensor([1, 1])]; - int32 var_6687_groups_0 = const()[name = string("op_6687_groups_0"), val = int32(1)]; - tensor var_6687 = conv(dilations = var_6687_dilations_0, groups = var_6687_groups_0, pad = var_6687_pad_0, pad_type = var_6687_pad_type_0, strides = var_6687_strides_0, weight = model_model_layers_8_self_attn_q_proj_weight_palettized, x = var_6671_cast_fp16)[name = string("op_6687")]; - tensor var_6692 = const()[name = string("op_6692"), val = tensor([1, 4, 1, 256])]; - tensor var_6693 = reshape(shape = var_6692, x = var_6687)[name = string("op_6693")]; - string var_6709_pad_type_0 = const()[name = string("op_6709_pad_type_0"), val = string("valid")]; - tensor var_6709_strides_0 = const()[name = string("op_6709_strides_0"), val = tensor([1, 1])]; - tensor var_6709_pad_0 = const()[name = string("op_6709_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_6709_dilations_0 = const()[name = string("op_6709_dilations_0"), val = tensor([1, 1])]; - int32 var_6709_groups_0 = const()[name = string("op_6709_groups_0"), val = int32(1)]; - tensor var_6709 = conv(dilations = var_6709_dilations_0, groups = var_6709_groups_0, pad = var_6709_pad_0, pad_type = var_6709_pad_type_0, strides = var_6709_strides_0, weight = model_model_layers_8_self_attn_k_proj_weight_palettized, x = var_6671_cast_fp16)[name = string("op_6709")]; - tensor var_6714 = const()[name = string("op_6714"), val = tensor([1, 1, 1, 256])]; - tensor var_6715 = reshape(shape = var_6714, x = var_6709)[name = string("op_6715")]; - string var_6731_pad_type_0 = const()[name = string("op_6731_pad_type_0"), val = string("valid")]; - tensor var_6731_strides_0 = const()[name = string("op_6731_strides_0"), val = tensor([1, 1])]; - tensor var_6731_pad_0 = const()[name = string("op_6731_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_6731_dilations_0 = const()[name = string("op_6731_dilations_0"), val = tensor([1, 1])]; - int32 var_6731_groups_0 = const()[name = string("op_6731_groups_0"), val = int32(1)]; - tensor var_6731 = conv(dilations = var_6731_dilations_0, groups = var_6731_groups_0, pad = var_6731_pad_0, pad_type = var_6731_pad_type_0, strides = var_6731_strides_0, weight = model_model_layers_8_self_attn_v_proj_weight_palettized, x = var_6671_cast_fp16)[name = string("op_6731")]; - tensor var_6736 = const()[name = string("op_6736"), val = tensor([1, 1, 1, 256])]; - tensor var_6737 = reshape(shape = var_6736, x = var_6731)[name = string("op_6737")]; - int32 var_6752 = const()[name = string("op_6752"), val = int32(-1)]; - fp16 const_308_promoted = const()[name = string("const_308_promoted"), val = fp16(-0x1p+0)]; - tensor var_6754 = mul(x = var_6693, y = const_308_promoted)[name = string("op_6754")]; - bool input_165_interleave_0 = const()[name = string("input_165_interleave_0"), val = bool(false)]; - tensor input_165 = concat(axis = var_6752, interleave = input_165_interleave_0, values = (var_6693, var_6754))[name = string("input_165")]; - tensor normed_197_axes_0 = const()[name = string("normed_197_axes_0"), val = tensor([-1])]; - fp16 var_6749_to_fp16 = const()[name = string("op_6749_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_197_cast_fp16 = layer_norm(axes = normed_197_axes_0, epsilon = var_6749_to_fp16, x = input_165)[name = string("normed_197_cast_fp16")]; - tensor normed_199_begin_0 = const()[name = string("normed_199_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_199_end_0 = const()[name = string("normed_199_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_199_end_mask_0 = const()[name = string("normed_199_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_199 = slice_by_index(begin = normed_199_begin_0, end = normed_199_end_0, end_mask = normed_199_end_mask_0, x = normed_197_cast_fp16)[name = string("normed_199")]; - tensor var_6768_to_fp16 = const()[name = string("op_6768_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204492160)))]; - tensor q_17_cast_fp16 = mul(x = normed_199, y = var_6768_to_fp16)[name = string("q_17_cast_fp16")]; - int32 var_6779 = const()[name = string("op_6779"), val = int32(-1)]; - fp16 const_312_promoted = const()[name = string("const_312_promoted"), val = fp16(-0x1p+0)]; - tensor var_6781 = mul(x = var_6715, y = const_312_promoted)[name = string("op_6781")]; - bool input_167_interleave_0 = const()[name = string("input_167_interleave_0"), val = bool(false)]; - tensor input_167 = concat(axis = var_6779, interleave = input_167_interleave_0, values = (var_6715, var_6781))[name = string("input_167")]; - tensor normed_201_axes_0 = const()[name = string("normed_201_axes_0"), val = tensor([-1])]; - fp16 var_6776_to_fp16 = const()[name = string("op_6776_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_201_cast_fp16 = layer_norm(axes = normed_201_axes_0, epsilon = var_6776_to_fp16, x = input_167)[name = string("normed_201_cast_fp16")]; - tensor normed_203_begin_0 = const()[name = string("normed_203_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_203_end_0 = const()[name = string("normed_203_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_203_end_mask_0 = const()[name = string("normed_203_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_203 = slice_by_index(begin = normed_203_begin_0, end = normed_203_end_0, end_mask = normed_203_end_mask_0, x = normed_201_cast_fp16)[name = string("normed_203")]; - tensor var_6795_to_fp16 = const()[name = string("op_6795_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204492736)))]; - tensor k_17_cast_fp16 = mul(x = normed_203, y = var_6795_to_fp16)[name = string("k_17_cast_fp16")]; - tensor var_6797_cast_fp16 = mul(x = q_17_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6797_cast_fp16")]; - tensor x1_33_begin_0 = const()[name = string("x1_33_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_33_end_0 = const()[name = string("x1_33_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_33_end_mask_0 = const()[name = string("x1_33_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_33_cast_fp16 = slice_by_index(begin = x1_33_begin_0, end = x1_33_end_0, end_mask = x1_33_end_mask_0, x = q_17_cast_fp16)[name = string("x1_33_cast_fp16")]; - tensor x2_33_begin_0 = const()[name = string("x2_33_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_33_end_0 = const()[name = string("x2_33_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_33_end_mask_0 = const()[name = string("x2_33_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_33_cast_fp16 = slice_by_index(begin = x2_33_begin_0, end = x2_33_end_0, end_mask = x2_33_end_mask_0, x = q_17_cast_fp16)[name = string("x2_33_cast_fp16")]; - fp16 const_318_promoted_to_fp16 = const()[name = string("const_318_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_6818_cast_fp16 = mul(x = x2_33_cast_fp16, y = const_318_promoted_to_fp16)[name = string("op_6818_cast_fp16")]; - int32 var_6820 = const()[name = string("op_6820"), val = int32(-1)]; - bool var_6821_interleave_0 = const()[name = string("op_6821_interleave_0"), val = bool(false)]; - tensor var_6821_cast_fp16 = concat(axis = var_6820, interleave = var_6821_interleave_0, values = (var_6818_cast_fp16, x1_33_cast_fp16))[name = string("op_6821_cast_fp16")]; - tensor var_6822_cast_fp16 = mul(x = var_6821_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6822_cast_fp16")]; - tensor query_states_33_cast_fp16 = add(x = var_6797_cast_fp16, y = var_6822_cast_fp16)[name = string("query_states_33_cast_fp16")]; - tensor var_6825_cast_fp16 = mul(x = k_17_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6825_cast_fp16")]; - tensor x1_35_begin_0 = const()[name = string("x1_35_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_35_end_0 = const()[name = string("x1_35_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_35_end_mask_0 = const()[name = string("x1_35_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_35_cast_fp16 = slice_by_index(begin = x1_35_begin_0, end = x1_35_end_0, end_mask = x1_35_end_mask_0, x = k_17_cast_fp16)[name = string("x1_35_cast_fp16")]; - tensor x2_35_begin_0 = const()[name = string("x2_35_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_35_end_0 = const()[name = string("x2_35_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_35_end_mask_0 = const()[name = string("x2_35_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_35_cast_fp16 = slice_by_index(begin = x2_35_begin_0, end = x2_35_end_0, end_mask = x2_35_end_mask_0, x = k_17_cast_fp16)[name = string("x2_35_cast_fp16")]; - fp16 const_321_promoted_to_fp16 = const()[name = string("const_321_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_6846_cast_fp16 = mul(x = x2_35_cast_fp16, y = const_321_promoted_to_fp16)[name = string("op_6846_cast_fp16")]; - int32 var_6848 = const()[name = string("op_6848"), val = int32(-1)]; - bool var_6849_interleave_0 = const()[name = string("op_6849_interleave_0"), val = bool(false)]; - tensor var_6849_cast_fp16 = concat(axis = var_6848, interleave = var_6849_interleave_0, values = (var_6846_cast_fp16, x1_35_cast_fp16))[name = string("op_6849_cast_fp16")]; - tensor var_6850_cast_fp16 = mul(x = var_6849_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6850_cast_fp16")]; - tensor key_states_33_cast_fp16 = add(x = var_6825_cast_fp16, y = var_6850_cast_fp16)[name = string("key_states_33_cast_fp16")]; - tensor key_slice_15_begin_0 = const()[name = string("key_slice_15_begin_0"), val = tensor([7, 0, 0, 0])]; - tensor key_slice_15_end_0 = const()[name = string("key_slice_15_end_0"), val = tensor([8, 1, 512, 256])]; - tensor key_slice_15_end_mask_0 = const()[name = string("key_slice_15_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_15_cast_fp16 = slice_by_index(begin = key_slice_15_begin_0, end = key_slice_15_end_0, end_mask = key_slice_15_end_mask_0, x = coreml_update_state_67)[name = string("key_slice_15_cast_fp16")]; - tensor key_tail_15_begin_0 = const()[name = string("key_tail_15_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor key_tail_15_end_0 = const()[name = string("key_tail_15_end_0"), val = tensor([1, 1, 512, 256])]; - tensor key_tail_15_cast_fp16 = slice_by_index(begin = key_tail_15_begin_0, end = key_tail_15_end_0, x = key_slice_15_cast_fp16)[name = string("key_tail_15_cast_fp16")]; - int32 var_6863 = const()[name = string("op_6863"), val = int32(2)]; - bool shifted_key_15_interleave_0 = const()[name = string("shifted_key_15_interleave_0"), val = bool(false)]; - tensor shifted_key_15_cast_fp16 = concat(axis = var_6863, interleave = shifted_key_15_interleave_0, values = (key_tail_15_cast_fp16, key_states_33_cast_fp16))[name = string("shifted_key_15_cast_fp16")]; - tensor concat_36 = const()[name = string("concat_36"), val = tensor([7, 0, 0, 0])]; - tensor concat_37 = const()[name = string("concat_37"), val = tensor([8, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_15_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_15_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_15_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_36, begin_mask = model_model_kv_cache_local_internal_tensor_assign_15_begin_mask_0, end = concat_37, end_mask = model_model_kv_cache_local_internal_tensor_assign_15_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_15_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_15_stride_0, update = shifted_key_15_cast_fp16, x = coreml_update_state_67)[name = string("model_model_kv_cache_local_internal_tensor_assign_15_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_15_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_68_write_state")]; - tensor coreml_update_state_68 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_68")]; - tensor value_slice_15_begin_0 = const()[name = string("value_slice_15_begin_0"), val = tensor([29, 0, 0, 0])]; - tensor value_slice_15_end_0 = const()[name = string("value_slice_15_end_0"), val = tensor([30, 1, 512, 256])]; - tensor value_slice_15_end_mask_0 = const()[name = string("value_slice_15_end_mask_0"), val = tensor([false, true, true, true])]; - tensor value_slice_15_cast_fp16 = slice_by_index(begin = value_slice_15_begin_0, end = value_slice_15_end_0, end_mask = value_slice_15_end_mask_0, x = coreml_update_state_68)[name = string("value_slice_15_cast_fp16")]; - tensor value_tail_15_begin_0 = const()[name = string("value_tail_15_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor value_tail_15_end_0 = const()[name = string("value_tail_15_end_0"), val = tensor([1, 1, 512, 256])]; - tensor value_tail_15_cast_fp16 = slice_by_index(begin = value_tail_15_begin_0, end = value_tail_15_end_0, x = value_slice_15_cast_fp16)[name = string("value_tail_15_cast_fp16")]; - int32 var_6897 = const()[name = string("op_6897"), val = int32(2)]; - bool shifted_value_15_interleave_0 = const()[name = string("shifted_value_15_interleave_0"), val = bool(false)]; - tensor shifted_value_15_cast_fp16 = concat(axis = var_6897, interleave = shifted_value_15_interleave_0, values = (value_tail_15_cast_fp16, var_6737))[name = string("shifted_value_15_cast_fp16")]; - tensor concat_38 = const()[name = string("concat_38"), val = tensor([29, 0, 0, 0])]; - tensor concat_39 = const()[name = string("concat_39"), val = tensor([30, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_16_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_16_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_16_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_38, begin_mask = model_model_kv_cache_local_internal_tensor_assign_16_begin_mask_0, end = concat_39, end_mask = model_model_kv_cache_local_internal_tensor_assign_16_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_16_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_16_stride_0, update = shifted_value_15_cast_fp16, x = coreml_update_state_68)[name = string("model_model_kv_cache_local_internal_tensor_assign_16_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_16_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_69_write_state")]; - tensor coreml_update_state_69 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_69")]; - tensor var_6925_begin_0 = const()[name = string("op_6925_begin_0"), val = tensor([7, 0, 0, 0])]; - tensor var_6925_end_0 = const()[name = string("op_6925_end_0"), val = tensor([8, 1, 512, 256])]; - tensor var_6925_end_mask_0 = const()[name = string("op_6925_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_6925_cast_fp16 = slice_by_index(begin = var_6925_begin_0, end = var_6925_end_0, end_mask = var_6925_end_mask_0, x = coreml_update_state_69)[name = string("op_6925_cast_fp16")]; - tensor var_6932_begin_0 = const()[name = string("op_6932_begin_0"), val = tensor([29, 0, 0, 0])]; - tensor var_6932_end_0 = const()[name = string("op_6932_end_0"), val = tensor([30, 1, 512, 256])]; - tensor var_6932_end_mask_0 = const()[name = string("op_6932_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_6932_cast_fp16 = slice_by_index(begin = var_6932_begin_0, end = var_6932_end_0, end_mask = var_6932_end_mask_0, x = coreml_update_state_69)[name = string("op_6932_cast_fp16")]; - tensor var_6969 = const()[name = string("op_6969"), val = tensor([1, 4, 1, 1])]; - tensor x_133_cast_fp16 = tile(reps = var_6969, x = var_6925_cast_fp16)[name = string("x_133_cast_fp16")]; - tensor var_6989 = const()[name = string("op_6989"), val = tensor([1, 4, 1, 1])]; - tensor x_139_cast_fp16 = tile(reps = var_6989, x = var_6932_cast_fp16)[name = string("x_139_cast_fp16")]; - bool var_7016_transpose_x_1 = const()[name = string("op_7016_transpose_x_1"), val = bool(false)]; - bool var_7016_transpose_y_1 = const()[name = string("op_7016_transpose_y_1"), val = bool(true)]; - tensor var_7016 = matmul(transpose_x = var_7016_transpose_x_1, transpose_y = var_7016_transpose_y_1, x = query_states_33_cast_fp16, y = x_133_cast_fp16)[name = string("op_7016")]; - fp16 var_7017_to_fp16 = const()[name = string("op_7017_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_49_cast_fp16 = mul(x = var_7016, y = var_7017_to_fp16)[name = string("attn_weights_49_cast_fp16")]; - tensor attn_weights_51_cast_fp16 = add(x = attn_weights_49_cast_fp16, y = var_2059)[name = string("attn_weights_51_cast_fp16")]; - int32 var_7052 = const()[name = string("op_7052"), val = int32(-1)]; - tensor attn_weights_53_cast_fp16 = softmax(axis = var_7052, x = attn_weights_51_cast_fp16)[name = string("attn_weights_53_cast_fp16")]; - bool attn_output_81_transpose_x_0 = const()[name = string("attn_output_81_transpose_x_0"), val = bool(false)]; - bool attn_output_81_transpose_y_0 = const()[name = string("attn_output_81_transpose_y_0"), val = bool(false)]; - tensor attn_output_81_cast_fp16 = matmul(transpose_x = attn_output_81_transpose_x_0, transpose_y = attn_output_81_transpose_y_0, x = attn_weights_53_cast_fp16, y = x_139_cast_fp16)[name = string("attn_output_81_cast_fp16")]; - tensor var_7063_perm_0 = const()[name = string("op_7063_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_7067 = const()[name = string("op_7067"), val = tensor([1, 1, 1024])]; - tensor var_7063_cast_fp16 = transpose(perm = var_7063_perm_0, x = attn_output_81_cast_fp16)[name = string("transpose_106")]; - tensor attn_output_85_cast_fp16 = reshape(shape = var_7067, x = var_7063_cast_fp16)[name = string("attn_output_85_cast_fp16")]; - tensor var_7072 = const()[name = string("op_7072"), val = tensor([0, 2, 1])]; - string var_7088_pad_type_0 = const()[name = string("op_7088_pad_type_0"), val = string("valid")]; - int32 var_7088_groups_0 = const()[name = string("op_7088_groups_0"), val = int32(1)]; - tensor var_7088_strides_0 = const()[name = string("op_7088_strides_0"), val = tensor([1])]; - tensor var_7088_pad_0 = const()[name = string("op_7088_pad_0"), val = tensor([0, 0])]; - tensor var_7088_dilations_0 = const()[name = string("op_7088_dilations_0"), val = tensor([1])]; - tensor squeeze_8_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204493312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205378112))))[name = string("squeeze_8_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_7073_cast_fp16 = transpose(perm = var_7072, x = attn_output_85_cast_fp16)[name = string("transpose_105")]; - tensor var_7088_cast_fp16 = conv(dilations = var_7088_dilations_0, groups = var_7088_groups_0, pad = var_7088_pad_0, pad_type = var_7088_pad_type_0, strides = var_7088_strides_0, weight = squeeze_8_cast_fp16_to_fp32_to_fp16_palettized, x = var_7073_cast_fp16)[name = string("op_7088_cast_fp16")]; - tensor var_7092 = const()[name = string("op_7092"), val = tensor([0, 2, 1])]; - int32 var_7103 = const()[name = string("op_7103"), val = int32(-1)]; - fp16 const_330_promoted_to_fp16 = const()[name = string("const_330_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_137_cast_fp16 = transpose(perm = var_7092, x = var_7088_cast_fp16)[name = string("transpose_104")]; - tensor var_7105_cast_fp16 = mul(x = hidden_states_137_cast_fp16, y = const_330_promoted_to_fp16)[name = string("op_7105_cast_fp16")]; - bool input_171_interleave_0 = const()[name = string("input_171_interleave_0"), val = bool(false)]; - tensor input_171_cast_fp16 = concat(axis = var_7103, interleave = input_171_interleave_0, values = (hidden_states_137_cast_fp16, var_7105_cast_fp16))[name = string("input_171_cast_fp16")]; - tensor normed_205_axes_0 = const()[name = string("normed_205_axes_0"), val = tensor([-1])]; - fp16 var_7100_to_fp16 = const()[name = string("op_7100_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_205_cast_fp16 = layer_norm(axes = normed_205_axes_0, epsilon = var_7100_to_fp16, x = input_171_cast_fp16)[name = string("normed_205_cast_fp16")]; - tensor normed_207_begin_0 = const()[name = string("normed_207_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_207_end_0 = const()[name = string("normed_207_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_207_end_mask_0 = const()[name = string("normed_207_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_207_cast_fp16 = slice_by_index(begin = normed_207_begin_0, end = normed_207_end_0, end_mask = normed_207_end_mask_0, x = normed_205_cast_fp16)[name = string("normed_207_cast_fp16")]; - tensor var_7119_to_fp16 = const()[name = string("op_7119_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205396608)))]; - tensor attn_output_89_cast_fp16 = mul(x = normed_207_cast_fp16, y = var_7119_to_fp16)[name = string("attn_output_89_cast_fp16")]; - tensor hidden_states_139_cast_fp16 = add(x = hidden_states_129_cast_fp16, y = attn_output_89_cast_fp16)[name = string("hidden_states_139_cast_fp16")]; - int32 var_7132 = const()[name = string("op_7132"), val = int32(-1)]; - fp16 const_334_promoted_to_fp16 = const()[name = string("const_334_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_7134_cast_fp16 = mul(x = hidden_states_139_cast_fp16, y = const_334_promoted_to_fp16)[name = string("op_7134_cast_fp16")]; - bool input_173_interleave_0 = const()[name = string("input_173_interleave_0"), val = bool(false)]; - tensor input_173_cast_fp16 = concat(axis = var_7132, interleave = input_173_interleave_0, values = (hidden_states_139_cast_fp16, var_7134_cast_fp16))[name = string("input_173_cast_fp16")]; - tensor normed_209_axes_0 = const()[name = string("normed_209_axes_0"), val = tensor([-1])]; - fp16 var_7129_to_fp16 = const()[name = string("op_7129_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_209_cast_fp16 = layer_norm(axes = normed_209_axes_0, epsilon = var_7129_to_fp16, x = input_173_cast_fp16)[name = string("normed_209_cast_fp16")]; - tensor normed_211_begin_0 = const()[name = string("normed_211_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_211_end_0 = const()[name = string("normed_211_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_211_end_mask_0 = const()[name = string("normed_211_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_211_cast_fp16 = slice_by_index(begin = normed_211_begin_0, end = normed_211_end_0, end_mask = normed_211_end_mask_0, x = normed_209_cast_fp16)[name = string("normed_211_cast_fp16")]; - tensor var_7148_to_fp16 = const()[name = string("op_7148_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205398976)))]; - tensor x_141_cast_fp16 = mul(x = normed_211_cast_fp16, y = var_7148_to_fp16)[name = string("x_141_cast_fp16")]; - tensor var_7160 = const()[name = string("op_7160"), val = tensor([0, 2, 1])]; - tensor input_175_axes_0 = const()[name = string("input_175_axes_0"), val = tensor([2])]; - tensor var_7161_cast_fp16 = transpose(perm = var_7160, x = x_141_cast_fp16)[name = string("transpose_103")]; - tensor input_175_cast_fp16 = expand_dims(axes = input_175_axes_0, x = var_7161_cast_fp16)[name = string("input_175_cast_fp16")]; - string x_143_pad_type_0 = const()[name = string("x_143_pad_type_0"), val = string("valid")]; - tensor x_143_strides_0 = const()[name = string("x_143_strides_0"), val = tensor([1, 1])]; - tensor x_143_pad_0 = const()[name = string("x_143_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_143_dilations_0 = const()[name = string("x_143_dilations_0"), val = tensor([1, 1])]; - int32 x_143_groups_0 = const()[name = string("x_143_groups_0"), val = int32(1)]; - tensor model_model_layers_8_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(658291328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(664263360))))[name = string("model_model_layers_8_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_143_cast_fp16 = conv(dilations = x_143_dilations_0, groups = x_143_groups_0, pad = x_143_pad_0, pad_type = x_143_pad_type_0, strides = x_143_strides_0, weight = model_model_layers_8_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_175_cast_fp16)[name = string("x_143_cast_fp16")]; - string b_17_pad_type_0 = const()[name = string("b_17_pad_type_0"), val = string("valid")]; - tensor b_17_strides_0 = const()[name = string("b_17_strides_0"), val = tensor([1, 1])]; - tensor b_17_pad_0 = const()[name = string("b_17_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_17_dilations_0 = const()[name = string("b_17_dilations_0"), val = tensor([1, 1])]; - int32 b_17_groups_0 = const()[name = string("b_17_groups_0"), val = int32(1)]; - tensor model_model_layers_8_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(664374016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(670346048))))[name = string("model_model_layers_8_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_17_cast_fp16 = conv(dilations = b_17_dilations_0, groups = b_17_groups_0, pad = b_17_pad_0, pad_type = b_17_pad_type_0, strides = b_17_strides_0, weight = model_model_layers_8_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_175_cast_fp16)[name = string("b_17_cast_fp16")]; - string var_7186_mode_0 = const()[name = string("op_7186_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_7186_cast_fp16 = gelu(mode = var_7186_mode_0, x = x_143_cast_fp16)[name = string("op_7186_cast_fp16")]; - tensor input_177_cast_fp16 = mul(x = var_7186_cast_fp16, y = b_17_cast_fp16)[name = string("input_177_cast_fp16")]; - string e_17_pad_type_0 = const()[name = string("e_17_pad_type_0"), val = string("valid")]; - tensor e_17_strides_0 = const()[name = string("e_17_strides_0"), val = tensor([1, 1])]; - tensor e_17_pad_0 = const()[name = string("e_17_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_17_dilations_0 = const()[name = string("e_17_dilations_0"), val = tensor([1, 1])]; - int32 e_17_groups_0 = const()[name = string("e_17_groups_0"), val = int32(1)]; - tensor model_model_layers_8_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217566720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223538752))))[name = string("model_model_layers_8_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_17_cast_fp16 = conv(dilations = e_17_dilations_0, groups = e_17_groups_0, pad = e_17_pad_0, pad_type = e_17_pad_type_0, strides = e_17_strides_0, weight = model_model_layers_8_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_177_cast_fp16)[name = string("e_17_cast_fp16")]; - tensor var_7194_axes_0 = const()[name = string("op_7194_axes_0"), val = tensor([2])]; - tensor var_7194_cast_fp16 = squeeze(axes = var_7194_axes_0, x = e_17_cast_fp16)[name = string("op_7194_cast_fp16")]; - tensor var_7195 = const()[name = string("op_7195"), val = tensor([0, 2, 1])]; - int32 var_7206 = const()[name = string("op_7206"), val = int32(-1)]; - fp16 const_338_promoted_to_fp16 = const()[name = string("const_338_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_141_cast_fp16 = transpose(perm = var_7195, x = var_7194_cast_fp16)[name = string("transpose_102")]; - tensor var_7208_cast_fp16 = mul(x = hidden_states_141_cast_fp16, y = const_338_promoted_to_fp16)[name = string("op_7208_cast_fp16")]; - bool input_179_interleave_0 = const()[name = string("input_179_interleave_0"), val = bool(false)]; - tensor input_179_cast_fp16 = concat(axis = var_7206, interleave = input_179_interleave_0, values = (hidden_states_141_cast_fp16, var_7208_cast_fp16))[name = string("input_179_cast_fp16")]; - tensor normed_213_axes_0 = const()[name = string("normed_213_axes_0"), val = tensor([-1])]; - fp16 var_7203_to_fp16 = const()[name = string("op_7203_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_213_cast_fp16 = layer_norm(axes = normed_213_axes_0, epsilon = var_7203_to_fp16, x = input_179_cast_fp16)[name = string("normed_213_cast_fp16")]; - tensor normed_215_begin_0 = const()[name = string("normed_215_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_215_end_0 = const()[name = string("normed_215_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_215_end_mask_0 = const()[name = string("normed_215_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_215_cast_fp16 = slice_by_index(begin = normed_215_begin_0, end = normed_215_end_0, end_mask = normed_215_end_mask_0, x = normed_213_cast_fp16)[name = string("normed_215_cast_fp16")]; - tensor var_7222_to_fp16 = const()[name = string("op_7222_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223557248)))]; - tensor hidden_states_143_cast_fp16 = mul(x = normed_215_cast_fp16, y = var_7222_to_fp16)[name = string("hidden_states_143_cast_fp16")]; - tensor hidden_states_145_cast_fp16 = add(x = hidden_states_139_cast_fp16, y = hidden_states_143_cast_fp16)[name = string("hidden_states_145_cast_fp16")]; - int32 var_7273 = const()[name = string("op_7273"), val = int32(-1)]; - fp16 const_342_promoted_to_fp16 = const()[name = string("const_342_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_7275_cast_fp16 = mul(x = hidden_states_145_cast_fp16, y = const_342_promoted_to_fp16)[name = string("op_7275_cast_fp16")]; - bool input_181_interleave_0 = const()[name = string("input_181_interleave_0"), val = bool(false)]; - tensor input_181_cast_fp16 = concat(axis = var_7273, interleave = input_181_interleave_0, values = (hidden_states_145_cast_fp16, var_7275_cast_fp16))[name = string("input_181_cast_fp16")]; - tensor normed_217_axes_0 = const()[name = string("normed_217_axes_0"), val = tensor([-1])]; - fp16 var_7270_to_fp16 = const()[name = string("op_7270_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_217_cast_fp16 = layer_norm(axes = normed_217_axes_0, epsilon = var_7270_to_fp16, x = input_181_cast_fp16)[name = string("normed_217_cast_fp16")]; - tensor normed_219_begin_0 = const()[name = string("normed_219_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_219_end_0 = const()[name = string("normed_219_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_219_end_mask_0 = const()[name = string("normed_219_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_219_cast_fp16 = slice_by_index(begin = normed_219_begin_0, end = normed_219_end_0, end_mask = normed_219_end_mask_0, x = normed_217_cast_fp16)[name = string("normed_219_cast_fp16")]; - tensor var_7289_to_fp16 = const()[name = string("op_7289_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223559616)))]; - tensor hidden_states_147_cast_fp16 = mul(x = normed_219_cast_fp16, y = var_7289_to_fp16)[name = string("hidden_states_147_cast_fp16")]; - tensor var_7294 = const()[name = string("op_7294"), val = tensor([0, 2, 1])]; - tensor var_7297_axes_0 = const()[name = string("op_7297_axes_0"), val = tensor([2])]; - tensor var_7295_cast_fp16 = transpose(perm = var_7294, x = hidden_states_147_cast_fp16)[name = string("transpose_101")]; - tensor var_7297_cast_fp16 = expand_dims(axes = var_7297_axes_0, x = var_7295_cast_fp16)[name = string("op_7297_cast_fp16")]; - string var_7313_pad_type_0 = const()[name = string("op_7313_pad_type_0"), val = string("valid")]; - tensor var_7313_strides_0 = const()[name = string("op_7313_strides_0"), val = tensor([1, 1])]; - tensor var_7313_pad_0 = const()[name = string("op_7313_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_7313_dilations_0 = const()[name = string("op_7313_dilations_0"), val = tensor([1, 1])]; - int32 var_7313_groups_0 = const()[name = string("op_7313_groups_0"), val = int32(1)]; - tensor var_7313 = conv(dilations = var_7313_dilations_0, groups = var_7313_groups_0, pad = var_7313_pad_0, pad_type = var_7313_pad_type_0, strides = var_7313_strides_0, weight = model_model_layers_9_self_attn_q_proj_weight_palettized, x = var_7297_cast_fp16)[name = string("op_7313")]; - tensor var_7318 = const()[name = string("op_7318"), val = tensor([1, 4, 1, 256])]; - tensor var_7319 = reshape(shape = var_7318, x = var_7313)[name = string("op_7319")]; - string var_7335_pad_type_0 = const()[name = string("op_7335_pad_type_0"), val = string("valid")]; - tensor var_7335_strides_0 = const()[name = string("op_7335_strides_0"), val = tensor([1, 1])]; - tensor var_7335_pad_0 = const()[name = string("op_7335_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_7335_dilations_0 = const()[name = string("op_7335_dilations_0"), val = tensor([1, 1])]; - int32 var_7335_groups_0 = const()[name = string("op_7335_groups_0"), val = int32(1)]; - tensor var_7335 = conv(dilations = var_7335_dilations_0, groups = var_7335_groups_0, pad = var_7335_pad_0, pad_type = var_7335_pad_type_0, strides = var_7335_strides_0, weight = model_model_layers_9_self_attn_k_proj_weight_palettized, x = var_7297_cast_fp16)[name = string("op_7335")]; - tensor var_7340 = const()[name = string("op_7340"), val = tensor([1, 1, 1, 256])]; - tensor var_7341 = reshape(shape = var_7340, x = var_7335)[name = string("op_7341")]; - string var_7357_pad_type_0 = const()[name = string("op_7357_pad_type_0"), val = string("valid")]; - tensor var_7357_strides_0 = const()[name = string("op_7357_strides_0"), val = tensor([1, 1])]; - tensor var_7357_pad_0 = const()[name = string("op_7357_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_7357_dilations_0 = const()[name = string("op_7357_dilations_0"), val = tensor([1, 1])]; - int32 var_7357_groups_0 = const()[name = string("op_7357_groups_0"), val = int32(1)]; - tensor var_7357 = conv(dilations = var_7357_dilations_0, groups = var_7357_groups_0, pad = var_7357_pad_0, pad_type = var_7357_pad_type_0, strides = var_7357_strides_0, weight = model_model_layers_9_self_attn_v_proj_weight_palettized, x = var_7297_cast_fp16)[name = string("op_7357")]; - tensor var_7362 = const()[name = string("op_7362"), val = tensor([1, 1, 1, 256])]; - tensor var_7363 = reshape(shape = var_7362, x = var_7357)[name = string("op_7363")]; - int32 var_7378 = const()[name = string("op_7378"), val = int32(-1)]; - fp16 const_346_promoted = const()[name = string("const_346_promoted"), val = fp16(-0x1p+0)]; - tensor var_7380 = mul(x = var_7319, y = const_346_promoted)[name = string("op_7380")]; - bool input_185_interleave_0 = const()[name = string("input_185_interleave_0"), val = bool(false)]; - tensor input_185 = concat(axis = var_7378, interleave = input_185_interleave_0, values = (var_7319, var_7380))[name = string("input_185")]; - tensor normed_221_axes_0 = const()[name = string("normed_221_axes_0"), val = tensor([-1])]; - fp16 var_7375_to_fp16 = const()[name = string("op_7375_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_221_cast_fp16 = layer_norm(axes = normed_221_axes_0, epsilon = var_7375_to_fp16, x = input_185)[name = string("normed_221_cast_fp16")]; - tensor normed_223_begin_0 = const()[name = string("normed_223_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_223_end_0 = const()[name = string("normed_223_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_223_end_mask_0 = const()[name = string("normed_223_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_223 = slice_by_index(begin = normed_223_begin_0, end = normed_223_end_0, end_mask = normed_223_end_mask_0, x = normed_221_cast_fp16)[name = string("normed_223")]; - tensor var_7394_to_fp16 = const()[name = string("op_7394_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223561984)))]; - tensor q_19_cast_fp16 = mul(x = normed_223, y = var_7394_to_fp16)[name = string("q_19_cast_fp16")]; - int32 var_7405 = const()[name = string("op_7405"), val = int32(-1)]; - fp16 const_350_promoted = const()[name = string("const_350_promoted"), val = fp16(-0x1p+0)]; - tensor var_7407 = mul(x = var_7341, y = const_350_promoted)[name = string("op_7407")]; - bool input_187_interleave_0 = const()[name = string("input_187_interleave_0"), val = bool(false)]; - tensor input_187 = concat(axis = var_7405, interleave = input_187_interleave_0, values = (var_7341, var_7407))[name = string("input_187")]; - tensor normed_225_axes_0 = const()[name = string("normed_225_axes_0"), val = tensor([-1])]; - fp16 var_7402_to_fp16 = const()[name = string("op_7402_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_225_cast_fp16 = layer_norm(axes = normed_225_axes_0, epsilon = var_7402_to_fp16, x = input_187)[name = string("normed_225_cast_fp16")]; - tensor normed_227_begin_0 = const()[name = string("normed_227_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_227_end_0 = const()[name = string("normed_227_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_227_end_mask_0 = const()[name = string("normed_227_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_227 = slice_by_index(begin = normed_227_begin_0, end = normed_227_end_0, end_mask = normed_227_end_mask_0, x = normed_225_cast_fp16)[name = string("normed_227")]; - tensor var_7421_to_fp16 = const()[name = string("op_7421_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223562560)))]; - tensor k_19_cast_fp16 = mul(x = normed_227, y = var_7421_to_fp16)[name = string("k_19_cast_fp16")]; - tensor var_7423_cast_fp16 = mul(x = q_19_cast_fp16, y = cos_1_cast_fp16)[name = string("op_7423_cast_fp16")]; - tensor x1_37_begin_0 = const()[name = string("x1_37_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_37_end_0 = const()[name = string("x1_37_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_37_end_mask_0 = const()[name = string("x1_37_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_37_cast_fp16 = slice_by_index(begin = x1_37_begin_0, end = x1_37_end_0, end_mask = x1_37_end_mask_0, x = q_19_cast_fp16)[name = string("x1_37_cast_fp16")]; - tensor x2_37_begin_0 = const()[name = string("x2_37_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_37_end_0 = const()[name = string("x2_37_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_37_end_mask_0 = const()[name = string("x2_37_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_37_cast_fp16 = slice_by_index(begin = x2_37_begin_0, end = x2_37_end_0, end_mask = x2_37_end_mask_0, x = q_19_cast_fp16)[name = string("x2_37_cast_fp16")]; - fp16 const_356_promoted_to_fp16 = const()[name = string("const_356_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_7444_cast_fp16 = mul(x = x2_37_cast_fp16, y = const_356_promoted_to_fp16)[name = string("op_7444_cast_fp16")]; - int32 var_7446 = const()[name = string("op_7446"), val = int32(-1)]; - bool var_7447_interleave_0 = const()[name = string("op_7447_interleave_0"), val = bool(false)]; - tensor var_7447_cast_fp16 = concat(axis = var_7446, interleave = var_7447_interleave_0, values = (var_7444_cast_fp16, x1_37_cast_fp16))[name = string("op_7447_cast_fp16")]; - tensor var_7448_cast_fp16 = mul(x = var_7447_cast_fp16, y = sin_1_cast_fp16)[name = string("op_7448_cast_fp16")]; - tensor query_states_37_cast_fp16 = add(x = var_7423_cast_fp16, y = var_7448_cast_fp16)[name = string("query_states_37_cast_fp16")]; - tensor var_7451_cast_fp16 = mul(x = k_19_cast_fp16, y = cos_1_cast_fp16)[name = string("op_7451_cast_fp16")]; - tensor x1_39_begin_0 = const()[name = string("x1_39_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_39_end_0 = const()[name = string("x1_39_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_39_end_mask_0 = const()[name = string("x1_39_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_39_cast_fp16 = slice_by_index(begin = x1_39_begin_0, end = x1_39_end_0, end_mask = x1_39_end_mask_0, x = k_19_cast_fp16)[name = string("x1_39_cast_fp16")]; - tensor x2_39_begin_0 = const()[name = string("x2_39_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_39_end_0 = const()[name = string("x2_39_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_39_end_mask_0 = const()[name = string("x2_39_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_39_cast_fp16 = slice_by_index(begin = x2_39_begin_0, end = x2_39_end_0, end_mask = x2_39_end_mask_0, x = k_19_cast_fp16)[name = string("x2_39_cast_fp16")]; - fp16 const_359_promoted_to_fp16 = const()[name = string("const_359_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_7472_cast_fp16 = mul(x = x2_39_cast_fp16, y = const_359_promoted_to_fp16)[name = string("op_7472_cast_fp16")]; - int32 var_7474 = const()[name = string("op_7474"), val = int32(-1)]; - bool var_7475_interleave_0 = const()[name = string("op_7475_interleave_0"), val = bool(false)]; - tensor var_7475_cast_fp16 = concat(axis = var_7474, interleave = var_7475_interleave_0, values = (var_7472_cast_fp16, x1_39_cast_fp16))[name = string("op_7475_cast_fp16")]; - tensor var_7476_cast_fp16 = mul(x = var_7475_cast_fp16, y = sin_1_cast_fp16)[name = string("op_7476_cast_fp16")]; - tensor key_states_37_cast_fp16 = add(x = var_7451_cast_fp16, y = var_7476_cast_fp16)[name = string("key_states_37_cast_fp16")]; - tensor key_slice_17_begin_0 = const()[name = string("key_slice_17_begin_0"), val = tensor([8, 0, 0, 0])]; - tensor key_slice_17_end_0 = const()[name = string("key_slice_17_end_0"), val = tensor([9, 1, 512, 256])]; - tensor key_slice_17_end_mask_0 = const()[name = string("key_slice_17_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_17_cast_fp16 = slice_by_index(begin = key_slice_17_begin_0, end = key_slice_17_end_0, end_mask = key_slice_17_end_mask_0, x = coreml_update_state_69)[name = string("key_slice_17_cast_fp16")]; - tensor key_tail_17_begin_0 = const()[name = string("key_tail_17_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor key_tail_17_end_0 = const()[name = string("key_tail_17_end_0"), val = tensor([1, 1, 512, 256])]; - tensor key_tail_17_cast_fp16 = slice_by_index(begin = key_tail_17_begin_0, end = key_tail_17_end_0, x = key_slice_17_cast_fp16)[name = string("key_tail_17_cast_fp16")]; - int32 var_7489 = const()[name = string("op_7489"), val = int32(2)]; - bool shifted_key_17_interleave_0 = const()[name = string("shifted_key_17_interleave_0"), val = bool(false)]; - tensor shifted_key_17_cast_fp16 = concat(axis = var_7489, interleave = shifted_key_17_interleave_0, values = (key_tail_17_cast_fp16, key_states_37_cast_fp16))[name = string("shifted_key_17_cast_fp16")]; - tensor concat_40 = const()[name = string("concat_40"), val = tensor([8, 0, 0, 0])]; - tensor concat_41 = const()[name = string("concat_41"), val = tensor([9, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_17_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_17_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_17_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_40, begin_mask = model_model_kv_cache_local_internal_tensor_assign_17_begin_mask_0, end = concat_41, end_mask = model_model_kv_cache_local_internal_tensor_assign_17_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_17_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_17_stride_0, update = shifted_key_17_cast_fp16, x = coreml_update_state_69)[name = string("model_model_kv_cache_local_internal_tensor_assign_17_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_17_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_70_write_state")]; - tensor coreml_update_state_70 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_70")]; - tensor value_slice_17_begin_0 = const()[name = string("value_slice_17_begin_0"), val = tensor([30, 0, 0, 0])]; - tensor value_slice_17_end_0 = const()[name = string("value_slice_17_end_0"), val = tensor([31, 1, 512, 256])]; - tensor value_slice_17_end_mask_0 = const()[name = string("value_slice_17_end_mask_0"), val = tensor([false, true, true, true])]; - tensor value_slice_17_cast_fp16 = slice_by_index(begin = value_slice_17_begin_0, end = value_slice_17_end_0, end_mask = value_slice_17_end_mask_0, x = coreml_update_state_70)[name = string("value_slice_17_cast_fp16")]; - tensor value_tail_17_begin_0 = const()[name = string("value_tail_17_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor value_tail_17_end_0 = const()[name = string("value_tail_17_end_0"), val = tensor([1, 1, 512, 256])]; - tensor value_tail_17_cast_fp16 = slice_by_index(begin = value_tail_17_begin_0, end = value_tail_17_end_0, x = value_slice_17_cast_fp16)[name = string("value_tail_17_cast_fp16")]; - int32 var_7523 = const()[name = string("op_7523"), val = int32(2)]; - bool shifted_value_17_interleave_0 = const()[name = string("shifted_value_17_interleave_0"), val = bool(false)]; - tensor shifted_value_17_cast_fp16 = concat(axis = var_7523, interleave = shifted_value_17_interleave_0, values = (value_tail_17_cast_fp16, var_7363))[name = string("shifted_value_17_cast_fp16")]; - tensor concat_42 = const()[name = string("concat_42"), val = tensor([30, 0, 0, 0])]; - tensor concat_43 = const()[name = string("concat_43"), val = tensor([31, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_18_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_18_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_18_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_42, begin_mask = model_model_kv_cache_local_internal_tensor_assign_18_begin_mask_0, end = concat_43, end_mask = model_model_kv_cache_local_internal_tensor_assign_18_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_18_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_18_stride_0, update = shifted_value_17_cast_fp16, x = coreml_update_state_70)[name = string("model_model_kv_cache_local_internal_tensor_assign_18_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_18_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_71_write_state")]; - tensor coreml_update_state_71 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_71")]; - tensor var_7551_begin_0 = const()[name = string("op_7551_begin_0"), val = tensor([8, 0, 0, 0])]; - tensor var_7551_end_0 = const()[name = string("op_7551_end_0"), val = tensor([9, 1, 512, 256])]; - tensor var_7551_end_mask_0 = const()[name = string("op_7551_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_7551_cast_fp16 = slice_by_index(begin = var_7551_begin_0, end = var_7551_end_0, end_mask = var_7551_end_mask_0, x = coreml_update_state_71)[name = string("op_7551_cast_fp16")]; - tensor var_7558_begin_0 = const()[name = string("op_7558_begin_0"), val = tensor([30, 0, 0, 0])]; - tensor var_7558_end_0 = const()[name = string("op_7558_end_0"), val = tensor([31, 1, 512, 256])]; - tensor var_7558_end_mask_0 = const()[name = string("op_7558_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_7558_cast_fp16 = slice_by_index(begin = var_7558_begin_0, end = var_7558_end_0, end_mask = var_7558_end_mask_0, x = coreml_update_state_71)[name = string("op_7558_cast_fp16")]; - tensor var_7595 = const()[name = string("op_7595"), val = tensor([1, 4, 1, 1])]; - tensor x_149_cast_fp16 = tile(reps = var_7595, x = var_7551_cast_fp16)[name = string("x_149_cast_fp16")]; - tensor var_7615 = const()[name = string("op_7615"), val = tensor([1, 4, 1, 1])]; - tensor x_155_cast_fp16 = tile(reps = var_7615, x = var_7558_cast_fp16)[name = string("x_155_cast_fp16")]; - bool var_7642_transpose_x_1 = const()[name = string("op_7642_transpose_x_1"), val = bool(false)]; - bool var_7642_transpose_y_1 = const()[name = string("op_7642_transpose_y_1"), val = bool(true)]; - tensor var_7642 = matmul(transpose_x = var_7642_transpose_x_1, transpose_y = var_7642_transpose_y_1, x = query_states_37_cast_fp16, y = x_149_cast_fp16)[name = string("op_7642")]; - fp16 var_7643_to_fp16 = const()[name = string("op_7643_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_55_cast_fp16 = mul(x = var_7642, y = var_7643_to_fp16)[name = string("attn_weights_55_cast_fp16")]; - tensor attn_weights_57_cast_fp16 = add(x = attn_weights_55_cast_fp16, y = var_2059)[name = string("attn_weights_57_cast_fp16")]; - int32 var_7678 = const()[name = string("op_7678"), val = int32(-1)]; - tensor attn_weights_59_cast_fp16 = softmax(axis = var_7678, x = attn_weights_57_cast_fp16)[name = string("attn_weights_59_cast_fp16")]; - bool attn_output_91_transpose_x_0 = const()[name = string("attn_output_91_transpose_x_0"), val = bool(false)]; - bool attn_output_91_transpose_y_0 = const()[name = string("attn_output_91_transpose_y_0"), val = bool(false)]; - tensor attn_output_91_cast_fp16 = matmul(transpose_x = attn_output_91_transpose_x_0, transpose_y = attn_output_91_transpose_y_0, x = attn_weights_59_cast_fp16, y = x_155_cast_fp16)[name = string("attn_output_91_cast_fp16")]; - tensor var_7689_perm_0 = const()[name = string("op_7689_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_7693 = const()[name = string("op_7693"), val = tensor([1, 1, 1024])]; - tensor var_7689_cast_fp16 = transpose(perm = var_7689_perm_0, x = attn_output_91_cast_fp16)[name = string("transpose_100")]; - tensor attn_output_95_cast_fp16 = reshape(shape = var_7693, x = var_7689_cast_fp16)[name = string("attn_output_95_cast_fp16")]; - tensor var_7698 = const()[name = string("op_7698"), val = tensor([0, 2, 1])]; - string var_7714_pad_type_0 = const()[name = string("op_7714_pad_type_0"), val = string("valid")]; - int32 var_7714_groups_0 = const()[name = string("op_7714_groups_0"), val = int32(1)]; - tensor var_7714_strides_0 = const()[name = string("op_7714_strides_0"), val = tensor([1])]; - tensor var_7714_pad_0 = const()[name = string("op_7714_pad_0"), val = tensor([0, 0])]; - tensor var_7714_dilations_0 = const()[name = string("op_7714_dilations_0"), val = tensor([1])]; - tensor squeeze_9_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223563136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224447936))))[name = string("squeeze_9_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_7699_cast_fp16 = transpose(perm = var_7698, x = attn_output_95_cast_fp16)[name = string("transpose_99")]; - tensor var_7714_cast_fp16 = conv(dilations = var_7714_dilations_0, groups = var_7714_groups_0, pad = var_7714_pad_0, pad_type = var_7714_pad_type_0, strides = var_7714_strides_0, weight = squeeze_9_cast_fp16_to_fp32_to_fp16_palettized, x = var_7699_cast_fp16)[name = string("op_7714_cast_fp16")]; - tensor var_7718 = const()[name = string("op_7718"), val = tensor([0, 2, 1])]; - int32 var_7729 = const()[name = string("op_7729"), val = int32(-1)]; - fp16 const_368_promoted_to_fp16 = const()[name = string("const_368_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_153_cast_fp16 = transpose(perm = var_7718, x = var_7714_cast_fp16)[name = string("transpose_98")]; - tensor var_7731_cast_fp16 = mul(x = hidden_states_153_cast_fp16, y = const_368_promoted_to_fp16)[name = string("op_7731_cast_fp16")]; - bool input_191_interleave_0 = const()[name = string("input_191_interleave_0"), val = bool(false)]; - tensor input_191_cast_fp16 = concat(axis = var_7729, interleave = input_191_interleave_0, values = (hidden_states_153_cast_fp16, var_7731_cast_fp16))[name = string("input_191_cast_fp16")]; - tensor normed_229_axes_0 = const()[name = string("normed_229_axes_0"), val = tensor([-1])]; - fp16 var_7726_to_fp16 = const()[name = string("op_7726_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_229_cast_fp16 = layer_norm(axes = normed_229_axes_0, epsilon = var_7726_to_fp16, x = input_191_cast_fp16)[name = string("normed_229_cast_fp16")]; - tensor normed_231_begin_0 = const()[name = string("normed_231_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_231_end_0 = const()[name = string("normed_231_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_231_end_mask_0 = const()[name = string("normed_231_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_231_cast_fp16 = slice_by_index(begin = normed_231_begin_0, end = normed_231_end_0, end_mask = normed_231_end_mask_0, x = normed_229_cast_fp16)[name = string("normed_231_cast_fp16")]; - tensor var_7745_to_fp16 = const()[name = string("op_7745_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224466432)))]; - tensor attn_output_99_cast_fp16 = mul(x = normed_231_cast_fp16, y = var_7745_to_fp16)[name = string("attn_output_99_cast_fp16")]; - tensor hidden_states_155_cast_fp16 = add(x = hidden_states_145_cast_fp16, y = attn_output_99_cast_fp16)[name = string("hidden_states_155_cast_fp16")]; - int32 var_7758 = const()[name = string("op_7758"), val = int32(-1)]; - fp16 const_372_promoted_to_fp16 = const()[name = string("const_372_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_7760_cast_fp16 = mul(x = hidden_states_155_cast_fp16, y = const_372_promoted_to_fp16)[name = string("op_7760_cast_fp16")]; - bool input_193_interleave_0 = const()[name = string("input_193_interleave_0"), val = bool(false)]; - tensor input_193_cast_fp16 = concat(axis = var_7758, interleave = input_193_interleave_0, values = (hidden_states_155_cast_fp16, var_7760_cast_fp16))[name = string("input_193_cast_fp16")]; - tensor normed_233_axes_0 = const()[name = string("normed_233_axes_0"), val = tensor([-1])]; - fp16 var_7755_to_fp16 = const()[name = string("op_7755_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_233_cast_fp16 = layer_norm(axes = normed_233_axes_0, epsilon = var_7755_to_fp16, x = input_193_cast_fp16)[name = string("normed_233_cast_fp16")]; - tensor normed_235_begin_0 = const()[name = string("normed_235_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_235_end_0 = const()[name = string("normed_235_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_235_end_mask_0 = const()[name = string("normed_235_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_235_cast_fp16 = slice_by_index(begin = normed_235_begin_0, end = normed_235_end_0, end_mask = normed_235_end_mask_0, x = normed_233_cast_fp16)[name = string("normed_235_cast_fp16")]; - tensor var_7774_to_fp16 = const()[name = string("op_7774_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224468800)))]; - tensor x_157_cast_fp16 = mul(x = normed_235_cast_fp16, y = var_7774_to_fp16)[name = string("x_157_cast_fp16")]; - tensor var_7786 = const()[name = string("op_7786"), val = tensor([0, 2, 1])]; - tensor input_195_axes_0 = const()[name = string("input_195_axes_0"), val = tensor([2])]; - tensor var_7787_cast_fp16 = transpose(perm = var_7786, x = x_157_cast_fp16)[name = string("transpose_97")]; - tensor input_195_cast_fp16 = expand_dims(axes = input_195_axes_0, x = var_7787_cast_fp16)[name = string("input_195_cast_fp16")]; - string x_159_pad_type_0 = const()[name = string("x_159_pad_type_0"), val = string("valid")]; - tensor x_159_strides_0 = const()[name = string("x_159_strides_0"), val = tensor([1, 1])]; - tensor x_159_pad_0 = const()[name = string("x_159_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_159_dilations_0 = const()[name = string("x_159_dilations_0"), val = tensor([1, 1])]; - int32 x_159_groups_0 = const()[name = string("x_159_groups_0"), val = int32(1)]; - tensor model_model_layers_9_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(670456704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(676428736))))[name = string("model_model_layers_9_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_159_cast_fp16 = conv(dilations = x_159_dilations_0, groups = x_159_groups_0, pad = x_159_pad_0, pad_type = x_159_pad_type_0, strides = x_159_strides_0, weight = model_model_layers_9_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_195_cast_fp16)[name = string("x_159_cast_fp16")]; - string b_19_pad_type_0 = const()[name = string("b_19_pad_type_0"), val = string("valid")]; - tensor b_19_strides_0 = const()[name = string("b_19_strides_0"), val = tensor([1, 1])]; - tensor b_19_pad_0 = const()[name = string("b_19_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_19_dilations_0 = const()[name = string("b_19_dilations_0"), val = tensor([1, 1])]; - int32 b_19_groups_0 = const()[name = string("b_19_groups_0"), val = int32(1)]; - tensor model_model_layers_9_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(676539392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(682511424))))[name = string("model_model_layers_9_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_19_cast_fp16 = conv(dilations = b_19_dilations_0, groups = b_19_groups_0, pad = b_19_pad_0, pad_type = b_19_pad_type_0, strides = b_19_strides_0, weight = model_model_layers_9_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_195_cast_fp16)[name = string("b_19_cast_fp16")]; - string var_7812_mode_0 = const()[name = string("op_7812_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_7812_cast_fp16 = gelu(mode = var_7812_mode_0, x = x_159_cast_fp16)[name = string("op_7812_cast_fp16")]; - tensor input_197_cast_fp16 = mul(x = var_7812_cast_fp16, y = b_19_cast_fp16)[name = string("input_197_cast_fp16")]; - string e_19_pad_type_0 = const()[name = string("e_19_pad_type_0"), val = string("valid")]; - tensor e_19_strides_0 = const()[name = string("e_19_strides_0"), val = tensor([1, 1])]; - tensor e_19_pad_0 = const()[name = string("e_19_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_19_dilations_0 = const()[name = string("e_19_dilations_0"), val = tensor([1, 1])]; - int32 e_19_groups_0 = const()[name = string("e_19_groups_0"), val = int32(1)]; - tensor model_model_layers_9_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236636544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242608576))))[name = string("model_model_layers_9_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_19_cast_fp16 = conv(dilations = e_19_dilations_0, groups = e_19_groups_0, pad = e_19_pad_0, pad_type = e_19_pad_type_0, strides = e_19_strides_0, weight = model_model_layers_9_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_197_cast_fp16)[name = string("e_19_cast_fp16")]; - tensor var_7820_axes_0 = const()[name = string("op_7820_axes_0"), val = tensor([2])]; - tensor var_7820_cast_fp16 = squeeze(axes = var_7820_axes_0, x = e_19_cast_fp16)[name = string("op_7820_cast_fp16")]; - tensor var_7821 = const()[name = string("op_7821"), val = tensor([0, 2, 1])]; - int32 var_7832 = const()[name = string("op_7832"), val = int32(-1)]; - fp16 const_376_promoted_to_fp16 = const()[name = string("const_376_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_157_cast_fp16 = transpose(perm = var_7821, x = var_7820_cast_fp16)[name = string("transpose_96")]; - tensor var_7834_cast_fp16 = mul(x = hidden_states_157_cast_fp16, y = const_376_promoted_to_fp16)[name = string("op_7834_cast_fp16")]; - bool input_199_interleave_0 = const()[name = string("input_199_interleave_0"), val = bool(false)]; - tensor input_199_cast_fp16 = concat(axis = var_7832, interleave = input_199_interleave_0, values = (hidden_states_157_cast_fp16, var_7834_cast_fp16))[name = string("input_199_cast_fp16")]; - tensor normed_237_axes_0 = const()[name = string("normed_237_axes_0"), val = tensor([-1])]; - fp16 var_7829_to_fp16 = const()[name = string("op_7829_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_237_cast_fp16 = layer_norm(axes = normed_237_axes_0, epsilon = var_7829_to_fp16, x = input_199_cast_fp16)[name = string("normed_237_cast_fp16")]; - tensor normed_239_begin_0 = const()[name = string("normed_239_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_239_end_0 = const()[name = string("normed_239_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_239_end_mask_0 = const()[name = string("normed_239_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_239_cast_fp16 = slice_by_index(begin = normed_239_begin_0, end = normed_239_end_0, end_mask = normed_239_end_mask_0, x = normed_237_cast_fp16)[name = string("normed_239_cast_fp16")]; - tensor var_7848_to_fp16 = const()[name = string("op_7848_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242627072)))]; - tensor hidden_states_159_cast_fp16 = mul(x = normed_239_cast_fp16, y = var_7848_to_fp16)[name = string("hidden_states_159_cast_fp16")]; - tensor hidden_states_161_cast_fp16 = add(x = hidden_states_155_cast_fp16, y = hidden_states_159_cast_fp16)[name = string("hidden_states_161_cast_fp16")]; - int32 var_7899 = const()[name = string("op_7899"), val = int32(-1)]; - fp16 const_380_promoted_to_fp16 = const()[name = string("const_380_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_7901_cast_fp16 = mul(x = hidden_states_161_cast_fp16, y = const_380_promoted_to_fp16)[name = string("op_7901_cast_fp16")]; - bool input_201_interleave_0 = const()[name = string("input_201_interleave_0"), val = bool(false)]; - tensor input_201_cast_fp16 = concat(axis = var_7899, interleave = input_201_interleave_0, values = (hidden_states_161_cast_fp16, var_7901_cast_fp16))[name = string("input_201_cast_fp16")]; - tensor normed_241_axes_0 = const()[name = string("normed_241_axes_0"), val = tensor([-1])]; - fp16 var_7896_to_fp16 = const()[name = string("op_7896_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_241_cast_fp16 = layer_norm(axes = normed_241_axes_0, epsilon = var_7896_to_fp16, x = input_201_cast_fp16)[name = string("normed_241_cast_fp16")]; - tensor normed_243_begin_0 = const()[name = string("normed_243_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_243_end_0 = const()[name = string("normed_243_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_243_end_mask_0 = const()[name = string("normed_243_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_243_cast_fp16 = slice_by_index(begin = normed_243_begin_0, end = normed_243_end_0, end_mask = normed_243_end_mask_0, x = normed_241_cast_fp16)[name = string("normed_243_cast_fp16")]; - tensor var_7915_to_fp16 = const()[name = string("op_7915_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242629440)))]; - tensor hidden_states_163_cast_fp16 = mul(x = normed_243_cast_fp16, y = var_7915_to_fp16)[name = string("hidden_states_163_cast_fp16")]; - tensor var_7920 = const()[name = string("op_7920"), val = tensor([0, 2, 1])]; - tensor var_7923_axes_0 = const()[name = string("op_7923_axes_0"), val = tensor([2])]; - tensor var_7921_cast_fp16 = transpose(perm = var_7920, x = hidden_states_163_cast_fp16)[name = string("transpose_95")]; - tensor var_7923_cast_fp16 = expand_dims(axes = var_7923_axes_0, x = var_7921_cast_fp16)[name = string("op_7923_cast_fp16")]; - string var_7939_pad_type_0 = const()[name = string("op_7939_pad_type_0"), val = string("valid")]; - tensor var_7939_strides_0 = const()[name = string("op_7939_strides_0"), val = tensor([1, 1])]; - tensor var_7939_pad_0 = const()[name = string("op_7939_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_7939_dilations_0 = const()[name = string("op_7939_dilations_0"), val = tensor([1, 1])]; - int32 var_7939_groups_0 = const()[name = string("op_7939_groups_0"), val = int32(1)]; - tensor var_7939 = conv(dilations = var_7939_dilations_0, groups = var_7939_groups_0, pad = var_7939_pad_0, pad_type = var_7939_pad_type_0, strides = var_7939_strides_0, weight = model_model_layers_10_self_attn_q_proj_weight_palettized, x = var_7923_cast_fp16)[name = string("op_7939")]; - tensor var_7944 = const()[name = string("op_7944"), val = tensor([1, 4, 1, 256])]; - tensor var_7945 = reshape(shape = var_7944, x = var_7939)[name = string("op_7945")]; - string var_7961_pad_type_0 = const()[name = string("op_7961_pad_type_0"), val = string("valid")]; - tensor var_7961_strides_0 = const()[name = string("op_7961_strides_0"), val = tensor([1, 1])]; - tensor var_7961_pad_0 = const()[name = string("op_7961_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_7961_dilations_0 = const()[name = string("op_7961_dilations_0"), val = tensor([1, 1])]; - int32 var_7961_groups_0 = const()[name = string("op_7961_groups_0"), val = int32(1)]; - tensor var_7961 = conv(dilations = var_7961_dilations_0, groups = var_7961_groups_0, pad = var_7961_pad_0, pad_type = var_7961_pad_type_0, strides = var_7961_strides_0, weight = model_model_layers_10_self_attn_k_proj_weight_palettized, x = var_7923_cast_fp16)[name = string("op_7961")]; - tensor var_7966 = const()[name = string("op_7966"), val = tensor([1, 1, 1, 256])]; - tensor var_7967 = reshape(shape = var_7966, x = var_7961)[name = string("op_7967")]; - string var_7983_pad_type_0 = const()[name = string("op_7983_pad_type_0"), val = string("valid")]; - tensor var_7983_strides_0 = const()[name = string("op_7983_strides_0"), val = tensor([1, 1])]; - tensor var_7983_pad_0 = const()[name = string("op_7983_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_7983_dilations_0 = const()[name = string("op_7983_dilations_0"), val = tensor([1, 1])]; - int32 var_7983_groups_0 = const()[name = string("op_7983_groups_0"), val = int32(1)]; - tensor var_7983 = conv(dilations = var_7983_dilations_0, groups = var_7983_groups_0, pad = var_7983_pad_0, pad_type = var_7983_pad_type_0, strides = var_7983_strides_0, weight = model_model_layers_10_self_attn_v_proj_weight_palettized, x = var_7923_cast_fp16)[name = string("op_7983")]; - tensor var_7988 = const()[name = string("op_7988"), val = tensor([1, 1, 1, 256])]; - tensor var_7989 = reshape(shape = var_7988, x = var_7983)[name = string("op_7989")]; - int32 var_8004 = const()[name = string("op_8004"), val = int32(-1)]; - fp16 const_384_promoted = const()[name = string("const_384_promoted"), val = fp16(-0x1p+0)]; - tensor var_8006 = mul(x = var_7945, y = const_384_promoted)[name = string("op_8006")]; - bool input_205_interleave_0 = const()[name = string("input_205_interleave_0"), val = bool(false)]; - tensor input_205 = concat(axis = var_8004, interleave = input_205_interleave_0, values = (var_7945, var_8006))[name = string("input_205")]; - tensor normed_245_axes_0 = const()[name = string("normed_245_axes_0"), val = tensor([-1])]; - fp16 var_8001_to_fp16 = const()[name = string("op_8001_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_245_cast_fp16 = layer_norm(axes = normed_245_axes_0, epsilon = var_8001_to_fp16, x = input_205)[name = string("normed_245_cast_fp16")]; - tensor normed_247_begin_0 = const()[name = string("normed_247_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_247_end_0 = const()[name = string("normed_247_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_247_end_mask_0 = const()[name = string("normed_247_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_247 = slice_by_index(begin = normed_247_begin_0, end = normed_247_end_0, end_mask = normed_247_end_mask_0, x = normed_245_cast_fp16)[name = string("normed_247")]; - tensor var_8020_to_fp16 = const()[name = string("op_8020_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242631808)))]; - tensor q_21_cast_fp16 = mul(x = normed_247, y = var_8020_to_fp16)[name = string("q_21_cast_fp16")]; - int32 var_8031 = const()[name = string("op_8031"), val = int32(-1)]; - fp16 const_388_promoted = const()[name = string("const_388_promoted"), val = fp16(-0x1p+0)]; - tensor var_8033 = mul(x = var_7967, y = const_388_promoted)[name = string("op_8033")]; - bool input_207_interleave_0 = const()[name = string("input_207_interleave_0"), val = bool(false)]; - tensor input_207 = concat(axis = var_8031, interleave = input_207_interleave_0, values = (var_7967, var_8033))[name = string("input_207")]; - tensor normed_249_axes_0 = const()[name = string("normed_249_axes_0"), val = tensor([-1])]; - fp16 var_8028_to_fp16 = const()[name = string("op_8028_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_249_cast_fp16 = layer_norm(axes = normed_249_axes_0, epsilon = var_8028_to_fp16, x = input_207)[name = string("normed_249_cast_fp16")]; - tensor normed_251_begin_0 = const()[name = string("normed_251_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_251_end_0 = const()[name = string("normed_251_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_251_end_mask_0 = const()[name = string("normed_251_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_251 = slice_by_index(begin = normed_251_begin_0, end = normed_251_end_0, end_mask = normed_251_end_mask_0, x = normed_249_cast_fp16)[name = string("normed_251")]; - tensor var_8047_to_fp16 = const()[name = string("op_8047_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242632384)))]; - tensor k_21_cast_fp16 = mul(x = normed_251, y = var_8047_to_fp16)[name = string("k_21_cast_fp16")]; - tensor var_8049_cast_fp16 = mul(x = q_21_cast_fp16, y = cos_1_cast_fp16)[name = string("op_8049_cast_fp16")]; - tensor x1_41_begin_0 = const()[name = string("x1_41_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_41_end_0 = const()[name = string("x1_41_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_41_end_mask_0 = const()[name = string("x1_41_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_41_cast_fp16 = slice_by_index(begin = x1_41_begin_0, end = x1_41_end_0, end_mask = x1_41_end_mask_0, x = q_21_cast_fp16)[name = string("x1_41_cast_fp16")]; - tensor x2_41_begin_0 = const()[name = string("x2_41_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_41_end_0 = const()[name = string("x2_41_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_41_end_mask_0 = const()[name = string("x2_41_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_41_cast_fp16 = slice_by_index(begin = x2_41_begin_0, end = x2_41_end_0, end_mask = x2_41_end_mask_0, x = q_21_cast_fp16)[name = string("x2_41_cast_fp16")]; - fp16 const_394_promoted_to_fp16 = const()[name = string("const_394_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_8070_cast_fp16 = mul(x = x2_41_cast_fp16, y = const_394_promoted_to_fp16)[name = string("op_8070_cast_fp16")]; - int32 var_8072 = const()[name = string("op_8072"), val = int32(-1)]; - bool var_8073_interleave_0 = const()[name = string("op_8073_interleave_0"), val = bool(false)]; - tensor var_8073_cast_fp16 = concat(axis = var_8072, interleave = var_8073_interleave_0, values = (var_8070_cast_fp16, x1_41_cast_fp16))[name = string("op_8073_cast_fp16")]; - tensor var_8074_cast_fp16 = mul(x = var_8073_cast_fp16, y = sin_1_cast_fp16)[name = string("op_8074_cast_fp16")]; - tensor query_states_41_cast_fp16 = add(x = var_8049_cast_fp16, y = var_8074_cast_fp16)[name = string("query_states_41_cast_fp16")]; - tensor var_8077_cast_fp16 = mul(x = k_21_cast_fp16, y = cos_1_cast_fp16)[name = string("op_8077_cast_fp16")]; - tensor x1_43_begin_0 = const()[name = string("x1_43_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_43_end_0 = const()[name = string("x1_43_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_43_end_mask_0 = const()[name = string("x1_43_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_43_cast_fp16 = slice_by_index(begin = x1_43_begin_0, end = x1_43_end_0, end_mask = x1_43_end_mask_0, x = k_21_cast_fp16)[name = string("x1_43_cast_fp16")]; - tensor x2_43_begin_0 = const()[name = string("x2_43_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_43_end_0 = const()[name = string("x2_43_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_43_end_mask_0 = const()[name = string("x2_43_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_43_cast_fp16 = slice_by_index(begin = x2_43_begin_0, end = x2_43_end_0, end_mask = x2_43_end_mask_0, x = k_21_cast_fp16)[name = string("x2_43_cast_fp16")]; - fp16 const_397_promoted_to_fp16 = const()[name = string("const_397_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_8098_cast_fp16 = mul(x = x2_43_cast_fp16, y = const_397_promoted_to_fp16)[name = string("op_8098_cast_fp16")]; - int32 var_8100 = const()[name = string("op_8100"), val = int32(-1)]; - bool var_8101_interleave_0 = const()[name = string("op_8101_interleave_0"), val = bool(false)]; - tensor var_8101_cast_fp16 = concat(axis = var_8100, interleave = var_8101_interleave_0, values = (var_8098_cast_fp16, x1_43_cast_fp16))[name = string("op_8101_cast_fp16")]; - tensor var_8102_cast_fp16 = mul(x = var_8101_cast_fp16, y = sin_1_cast_fp16)[name = string("op_8102_cast_fp16")]; - tensor key_states_41_cast_fp16 = add(x = var_8077_cast_fp16, y = var_8102_cast_fp16)[name = string("key_states_41_cast_fp16")]; - tensor key_slice_19_begin_0 = const()[name = string("key_slice_19_begin_0"), val = tensor([9, 0, 0, 0])]; - tensor key_slice_19_end_0 = const()[name = string("key_slice_19_end_0"), val = tensor([10, 1, 512, 256])]; - tensor key_slice_19_end_mask_0 = const()[name = string("key_slice_19_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_19_cast_fp16 = slice_by_index(begin = key_slice_19_begin_0, end = key_slice_19_end_0, end_mask = key_slice_19_end_mask_0, x = coreml_update_state_71)[name = string("key_slice_19_cast_fp16")]; - tensor key_tail_19_begin_0 = const()[name = string("key_tail_19_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor key_tail_19_end_0 = const()[name = string("key_tail_19_end_0"), val = tensor([1, 1, 512, 256])]; - tensor key_tail_19_cast_fp16 = slice_by_index(begin = key_tail_19_begin_0, end = key_tail_19_end_0, x = key_slice_19_cast_fp16)[name = string("key_tail_19_cast_fp16")]; - int32 var_8115 = const()[name = string("op_8115"), val = int32(2)]; - bool shifted_key_19_interleave_0 = const()[name = string("shifted_key_19_interleave_0"), val = bool(false)]; - tensor shifted_key_19_cast_fp16 = concat(axis = var_8115, interleave = shifted_key_19_interleave_0, values = (key_tail_19_cast_fp16, key_states_41_cast_fp16))[name = string("shifted_key_19_cast_fp16")]; - tensor concat_44 = const()[name = string("concat_44"), val = tensor([9, 0, 0, 0])]; - tensor concat_45 = const()[name = string("concat_45"), val = tensor([10, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_19_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_19_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_19_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_44, begin_mask = model_model_kv_cache_local_internal_tensor_assign_19_begin_mask_0, end = concat_45, end_mask = model_model_kv_cache_local_internal_tensor_assign_19_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_19_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_19_stride_0, update = shifted_key_19_cast_fp16, x = coreml_update_state_71)[name = string("model_model_kv_cache_local_internal_tensor_assign_19_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_19_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_72_write_state")]; - tensor coreml_update_state_72 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_72")]; - tensor value_slice_19_begin_0 = const()[name = string("value_slice_19_begin_0"), val = tensor([31, 0, 0, 0])]; - tensor value_slice_19_end_0 = const()[name = string("value_slice_19_end_0"), val = tensor([32, 1, 512, 256])]; - tensor value_slice_19_end_mask_0 = const()[name = string("value_slice_19_end_mask_0"), val = tensor([false, true, true, true])]; - tensor value_slice_19_cast_fp16 = slice_by_index(begin = value_slice_19_begin_0, end = value_slice_19_end_0, end_mask = value_slice_19_end_mask_0, x = coreml_update_state_72)[name = string("value_slice_19_cast_fp16")]; - tensor value_tail_19_begin_0 = const()[name = string("value_tail_19_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor value_tail_19_end_0 = const()[name = string("value_tail_19_end_0"), val = tensor([1, 1, 512, 256])]; - tensor value_tail_19_cast_fp16 = slice_by_index(begin = value_tail_19_begin_0, end = value_tail_19_end_0, x = value_slice_19_cast_fp16)[name = string("value_tail_19_cast_fp16")]; - int32 var_8149 = const()[name = string("op_8149"), val = int32(2)]; - bool shifted_value_19_interleave_0 = const()[name = string("shifted_value_19_interleave_0"), val = bool(false)]; - tensor shifted_value_19_cast_fp16 = concat(axis = var_8149, interleave = shifted_value_19_interleave_0, values = (value_tail_19_cast_fp16, var_7989))[name = string("shifted_value_19_cast_fp16")]; - tensor concat_46 = const()[name = string("concat_46"), val = tensor([31, 0, 0, 0])]; - tensor concat_47 = const()[name = string("concat_47"), val = tensor([32, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_20_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_20_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_20_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_46, begin_mask = model_model_kv_cache_local_internal_tensor_assign_20_begin_mask_0, end = concat_47, end_mask = model_model_kv_cache_local_internal_tensor_assign_20_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_20_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_20_stride_0, update = shifted_value_19_cast_fp16, x = coreml_update_state_72)[name = string("model_model_kv_cache_local_internal_tensor_assign_20_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_20_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_73_write_state")]; - tensor coreml_update_state_73 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_73")]; - tensor var_8177_begin_0 = const()[name = string("op_8177_begin_0"), val = tensor([9, 0, 0, 0])]; - tensor var_8177_end_0 = const()[name = string("op_8177_end_0"), val = tensor([10, 1, 512, 256])]; - tensor var_8177_end_mask_0 = const()[name = string("op_8177_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_8177_cast_fp16 = slice_by_index(begin = var_8177_begin_0, end = var_8177_end_0, end_mask = var_8177_end_mask_0, x = coreml_update_state_73)[name = string("op_8177_cast_fp16")]; - tensor var_8184_begin_0 = const()[name = string("op_8184_begin_0"), val = tensor([31, 0, 0, 0])]; - tensor var_8184_end_0 = const()[name = string("op_8184_end_0"), val = tensor([32, 1, 512, 256])]; - tensor var_8184_end_mask_0 = const()[name = string("op_8184_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_8184_cast_fp16 = slice_by_index(begin = var_8184_begin_0, end = var_8184_end_0, end_mask = var_8184_end_mask_0, x = coreml_update_state_73)[name = string("op_8184_cast_fp16")]; - tensor var_8221 = const()[name = string("op_8221"), val = tensor([1, 4, 1, 1])]; - tensor x_165_cast_fp16 = tile(reps = var_8221, x = var_8177_cast_fp16)[name = string("x_165_cast_fp16")]; - tensor var_8241 = const()[name = string("op_8241"), val = tensor([1, 4, 1, 1])]; - tensor x_171_cast_fp16 = tile(reps = var_8241, x = var_8184_cast_fp16)[name = string("x_171_cast_fp16")]; - bool var_8268_transpose_x_1 = const()[name = string("op_8268_transpose_x_1"), val = bool(false)]; - bool var_8268_transpose_y_1 = const()[name = string("op_8268_transpose_y_1"), val = bool(true)]; - tensor var_8268 = matmul(transpose_x = var_8268_transpose_x_1, transpose_y = var_8268_transpose_y_1, x = query_states_41_cast_fp16, y = x_165_cast_fp16)[name = string("op_8268")]; - fp16 var_8269_to_fp16 = const()[name = string("op_8269_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_61_cast_fp16 = mul(x = var_8268, y = var_8269_to_fp16)[name = string("attn_weights_61_cast_fp16")]; - tensor attn_weights_63_cast_fp16 = add(x = attn_weights_61_cast_fp16, y = var_2059)[name = string("attn_weights_63_cast_fp16")]; - int32 var_8304 = const()[name = string("op_8304"), val = int32(-1)]; - tensor attn_weights_65_cast_fp16 = softmax(axis = var_8304, x = attn_weights_63_cast_fp16)[name = string("attn_weights_65_cast_fp16")]; - bool attn_output_101_transpose_x_0 = const()[name = string("attn_output_101_transpose_x_0"), val = bool(false)]; - bool attn_output_101_transpose_y_0 = const()[name = string("attn_output_101_transpose_y_0"), val = bool(false)]; - tensor attn_output_101_cast_fp16 = matmul(transpose_x = attn_output_101_transpose_x_0, transpose_y = attn_output_101_transpose_y_0, x = attn_weights_65_cast_fp16, y = x_171_cast_fp16)[name = string("attn_output_101_cast_fp16")]; - tensor var_8315_perm_0 = const()[name = string("op_8315_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_8319 = const()[name = string("op_8319"), val = tensor([1, 1, 1024])]; - tensor var_8315_cast_fp16 = transpose(perm = var_8315_perm_0, x = attn_output_101_cast_fp16)[name = string("transpose_94")]; - tensor attn_output_105_cast_fp16 = reshape(shape = var_8319, x = var_8315_cast_fp16)[name = string("attn_output_105_cast_fp16")]; - tensor var_8324 = const()[name = string("op_8324"), val = tensor([0, 2, 1])]; - string var_8340_pad_type_0 = const()[name = string("op_8340_pad_type_0"), val = string("valid")]; - int32 var_8340_groups_0 = const()[name = string("op_8340_groups_0"), val = int32(1)]; - tensor var_8340_strides_0 = const()[name = string("op_8340_strides_0"), val = tensor([1])]; - tensor var_8340_pad_0 = const()[name = string("op_8340_pad_0"), val = tensor([0, 0])]; - tensor var_8340_dilations_0 = const()[name = string("op_8340_dilations_0"), val = tensor([1])]; - tensor squeeze_10_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242632960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243517760))))[name = string("squeeze_10_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_8325_cast_fp16 = transpose(perm = var_8324, x = attn_output_105_cast_fp16)[name = string("transpose_93")]; - tensor var_8340_cast_fp16 = conv(dilations = var_8340_dilations_0, groups = var_8340_groups_0, pad = var_8340_pad_0, pad_type = var_8340_pad_type_0, strides = var_8340_strides_0, weight = squeeze_10_cast_fp16_to_fp32_to_fp16_palettized, x = var_8325_cast_fp16)[name = string("op_8340_cast_fp16")]; - tensor var_8344 = const()[name = string("op_8344"), val = tensor([0, 2, 1])]; - int32 var_8355 = const()[name = string("op_8355"), val = int32(-1)]; - fp16 const_406_promoted_to_fp16 = const()[name = string("const_406_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_169_cast_fp16 = transpose(perm = var_8344, x = var_8340_cast_fp16)[name = string("transpose_92")]; - tensor var_8357_cast_fp16 = mul(x = hidden_states_169_cast_fp16, y = const_406_promoted_to_fp16)[name = string("op_8357_cast_fp16")]; - bool input_211_interleave_0 = const()[name = string("input_211_interleave_0"), val = bool(false)]; - tensor input_211_cast_fp16 = concat(axis = var_8355, interleave = input_211_interleave_0, values = (hidden_states_169_cast_fp16, var_8357_cast_fp16))[name = string("input_211_cast_fp16")]; - tensor normed_253_axes_0 = const()[name = string("normed_253_axes_0"), val = tensor([-1])]; - fp16 var_8352_to_fp16 = const()[name = string("op_8352_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_253_cast_fp16 = layer_norm(axes = normed_253_axes_0, epsilon = var_8352_to_fp16, x = input_211_cast_fp16)[name = string("normed_253_cast_fp16")]; - tensor normed_255_begin_0 = const()[name = string("normed_255_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_255_end_0 = const()[name = string("normed_255_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_255_end_mask_0 = const()[name = string("normed_255_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_255_cast_fp16 = slice_by_index(begin = normed_255_begin_0, end = normed_255_end_0, end_mask = normed_255_end_mask_0, x = normed_253_cast_fp16)[name = string("normed_255_cast_fp16")]; - tensor var_8371_to_fp16 = const()[name = string("op_8371_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243536256)))]; - tensor attn_output_109_cast_fp16 = mul(x = normed_255_cast_fp16, y = var_8371_to_fp16)[name = string("attn_output_109_cast_fp16")]; - tensor hidden_states_171_cast_fp16 = add(x = hidden_states_161_cast_fp16, y = attn_output_109_cast_fp16)[name = string("hidden_states_171_cast_fp16")]; - int32 var_8384 = const()[name = string("op_8384"), val = int32(-1)]; - fp16 const_410_promoted_to_fp16 = const()[name = string("const_410_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_8386_cast_fp16 = mul(x = hidden_states_171_cast_fp16, y = const_410_promoted_to_fp16)[name = string("op_8386_cast_fp16")]; - bool input_213_interleave_0 = const()[name = string("input_213_interleave_0"), val = bool(false)]; - tensor input_213_cast_fp16 = concat(axis = var_8384, interleave = input_213_interleave_0, values = (hidden_states_171_cast_fp16, var_8386_cast_fp16))[name = string("input_213_cast_fp16")]; - tensor normed_257_axes_0 = const()[name = string("normed_257_axes_0"), val = tensor([-1])]; - fp16 var_8381_to_fp16 = const()[name = string("op_8381_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_257_cast_fp16 = layer_norm(axes = normed_257_axes_0, epsilon = var_8381_to_fp16, x = input_213_cast_fp16)[name = string("normed_257_cast_fp16")]; - tensor normed_259_begin_0 = const()[name = string("normed_259_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_259_end_0 = const()[name = string("normed_259_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_259_end_mask_0 = const()[name = string("normed_259_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_259_cast_fp16 = slice_by_index(begin = normed_259_begin_0, end = normed_259_end_0, end_mask = normed_259_end_mask_0, x = normed_257_cast_fp16)[name = string("normed_259_cast_fp16")]; - tensor var_8400_to_fp16 = const()[name = string("op_8400_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243538624)))]; - tensor x_173_cast_fp16 = mul(x = normed_259_cast_fp16, y = var_8400_to_fp16)[name = string("x_173_cast_fp16")]; - tensor var_8412 = const()[name = string("op_8412"), val = tensor([0, 2, 1])]; - tensor input_215_axes_0 = const()[name = string("input_215_axes_0"), val = tensor([2])]; - tensor var_8413_cast_fp16 = transpose(perm = var_8412, x = x_173_cast_fp16)[name = string("transpose_91")]; - tensor input_215_cast_fp16 = expand_dims(axes = input_215_axes_0, x = var_8413_cast_fp16)[name = string("input_215_cast_fp16")]; - string x_175_pad_type_0 = const()[name = string("x_175_pad_type_0"), val = string("valid")]; - tensor x_175_strides_0 = const()[name = string("x_175_strides_0"), val = tensor([1, 1])]; - tensor x_175_pad_0 = const()[name = string("x_175_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_175_dilations_0 = const()[name = string("x_175_dilations_0"), val = tensor([1, 1])]; - int32 x_175_groups_0 = const()[name = string("x_175_groups_0"), val = int32(1)]; - tensor model_model_layers_10_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(682622080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(688594112))))[name = string("model_model_layers_10_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_175_cast_fp16 = conv(dilations = x_175_dilations_0, groups = x_175_groups_0, pad = x_175_pad_0, pad_type = x_175_pad_type_0, strides = x_175_strides_0, weight = model_model_layers_10_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_215_cast_fp16)[name = string("x_175_cast_fp16")]; - string b_21_pad_type_0 = const()[name = string("b_21_pad_type_0"), val = string("valid")]; - tensor b_21_strides_0 = const()[name = string("b_21_strides_0"), val = tensor([1, 1])]; - tensor b_21_pad_0 = const()[name = string("b_21_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_21_dilations_0 = const()[name = string("b_21_dilations_0"), val = tensor([1, 1])]; - int32 b_21_groups_0 = const()[name = string("b_21_groups_0"), val = int32(1)]; - tensor model_model_layers_10_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(688704768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(694676800))))[name = string("model_model_layers_10_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_21_cast_fp16 = conv(dilations = b_21_dilations_0, groups = b_21_groups_0, pad = b_21_pad_0, pad_type = b_21_pad_type_0, strides = b_21_strides_0, weight = model_model_layers_10_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_215_cast_fp16)[name = string("b_21_cast_fp16")]; - string var_8438_mode_0 = const()[name = string("op_8438_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_8438_cast_fp16 = gelu(mode = var_8438_mode_0, x = x_175_cast_fp16)[name = string("op_8438_cast_fp16")]; - tensor input_217_cast_fp16 = mul(x = var_8438_cast_fp16, y = b_21_cast_fp16)[name = string("input_217_cast_fp16")]; - string e_21_pad_type_0 = const()[name = string("e_21_pad_type_0"), val = string("valid")]; - tensor e_21_strides_0 = const()[name = string("e_21_strides_0"), val = tensor([1, 1])]; - tensor e_21_pad_0 = const()[name = string("e_21_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_21_dilations_0 = const()[name = string("e_21_dilations_0"), val = tensor([1, 1])]; - int32 e_21_groups_0 = const()[name = string("e_21_groups_0"), val = int32(1)]; - tensor model_model_layers_10_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(255706368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261678400))))[name = string("model_model_layers_10_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_21_cast_fp16 = conv(dilations = e_21_dilations_0, groups = e_21_groups_0, pad = e_21_pad_0, pad_type = e_21_pad_type_0, strides = e_21_strides_0, weight = model_model_layers_10_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_217_cast_fp16)[name = string("e_21_cast_fp16")]; - tensor var_8446_axes_0 = const()[name = string("op_8446_axes_0"), val = tensor([2])]; - tensor var_8446_cast_fp16 = squeeze(axes = var_8446_axes_0, x = e_21_cast_fp16)[name = string("op_8446_cast_fp16")]; - tensor var_8447 = const()[name = string("op_8447"), val = tensor([0, 2, 1])]; - int32 var_8458 = const()[name = string("op_8458"), val = int32(-1)]; - fp16 const_414_promoted_to_fp16 = const()[name = string("const_414_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_173_cast_fp16 = transpose(perm = var_8447, x = var_8446_cast_fp16)[name = string("transpose_90")]; - tensor var_8460_cast_fp16 = mul(x = hidden_states_173_cast_fp16, y = const_414_promoted_to_fp16)[name = string("op_8460_cast_fp16")]; - bool input_219_interleave_0 = const()[name = string("input_219_interleave_0"), val = bool(false)]; - tensor input_219_cast_fp16 = concat(axis = var_8458, interleave = input_219_interleave_0, values = (hidden_states_173_cast_fp16, var_8460_cast_fp16))[name = string("input_219_cast_fp16")]; - tensor normed_261_axes_0 = const()[name = string("normed_261_axes_0"), val = tensor([-1])]; - fp16 var_8455_to_fp16 = const()[name = string("op_8455_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_261_cast_fp16 = layer_norm(axes = normed_261_axes_0, epsilon = var_8455_to_fp16, x = input_219_cast_fp16)[name = string("normed_261_cast_fp16")]; - tensor normed_263_begin_0 = const()[name = string("normed_263_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_263_end_0 = const()[name = string("normed_263_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_263_end_mask_0 = const()[name = string("normed_263_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_263_cast_fp16 = slice_by_index(begin = normed_263_begin_0, end = normed_263_end_0, end_mask = normed_263_end_mask_0, x = normed_261_cast_fp16)[name = string("normed_263_cast_fp16")]; - tensor var_8474_to_fp16 = const()[name = string("op_8474_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261696896)))]; - tensor hidden_states_175_cast_fp16 = mul(x = normed_263_cast_fp16, y = var_8474_to_fp16)[name = string("hidden_states_175_cast_fp16")]; - tensor hidden_states_177_cast_fp16 = add(x = hidden_states_171_cast_fp16, y = hidden_states_175_cast_fp16)[name = string("hidden_states_177_cast_fp16")]; - int32 var_8525 = const()[name = string("op_8525"), val = int32(-1)]; - fp16 const_418_promoted_to_fp16 = const()[name = string("const_418_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_8527_cast_fp16 = mul(x = hidden_states_177_cast_fp16, y = const_418_promoted_to_fp16)[name = string("op_8527_cast_fp16")]; - bool input_221_interleave_0 = const()[name = string("input_221_interleave_0"), val = bool(false)]; - tensor input_221_cast_fp16 = concat(axis = var_8525, interleave = input_221_interleave_0, values = (hidden_states_177_cast_fp16, var_8527_cast_fp16))[name = string("input_221_cast_fp16")]; - tensor normed_265_axes_0 = const()[name = string("normed_265_axes_0"), val = tensor([-1])]; - fp16 var_8522_to_fp16 = const()[name = string("op_8522_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_265_cast_fp16 = layer_norm(axes = normed_265_axes_0, epsilon = var_8522_to_fp16, x = input_221_cast_fp16)[name = string("normed_265_cast_fp16")]; - tensor normed_267_begin_0 = const()[name = string("normed_267_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_267_end_0 = const()[name = string("normed_267_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_267_end_mask_0 = const()[name = string("normed_267_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_267_cast_fp16 = slice_by_index(begin = normed_267_begin_0, end = normed_267_end_0, end_mask = normed_267_end_mask_0, x = normed_265_cast_fp16)[name = string("normed_267_cast_fp16")]; - tensor var_8541_to_fp16 = const()[name = string("op_8541_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261699264)))]; - tensor hidden_states_179_cast_fp16 = mul(x = normed_267_cast_fp16, y = var_8541_to_fp16)[name = string("hidden_states_179_cast_fp16")]; - tensor var_8546 = const()[name = string("op_8546"), val = tensor([0, 2, 1])]; - tensor var_8549_axes_0 = const()[name = string("op_8549_axes_0"), val = tensor([2])]; - tensor var_8547_cast_fp16 = transpose(perm = var_8546, x = hidden_states_179_cast_fp16)[name = string("transpose_89")]; - tensor var_8549_cast_fp16 = expand_dims(axes = var_8549_axes_0, x = var_8547_cast_fp16)[name = string("op_8549_cast_fp16")]; - string var_8565_pad_type_0 = const()[name = string("op_8565_pad_type_0"), val = string("valid")]; - tensor var_8565_strides_0 = const()[name = string("op_8565_strides_0"), val = tensor([1, 1])]; - tensor var_8565_pad_0 = const()[name = string("op_8565_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_8565_dilations_0 = const()[name = string("op_8565_dilations_0"), val = tensor([1, 1])]; - int32 var_8565_groups_0 = const()[name = string("op_8565_groups_0"), val = int32(1)]; - tensor var_8565 = conv(dilations = var_8565_dilations_0, groups = var_8565_groups_0, pad = var_8565_pad_0, pad_type = var_8565_pad_type_0, strides = var_8565_strides_0, weight = model_model_layers_11_self_attn_q_proj_weight_palettized, x = var_8549_cast_fp16)[name = string("op_8565")]; - tensor var_8570 = const()[name = string("op_8570"), val = tensor([1, 4, 1, 256])]; - tensor var_8571 = reshape(shape = var_8570, x = var_8565)[name = string("op_8571")]; - string var_8587_pad_type_0 = const()[name = string("op_8587_pad_type_0"), val = string("valid")]; - tensor var_8587_strides_0 = const()[name = string("op_8587_strides_0"), val = tensor([1, 1])]; - tensor var_8587_pad_0 = const()[name = string("op_8587_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_8587_dilations_0 = const()[name = string("op_8587_dilations_0"), val = tensor([1, 1])]; - int32 var_8587_groups_0 = const()[name = string("op_8587_groups_0"), val = int32(1)]; - tensor var_8587 = conv(dilations = var_8587_dilations_0, groups = var_8587_groups_0, pad = var_8587_pad_0, pad_type = var_8587_pad_type_0, strides = var_8587_strides_0, weight = model_model_layers_11_self_attn_k_proj_weight_palettized, x = var_8549_cast_fp16)[name = string("op_8587")]; - tensor var_8592 = const()[name = string("op_8592"), val = tensor([1, 1, 1, 256])]; - tensor var_8593 = reshape(shape = var_8592, x = var_8587)[name = string("op_8593")]; - string var_8609_pad_type_0 = const()[name = string("op_8609_pad_type_0"), val = string("valid")]; - tensor var_8609_strides_0 = const()[name = string("op_8609_strides_0"), val = tensor([1, 1])]; - tensor var_8609_pad_0 = const()[name = string("op_8609_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_8609_dilations_0 = const()[name = string("op_8609_dilations_0"), val = tensor([1, 1])]; - int32 var_8609_groups_0 = const()[name = string("op_8609_groups_0"), val = int32(1)]; - tensor var_8609 = conv(dilations = var_8609_dilations_0, groups = var_8609_groups_0, pad = var_8609_pad_0, pad_type = var_8609_pad_type_0, strides = var_8609_strides_0, weight = model_model_layers_11_self_attn_v_proj_weight_palettized, x = var_8549_cast_fp16)[name = string("op_8609")]; - tensor var_8614 = const()[name = string("op_8614"), val = tensor([1, 1, 1, 256])]; - tensor var_8615 = reshape(shape = var_8614, x = var_8609)[name = string("op_8615")]; - int32 var_8630 = const()[name = string("op_8630"), val = int32(-1)]; - fp16 const_422_promoted = const()[name = string("const_422_promoted"), val = fp16(-0x1p+0)]; - tensor var_8632 = mul(x = var_8571, y = const_422_promoted)[name = string("op_8632")]; - bool input_225_interleave_0 = const()[name = string("input_225_interleave_0"), val = bool(false)]; - tensor input_225 = concat(axis = var_8630, interleave = input_225_interleave_0, values = (var_8571, var_8632))[name = string("input_225")]; - tensor normed_269_axes_0 = const()[name = string("normed_269_axes_0"), val = tensor([-1])]; - fp16 var_8627_to_fp16 = const()[name = string("op_8627_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_269_cast_fp16 = layer_norm(axes = normed_269_axes_0, epsilon = var_8627_to_fp16, x = input_225)[name = string("normed_269_cast_fp16")]; - tensor normed_271_begin_0 = const()[name = string("normed_271_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_271_end_0 = const()[name = string("normed_271_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_271_end_mask_0 = const()[name = string("normed_271_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_271 = slice_by_index(begin = normed_271_begin_0, end = normed_271_end_0, end_mask = normed_271_end_mask_0, x = normed_269_cast_fp16)[name = string("normed_271")]; - tensor var_8646_to_fp16 = const()[name = string("op_8646_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261701632)))]; - tensor q_23_cast_fp16 = mul(x = normed_271, y = var_8646_to_fp16)[name = string("q_23_cast_fp16")]; - int32 var_8657 = const()[name = string("op_8657"), val = int32(-1)]; - fp16 const_426_promoted = const()[name = string("const_426_promoted"), val = fp16(-0x1p+0)]; - tensor var_8659 = mul(x = var_8593, y = const_426_promoted)[name = string("op_8659")]; - bool input_227_interleave_0 = const()[name = string("input_227_interleave_0"), val = bool(false)]; - tensor input_227 = concat(axis = var_8657, interleave = input_227_interleave_0, values = (var_8593, var_8659))[name = string("input_227")]; - tensor normed_273_axes_0 = const()[name = string("normed_273_axes_0"), val = tensor([-1])]; - fp16 var_8654_to_fp16 = const()[name = string("op_8654_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_273_cast_fp16 = layer_norm(axes = normed_273_axes_0, epsilon = var_8654_to_fp16, x = input_227)[name = string("normed_273_cast_fp16")]; - tensor normed_275_begin_0 = const()[name = string("normed_275_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_275_end_0 = const()[name = string("normed_275_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_275_end_mask_0 = const()[name = string("normed_275_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_275 = slice_by_index(begin = normed_275_begin_0, end = normed_275_end_0, end_mask = normed_275_end_mask_0, x = normed_273_cast_fp16)[name = string("normed_275")]; - tensor var_8673_to_fp16 = const()[name = string("op_8673_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261702208)))]; - tensor k_23_cast_fp16 = mul(x = normed_275, y = var_8673_to_fp16)[name = string("k_23_cast_fp16")]; - tensor var_8675_cast_fp16 = mul(x = q_23_cast_fp16, y = cos_21_cast_fp16)[name = string("op_8675_cast_fp16")]; - tensor x1_45_begin_0 = const()[name = string("x1_45_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_45_end_0 = const()[name = string("x1_45_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_45_end_mask_0 = const()[name = string("x1_45_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_45_cast_fp16 = slice_by_index(begin = x1_45_begin_0, end = x1_45_end_0, end_mask = x1_45_end_mask_0, x = q_23_cast_fp16)[name = string("x1_45_cast_fp16")]; - tensor x2_45_begin_0 = const()[name = string("x2_45_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_45_end_0 = const()[name = string("x2_45_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_45_end_mask_0 = const()[name = string("x2_45_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_45_cast_fp16 = slice_by_index(begin = x2_45_begin_0, end = x2_45_end_0, end_mask = x2_45_end_mask_0, x = q_23_cast_fp16)[name = string("x2_45_cast_fp16")]; - fp16 const_432_promoted_to_fp16 = const()[name = string("const_432_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_8696_cast_fp16 = mul(x = x2_45_cast_fp16, y = const_432_promoted_to_fp16)[name = string("op_8696_cast_fp16")]; - int32 var_8698 = const()[name = string("op_8698"), val = int32(-1)]; - bool var_8699_interleave_0 = const()[name = string("op_8699_interleave_0"), val = bool(false)]; - tensor var_8699_cast_fp16 = concat(axis = var_8698, interleave = var_8699_interleave_0, values = (var_8696_cast_fp16, x1_45_cast_fp16))[name = string("op_8699_cast_fp16")]; - tensor var_8700_cast_fp16 = mul(x = var_8699_cast_fp16, y = sin_21_cast_fp16)[name = string("op_8700_cast_fp16")]; - tensor query_states_45_cast_fp16 = add(x = var_8675_cast_fp16, y = var_8700_cast_fp16)[name = string("query_states_45_cast_fp16")]; - tensor var_8703_cast_fp16 = mul(x = k_23_cast_fp16, y = cos_21_cast_fp16)[name = string("op_8703_cast_fp16")]; - tensor x1_47_begin_0 = const()[name = string("x1_47_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_47_end_0 = const()[name = string("x1_47_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_47_end_mask_0 = const()[name = string("x1_47_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_47_cast_fp16 = slice_by_index(begin = x1_47_begin_0, end = x1_47_end_0, end_mask = x1_47_end_mask_0, x = k_23_cast_fp16)[name = string("x1_47_cast_fp16")]; - tensor x2_47_begin_0 = const()[name = string("x2_47_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_47_end_0 = const()[name = string("x2_47_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_47_end_mask_0 = const()[name = string("x2_47_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_47_cast_fp16 = slice_by_index(begin = x2_47_begin_0, end = x2_47_end_0, end_mask = x2_47_end_mask_0, x = k_23_cast_fp16)[name = string("x2_47_cast_fp16")]; - fp16 const_435_promoted_to_fp16 = const()[name = string("const_435_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_8724_cast_fp16 = mul(x = x2_47_cast_fp16, y = const_435_promoted_to_fp16)[name = string("op_8724_cast_fp16")]; - int32 var_8726 = const()[name = string("op_8726"), val = int32(-1)]; - bool var_8727_interleave_0 = const()[name = string("op_8727_interleave_0"), val = bool(false)]; - tensor var_8727_cast_fp16 = concat(axis = var_8726, interleave = var_8727_interleave_0, values = (var_8724_cast_fp16, x1_47_cast_fp16))[name = string("op_8727_cast_fp16")]; - tensor var_8728_cast_fp16 = mul(x = var_8727_cast_fp16, y = sin_21_cast_fp16)[name = string("op_8728_cast_fp16")]; - tensor key_states_45_cast_fp16 = add(x = var_8703_cast_fp16, y = var_8728_cast_fp16)[name = string("key_states_45_cast_fp16")]; - tensor expand_dims_112 = const()[name = string("expand_dims_112"), val = tensor([1])]; - tensor expand_dims_113 = const()[name = string("expand_dims_113"), val = tensor([0])]; - tensor expand_dims_115 = const()[name = string("expand_dims_115"), val = tensor([0])]; - tensor expand_dims_116 = const()[name = string("expand_dims_116"), val = tensor([2])]; - int32 concat_50_axis_0 = const()[name = string("concat_50_axis_0"), val = int32(0)]; - bool concat_50_interleave_0 = const()[name = string("concat_50_interleave_0"), val = bool(false)]; - tensor concat_50 = concat(axis = concat_50_axis_0, interleave = concat_50_interleave_0, values = (expand_dims_112, expand_dims_113, current_pos, expand_dims_115))[name = string("concat_50")]; - tensor concat_51_values1_0 = const()[name = string("concat_51_values1_0"), val = tensor([0])]; - tensor concat_51_values3_0 = const()[name = string("concat_51_values3_0"), val = tensor([0])]; - int32 concat_51_axis_0 = const()[name = string("concat_51_axis_0"), val = int32(0)]; - bool concat_51_interleave_0 = const()[name = string("concat_51_interleave_0"), val = bool(false)]; - tensor concat_51 = concat(axis = concat_51_axis_0, interleave = concat_51_interleave_0, values = (expand_dims_116, concat_51_values1_0, var_4997, concat_51_values3_0))[name = string("concat_51")]; - tensor model_model_kv_cache_global_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_global_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_global_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_50, begin_mask = model_model_kv_cache_global_internal_tensor_assign_3_begin_mask_0, end = concat_51, end_mask = model_model_kv_cache_global_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_3_stride_0, update = key_states_45_cast_fp16, x = coreml_update_state_63)[name = string("model_model_kv_cache_global_internal_tensor_assign_3_cast_fp16")]; - write_state(data = model_model_kv_cache_global_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_74_write_state")]; - tensor coreml_update_state_74 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_74")]; - tensor expand_dims_118 = const()[name = string("expand_dims_118"), val = tensor([5])]; - tensor expand_dims_119 = const()[name = string("expand_dims_119"), val = tensor([0])]; - tensor expand_dims_121 = const()[name = string("expand_dims_121"), val = tensor([0])]; - tensor expand_dims_122 = const()[name = string("expand_dims_122"), val = tensor([6])]; - int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)]; - bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)]; - tensor concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (expand_dims_118, expand_dims_119, current_pos, expand_dims_121))[name = string("concat_54")]; - tensor concat_55_values1_0 = const()[name = string("concat_55_values1_0"), val = tensor([0])]; - tensor concat_55_values3_0 = const()[name = string("concat_55_values3_0"), val = tensor([0])]; - int32 concat_55_axis_0 = const()[name = string("concat_55_axis_0"), val = int32(0)]; - bool concat_55_interleave_0 = const()[name = string("concat_55_interleave_0"), val = bool(false)]; - tensor concat_55 = concat(axis = concat_55_axis_0, interleave = concat_55_interleave_0, values = (expand_dims_122, concat_55_values1_0, var_4997, concat_55_values3_0))[name = string("concat_55")]; - tensor model_model_kv_cache_global_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_global_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_global_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_54, begin_mask = model_model_kv_cache_global_internal_tensor_assign_4_begin_mask_0, end = concat_55, end_mask = model_model_kv_cache_global_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_4_stride_0, update = var_8615, x = coreml_update_state_74)[name = string("model_model_kv_cache_global_internal_tensor_assign_4_cast_fp16")]; - write_state(data = model_model_kv_cache_global_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_75_write_state")]; - tensor coreml_update_state_75 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_75")]; - tensor var_8783_begin_0 = const()[name = string("op_8783_begin_0"), val = tensor([1, 0, 0, 0])]; - tensor var_8783_end_0 = const()[name = string("op_8783_end_0"), val = tensor([2, 1, 4096, 256])]; - tensor var_8783_end_mask_0 = const()[name = string("op_8783_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_8783_cast_fp16 = slice_by_index(begin = var_8783_begin_0, end = var_8783_end_0, end_mask = var_8783_end_mask_0, x = coreml_update_state_75)[name = string("op_8783_cast_fp16")]; - tensor var_8790_begin_0 = const()[name = string("op_8790_begin_0"), val = tensor([5, 0, 0, 0])]; - tensor var_8790_end_0 = const()[name = string("op_8790_end_0"), val = tensor([6, 1, 4096, 256])]; - tensor var_8790_end_mask_0 = const()[name = string("op_8790_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_8790_cast_fp16 = slice_by_index(begin = var_8790_begin_0, end = var_8790_end_0, end_mask = var_8790_end_mask_0, x = coreml_update_state_75)[name = string("op_8790_cast_fp16")]; - tensor var_8827 = const()[name = string("op_8827"), val = tensor([1, 4, 1, 1])]; - tensor x_181_cast_fp16 = tile(reps = var_8827, x = var_8783_cast_fp16)[name = string("x_181_cast_fp16")]; - tensor var_8847 = const()[name = string("op_8847"), val = tensor([1, 4, 1, 1])]; - tensor x_187_cast_fp16 = tile(reps = var_8847, x = var_8790_cast_fp16)[name = string("x_187_cast_fp16")]; - bool var_8874_transpose_x_1 = const()[name = string("op_8874_transpose_x_1"), val = bool(false)]; - bool var_8874_transpose_y_1 = const()[name = string("op_8874_transpose_y_1"), val = bool(true)]; - tensor var_8874 = matmul(transpose_x = var_8874_transpose_x_1, transpose_y = var_8874_transpose_y_1, x = query_states_45_cast_fp16, y = x_181_cast_fp16)[name = string("op_8874")]; - fp16 var_8875_to_fp16 = const()[name = string("op_8875_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_67_cast_fp16 = mul(x = var_8874, y = var_8875_to_fp16)[name = string("attn_weights_67_cast_fp16")]; - tensor attn_weights_69_cast_fp16 = add(x = attn_weights_67_cast_fp16, y = causal_mask)[name = string("attn_weights_69_cast_fp16")]; - int32 var_8910 = const()[name = string("op_8910"), val = int32(-1)]; - tensor attn_weights_71_cast_fp16 = softmax(axis = var_8910, x = attn_weights_69_cast_fp16)[name = string("attn_weights_71_cast_fp16")]; - bool attn_output_111_transpose_x_0 = const()[name = string("attn_output_111_transpose_x_0"), val = bool(false)]; - bool attn_output_111_transpose_y_0 = const()[name = string("attn_output_111_transpose_y_0"), val = bool(false)]; - tensor attn_output_111_cast_fp16 = matmul(transpose_x = attn_output_111_transpose_x_0, transpose_y = attn_output_111_transpose_y_0, x = attn_weights_71_cast_fp16, y = x_187_cast_fp16)[name = string("attn_output_111_cast_fp16")]; - tensor var_8921_perm_0 = const()[name = string("op_8921_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_8925 = const()[name = string("op_8925"), val = tensor([1, 1, 1024])]; - tensor var_8921_cast_fp16 = transpose(perm = var_8921_perm_0, x = attn_output_111_cast_fp16)[name = string("transpose_88")]; - tensor attn_output_115_cast_fp16 = reshape(shape = var_8925, x = var_8921_cast_fp16)[name = string("attn_output_115_cast_fp16")]; - tensor var_8930 = const()[name = string("op_8930"), val = tensor([0, 2, 1])]; - string var_8946_pad_type_0 = const()[name = string("op_8946_pad_type_0"), val = string("valid")]; - int32 var_8946_groups_0 = const()[name = string("op_8946_groups_0"), val = int32(1)]; - tensor var_8946_strides_0 = const()[name = string("op_8946_strides_0"), val = tensor([1])]; - tensor var_8946_pad_0 = const()[name = string("op_8946_pad_0"), val = tensor([0, 0])]; - tensor var_8946_dilations_0 = const()[name = string("op_8946_dilations_0"), val = tensor([1])]; - tensor squeeze_11_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261702784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262587584))))[name = string("squeeze_11_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_8931_cast_fp16 = transpose(perm = var_8930, x = attn_output_115_cast_fp16)[name = string("transpose_87")]; - tensor var_8946_cast_fp16 = conv(dilations = var_8946_dilations_0, groups = var_8946_groups_0, pad = var_8946_pad_0, pad_type = var_8946_pad_type_0, strides = var_8946_strides_0, weight = squeeze_11_cast_fp16_to_fp32_to_fp16_palettized, x = var_8931_cast_fp16)[name = string("op_8946_cast_fp16")]; - tensor var_8950 = const()[name = string("op_8950"), val = tensor([0, 2, 1])]; - int32 var_8961 = const()[name = string("op_8961"), val = int32(-1)]; - fp16 const_444_promoted_to_fp16 = const()[name = string("const_444_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_185_cast_fp16 = transpose(perm = var_8950, x = var_8946_cast_fp16)[name = string("transpose_86")]; - tensor var_8963_cast_fp16 = mul(x = hidden_states_185_cast_fp16, y = const_444_promoted_to_fp16)[name = string("op_8963_cast_fp16")]; - bool input_231_interleave_0 = const()[name = string("input_231_interleave_0"), val = bool(false)]; - tensor input_231_cast_fp16 = concat(axis = var_8961, interleave = input_231_interleave_0, values = (hidden_states_185_cast_fp16, var_8963_cast_fp16))[name = string("input_231_cast_fp16")]; - tensor normed_277_axes_0 = const()[name = string("normed_277_axes_0"), val = tensor([-1])]; - fp16 var_8958_to_fp16 = const()[name = string("op_8958_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_277_cast_fp16 = layer_norm(axes = normed_277_axes_0, epsilon = var_8958_to_fp16, x = input_231_cast_fp16)[name = string("normed_277_cast_fp16")]; - tensor normed_279_begin_0 = const()[name = string("normed_279_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_279_end_0 = const()[name = string("normed_279_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_279_end_mask_0 = const()[name = string("normed_279_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_279_cast_fp16 = slice_by_index(begin = normed_279_begin_0, end = normed_279_end_0, end_mask = normed_279_end_mask_0, x = normed_277_cast_fp16)[name = string("normed_279_cast_fp16")]; - tensor var_8977_to_fp16 = const()[name = string("op_8977_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262606080)))]; - tensor attn_output_119_cast_fp16 = mul(x = normed_279_cast_fp16, y = var_8977_to_fp16)[name = string("attn_output_119_cast_fp16")]; - tensor hidden_states_187_cast_fp16 = add(x = hidden_states_177_cast_fp16, y = attn_output_119_cast_fp16)[name = string("hidden_states_187_cast_fp16")]; - int32 var_8990 = const()[name = string("op_8990"), val = int32(-1)]; - fp16 const_448_promoted_to_fp16 = const()[name = string("const_448_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_8992_cast_fp16 = mul(x = hidden_states_187_cast_fp16, y = const_448_promoted_to_fp16)[name = string("op_8992_cast_fp16")]; - bool input_233_interleave_0 = const()[name = string("input_233_interleave_0"), val = bool(false)]; - tensor input_233_cast_fp16 = concat(axis = var_8990, interleave = input_233_interleave_0, values = (hidden_states_187_cast_fp16, var_8992_cast_fp16))[name = string("input_233_cast_fp16")]; - tensor normed_281_axes_0 = const()[name = string("normed_281_axes_0"), val = tensor([-1])]; - fp16 var_8987_to_fp16 = const()[name = string("op_8987_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_281_cast_fp16 = layer_norm(axes = normed_281_axes_0, epsilon = var_8987_to_fp16, x = input_233_cast_fp16)[name = string("normed_281_cast_fp16")]; - tensor normed_283_begin_0 = const()[name = string("normed_283_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_283_end_0 = const()[name = string("normed_283_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_283_end_mask_0 = const()[name = string("normed_283_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_283_cast_fp16 = slice_by_index(begin = normed_283_begin_0, end = normed_283_end_0, end_mask = normed_283_end_mask_0, x = normed_281_cast_fp16)[name = string("normed_283_cast_fp16")]; - tensor var_9006_to_fp16 = const()[name = string("op_9006_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262608448)))]; - tensor x_189_cast_fp16 = mul(x = normed_283_cast_fp16, y = var_9006_to_fp16)[name = string("x_189_cast_fp16")]; - tensor var_9018 = const()[name = string("op_9018"), val = tensor([0, 2, 1])]; - tensor input_235_axes_0 = const()[name = string("input_235_axes_0"), val = tensor([2])]; - tensor var_9019_cast_fp16 = transpose(perm = var_9018, x = x_189_cast_fp16)[name = string("transpose_85")]; - tensor input_235_cast_fp16 = expand_dims(axes = input_235_axes_0, x = var_9019_cast_fp16)[name = string("input_235_cast_fp16")]; - string x_191_pad_type_0 = const()[name = string("x_191_pad_type_0"), val = string("valid")]; - tensor x_191_strides_0 = const()[name = string("x_191_strides_0"), val = tensor([1, 1])]; - tensor x_191_pad_0 = const()[name = string("x_191_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_191_dilations_0 = const()[name = string("x_191_dilations_0"), val = tensor([1, 1])]; - int32 x_191_groups_0 = const()[name = string("x_191_groups_0"), val = int32(1)]; - tensor model_model_layers_11_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(694787456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700759488))))[name = string("model_model_layers_11_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_191_cast_fp16 = conv(dilations = x_191_dilations_0, groups = x_191_groups_0, pad = x_191_pad_0, pad_type = x_191_pad_type_0, strides = x_191_strides_0, weight = model_model_layers_11_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_235_cast_fp16)[name = string("x_191_cast_fp16")]; - string b_23_pad_type_0 = const()[name = string("b_23_pad_type_0"), val = string("valid")]; - tensor b_23_strides_0 = const()[name = string("b_23_strides_0"), val = tensor([1, 1])]; - tensor b_23_pad_0 = const()[name = string("b_23_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_23_dilations_0 = const()[name = string("b_23_dilations_0"), val = tensor([1, 1])]; - int32 b_23_groups_0 = const()[name = string("b_23_groups_0"), val = int32(1)]; - tensor model_model_layers_11_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700870144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(706842176))))[name = string("model_model_layers_11_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_23_cast_fp16 = conv(dilations = b_23_dilations_0, groups = b_23_groups_0, pad = b_23_pad_0, pad_type = b_23_pad_type_0, strides = b_23_strides_0, weight = model_model_layers_11_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_235_cast_fp16)[name = string("b_23_cast_fp16")]; - string var_9044_mode_0 = const()[name = string("op_9044_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_9044_cast_fp16 = gelu(mode = var_9044_mode_0, x = x_191_cast_fp16)[name = string("op_9044_cast_fp16")]; - tensor input_237_cast_fp16 = mul(x = var_9044_cast_fp16, y = b_23_cast_fp16)[name = string("input_237_cast_fp16")]; - string e_23_pad_type_0 = const()[name = string("e_23_pad_type_0"), val = string("valid")]; - tensor e_23_strides_0 = const()[name = string("e_23_strides_0"), val = tensor([1, 1])]; - tensor e_23_pad_0 = const()[name = string("e_23_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_23_dilations_0 = const()[name = string("e_23_dilations_0"), val = tensor([1, 1])]; - int32 e_23_groups_0 = const()[name = string("e_23_groups_0"), val = int32(1)]; - tensor model_model_layers_11_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(274776192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280748224))))[name = string("model_model_layers_11_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_23_cast_fp16 = conv(dilations = e_23_dilations_0, groups = e_23_groups_0, pad = e_23_pad_0, pad_type = e_23_pad_type_0, strides = e_23_strides_0, weight = model_model_layers_11_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_237_cast_fp16)[name = string("e_23_cast_fp16")]; - tensor var_9052_axes_0 = const()[name = string("op_9052_axes_0"), val = tensor([2])]; - tensor var_9052_cast_fp16 = squeeze(axes = var_9052_axes_0, x = e_23_cast_fp16)[name = string("op_9052_cast_fp16")]; - tensor var_9053 = const()[name = string("op_9053"), val = tensor([0, 2, 1])]; - int32 var_9064 = const()[name = string("op_9064"), val = int32(-1)]; - fp16 const_452_promoted_to_fp16 = const()[name = string("const_452_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_189_cast_fp16 = transpose(perm = var_9053, x = var_9052_cast_fp16)[name = string("transpose_84")]; - tensor var_9066_cast_fp16 = mul(x = hidden_states_189_cast_fp16, y = const_452_promoted_to_fp16)[name = string("op_9066_cast_fp16")]; - bool input_239_interleave_0 = const()[name = string("input_239_interleave_0"), val = bool(false)]; - tensor input_239_cast_fp16 = concat(axis = var_9064, interleave = input_239_interleave_0, values = (hidden_states_189_cast_fp16, var_9066_cast_fp16))[name = string("input_239_cast_fp16")]; - tensor normed_285_axes_0 = const()[name = string("normed_285_axes_0"), val = tensor([-1])]; - fp16 var_9061_to_fp16 = const()[name = string("op_9061_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_285_cast_fp16 = layer_norm(axes = normed_285_axes_0, epsilon = var_9061_to_fp16, x = input_239_cast_fp16)[name = string("normed_285_cast_fp16")]; - tensor normed_287_begin_0 = const()[name = string("normed_287_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_287_end_0 = const()[name = string("normed_287_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_287_end_mask_0 = const()[name = string("normed_287_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_287_cast_fp16 = slice_by_index(begin = normed_287_begin_0, end = normed_287_end_0, end_mask = normed_287_end_mask_0, x = normed_285_cast_fp16)[name = string("normed_287_cast_fp16")]; - tensor var_9080_to_fp16 = const()[name = string("op_9080_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280766720)))]; - tensor hidden_states_191_cast_fp16 = mul(x = normed_287_cast_fp16, y = var_9080_to_fp16)[name = string("hidden_states_191_cast_fp16")]; - tensor hidden_states_193_cast_fp16 = add(x = hidden_states_187_cast_fp16, y = hidden_states_191_cast_fp16)[name = string("hidden_states_193_cast_fp16")]; - int32 var_9131 = const()[name = string("op_9131"), val = int32(-1)]; - fp16 const_456_promoted_to_fp16 = const()[name = string("const_456_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_9133_cast_fp16 = mul(x = hidden_states_193_cast_fp16, y = const_456_promoted_to_fp16)[name = string("op_9133_cast_fp16")]; - bool input_241_interleave_0 = const()[name = string("input_241_interleave_0"), val = bool(false)]; - tensor input_241_cast_fp16 = concat(axis = var_9131, interleave = input_241_interleave_0, values = (hidden_states_193_cast_fp16, var_9133_cast_fp16))[name = string("input_241_cast_fp16")]; - tensor normed_289_axes_0 = const()[name = string("normed_289_axes_0"), val = tensor([-1])]; - fp16 var_9128_to_fp16 = const()[name = string("op_9128_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_289_cast_fp16 = layer_norm(axes = normed_289_axes_0, epsilon = var_9128_to_fp16, x = input_241_cast_fp16)[name = string("normed_289_cast_fp16")]; - tensor normed_291_begin_0 = const()[name = string("normed_291_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_291_end_0 = const()[name = string("normed_291_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_291_end_mask_0 = const()[name = string("normed_291_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_291_cast_fp16 = slice_by_index(begin = normed_291_begin_0, end = normed_291_end_0, end_mask = normed_291_end_mask_0, x = normed_289_cast_fp16)[name = string("normed_291_cast_fp16")]; - tensor var_9147_to_fp16 = const()[name = string("op_9147_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280769088)))]; - tensor hidden_states_195_cast_fp16 = mul(x = normed_291_cast_fp16, y = var_9147_to_fp16)[name = string("hidden_states_195_cast_fp16")]; - tensor var_9152 = const()[name = string("op_9152"), val = tensor([0, 2, 1])]; - tensor var_9155_axes_0 = const()[name = string("op_9155_axes_0"), val = tensor([2])]; - tensor var_9153_cast_fp16 = transpose(perm = var_9152, x = hidden_states_195_cast_fp16)[name = string("transpose_83")]; - tensor var_9155_cast_fp16 = expand_dims(axes = var_9155_axes_0, x = var_9153_cast_fp16)[name = string("op_9155_cast_fp16")]; - string var_9171_pad_type_0 = const()[name = string("op_9171_pad_type_0"), val = string("valid")]; - tensor var_9171_strides_0 = const()[name = string("op_9171_strides_0"), val = tensor([1, 1])]; - tensor var_9171_pad_0 = const()[name = string("op_9171_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_9171_dilations_0 = const()[name = string("op_9171_dilations_0"), val = tensor([1, 1])]; - int32 var_9171_groups_0 = const()[name = string("op_9171_groups_0"), val = int32(1)]; - tensor var_9171 = conv(dilations = var_9171_dilations_0, groups = var_9171_groups_0, pad = var_9171_pad_0, pad_type = var_9171_pad_type_0, strides = var_9171_strides_0, weight = model_model_layers_12_self_attn_q_proj_weight_palettized, x = var_9155_cast_fp16)[name = string("op_9171")]; - tensor var_9176 = const()[name = string("op_9176"), val = tensor([1, 4, 1, 256])]; - tensor var_9177 = reshape(shape = var_9176, x = var_9171)[name = string("op_9177")]; - string var_9193_pad_type_0 = const()[name = string("op_9193_pad_type_0"), val = string("valid")]; - tensor var_9193_strides_0 = const()[name = string("op_9193_strides_0"), val = tensor([1, 1])]; - tensor var_9193_pad_0 = const()[name = string("op_9193_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_9193_dilations_0 = const()[name = string("op_9193_dilations_0"), val = tensor([1, 1])]; - int32 var_9193_groups_0 = const()[name = string("op_9193_groups_0"), val = int32(1)]; - tensor var_9193 = conv(dilations = var_9193_dilations_0, groups = var_9193_groups_0, pad = var_9193_pad_0, pad_type = var_9193_pad_type_0, strides = var_9193_strides_0, weight = model_model_layers_12_self_attn_k_proj_weight_palettized, x = var_9155_cast_fp16)[name = string("op_9193")]; - tensor var_9198 = const()[name = string("op_9198"), val = tensor([1, 1, 1, 256])]; - tensor var_9199 = reshape(shape = var_9198, x = var_9193)[name = string("op_9199")]; - string var_9215_pad_type_0 = const()[name = string("op_9215_pad_type_0"), val = string("valid")]; - tensor var_9215_strides_0 = const()[name = string("op_9215_strides_0"), val = tensor([1, 1])]; - tensor var_9215_pad_0 = const()[name = string("op_9215_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_9215_dilations_0 = const()[name = string("op_9215_dilations_0"), val = tensor([1, 1])]; - int32 var_9215_groups_0 = const()[name = string("op_9215_groups_0"), val = int32(1)]; - tensor var_9215 = conv(dilations = var_9215_dilations_0, groups = var_9215_groups_0, pad = var_9215_pad_0, pad_type = var_9215_pad_type_0, strides = var_9215_strides_0, weight = model_model_layers_12_self_attn_v_proj_weight_palettized, x = var_9155_cast_fp16)[name = string("op_9215")]; - tensor var_9220 = const()[name = string("op_9220"), val = tensor([1, 1, 1, 256])]; - tensor var_9221 = reshape(shape = var_9220, x = var_9215)[name = string("op_9221")]; - int32 var_9236 = const()[name = string("op_9236"), val = int32(-1)]; - fp16 const_460_promoted = const()[name = string("const_460_promoted"), val = fp16(-0x1p+0)]; - tensor var_9238 = mul(x = var_9177, y = const_460_promoted)[name = string("op_9238")]; - bool input_245_interleave_0 = const()[name = string("input_245_interleave_0"), val = bool(false)]; - tensor input_245 = concat(axis = var_9236, interleave = input_245_interleave_0, values = (var_9177, var_9238))[name = string("input_245")]; - tensor normed_293_axes_0 = const()[name = string("normed_293_axes_0"), val = tensor([-1])]; - fp16 var_9233_to_fp16 = const()[name = string("op_9233_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_293_cast_fp16 = layer_norm(axes = normed_293_axes_0, epsilon = var_9233_to_fp16, x = input_245)[name = string("normed_293_cast_fp16")]; - tensor normed_295_begin_0 = const()[name = string("normed_295_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_295_end_0 = const()[name = string("normed_295_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_295_end_mask_0 = const()[name = string("normed_295_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_295 = slice_by_index(begin = normed_295_begin_0, end = normed_295_end_0, end_mask = normed_295_end_mask_0, x = normed_293_cast_fp16)[name = string("normed_295")]; - tensor var_9252_to_fp16 = const()[name = string("op_9252_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280771456)))]; - tensor q_25_cast_fp16 = mul(x = normed_295, y = var_9252_to_fp16)[name = string("q_25_cast_fp16")]; - int32 var_9263 = const()[name = string("op_9263"), val = int32(-1)]; - fp16 const_464_promoted = const()[name = string("const_464_promoted"), val = fp16(-0x1p+0)]; - tensor var_9265 = mul(x = var_9199, y = const_464_promoted)[name = string("op_9265")]; - bool input_247_interleave_0 = const()[name = string("input_247_interleave_0"), val = bool(false)]; - tensor input_247 = concat(axis = var_9263, interleave = input_247_interleave_0, values = (var_9199, var_9265))[name = string("input_247")]; - tensor normed_297_axes_0 = const()[name = string("normed_297_axes_0"), val = tensor([-1])]; - fp16 var_9260_to_fp16 = const()[name = string("op_9260_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_297_cast_fp16 = layer_norm(axes = normed_297_axes_0, epsilon = var_9260_to_fp16, x = input_247)[name = string("normed_297_cast_fp16")]; - tensor normed_299_begin_0 = const()[name = string("normed_299_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_299_end_0 = const()[name = string("normed_299_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_299_end_mask_0 = const()[name = string("normed_299_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_299 = slice_by_index(begin = normed_299_begin_0, end = normed_299_end_0, end_mask = normed_299_end_mask_0, x = normed_297_cast_fp16)[name = string("normed_299")]; - tensor var_9279_to_fp16 = const()[name = string("op_9279_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280772032)))]; - tensor k_25_cast_fp16 = mul(x = normed_299, y = var_9279_to_fp16)[name = string("k_25_cast_fp16")]; - tensor var_9281_cast_fp16 = mul(x = q_25_cast_fp16, y = cos_1_cast_fp16)[name = string("op_9281_cast_fp16")]; - tensor x1_49_begin_0 = const()[name = string("x1_49_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_49_end_0 = const()[name = string("x1_49_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_49_end_mask_0 = const()[name = string("x1_49_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_49_cast_fp16 = slice_by_index(begin = x1_49_begin_0, end = x1_49_end_0, end_mask = x1_49_end_mask_0, x = q_25_cast_fp16)[name = string("x1_49_cast_fp16")]; - tensor x2_49_begin_0 = const()[name = string("x2_49_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_49_end_0 = const()[name = string("x2_49_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_49_end_mask_0 = const()[name = string("x2_49_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_49_cast_fp16 = slice_by_index(begin = x2_49_begin_0, end = x2_49_end_0, end_mask = x2_49_end_mask_0, x = q_25_cast_fp16)[name = string("x2_49_cast_fp16")]; - fp16 const_470_promoted_to_fp16 = const()[name = string("const_470_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_9302_cast_fp16 = mul(x = x2_49_cast_fp16, y = const_470_promoted_to_fp16)[name = string("op_9302_cast_fp16")]; - int32 var_9304 = const()[name = string("op_9304"), val = int32(-1)]; - bool var_9305_interleave_0 = const()[name = string("op_9305_interleave_0"), val = bool(false)]; - tensor var_9305_cast_fp16 = concat(axis = var_9304, interleave = var_9305_interleave_0, values = (var_9302_cast_fp16, x1_49_cast_fp16))[name = string("op_9305_cast_fp16")]; - tensor var_9306_cast_fp16 = mul(x = var_9305_cast_fp16, y = sin_1_cast_fp16)[name = string("op_9306_cast_fp16")]; - tensor query_states_49_cast_fp16 = add(x = var_9281_cast_fp16, y = var_9306_cast_fp16)[name = string("query_states_49_cast_fp16")]; - tensor var_9309_cast_fp16 = mul(x = k_25_cast_fp16, y = cos_1_cast_fp16)[name = string("op_9309_cast_fp16")]; - tensor x1_51_begin_0 = const()[name = string("x1_51_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_51_end_0 = const()[name = string("x1_51_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_51_end_mask_0 = const()[name = string("x1_51_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_51_cast_fp16 = slice_by_index(begin = x1_51_begin_0, end = x1_51_end_0, end_mask = x1_51_end_mask_0, x = k_25_cast_fp16)[name = string("x1_51_cast_fp16")]; - tensor x2_51_begin_0 = const()[name = string("x2_51_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_51_end_0 = const()[name = string("x2_51_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_51_end_mask_0 = const()[name = string("x2_51_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_51_cast_fp16 = slice_by_index(begin = x2_51_begin_0, end = x2_51_end_0, end_mask = x2_51_end_mask_0, x = k_25_cast_fp16)[name = string("x2_51_cast_fp16")]; - fp16 const_473_promoted_to_fp16 = const()[name = string("const_473_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_9330_cast_fp16 = mul(x = x2_51_cast_fp16, y = const_473_promoted_to_fp16)[name = string("op_9330_cast_fp16")]; - int32 var_9332 = const()[name = string("op_9332"), val = int32(-1)]; - bool var_9333_interleave_0 = const()[name = string("op_9333_interleave_0"), val = bool(false)]; - tensor var_9333_cast_fp16 = concat(axis = var_9332, interleave = var_9333_interleave_0, values = (var_9330_cast_fp16, x1_51_cast_fp16))[name = string("op_9333_cast_fp16")]; - tensor var_9334_cast_fp16 = mul(x = var_9333_cast_fp16, y = sin_1_cast_fp16)[name = string("op_9334_cast_fp16")]; - tensor key_states_49_cast_fp16 = add(x = var_9309_cast_fp16, y = var_9334_cast_fp16)[name = string("key_states_49_cast_fp16")]; - tensor key_slice_21_begin_0 = const()[name = string("key_slice_21_begin_0"), val = tensor([10, 0, 0, 0])]; - tensor key_slice_21_end_0 = const()[name = string("key_slice_21_end_0"), val = tensor([11, 1, 512, 256])]; - tensor key_slice_21_end_mask_0 = const()[name = string("key_slice_21_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_21_cast_fp16 = slice_by_index(begin = key_slice_21_begin_0, end = key_slice_21_end_0, end_mask = key_slice_21_end_mask_0, x = coreml_update_state_73)[name = string("key_slice_21_cast_fp16")]; - tensor key_tail_21_begin_0 = const()[name = string("key_tail_21_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor key_tail_21_end_0 = const()[name = string("key_tail_21_end_0"), val = tensor([1, 1, 512, 256])]; - tensor key_tail_21_cast_fp16 = slice_by_index(begin = key_tail_21_begin_0, end = key_tail_21_end_0, x = key_slice_21_cast_fp16)[name = string("key_tail_21_cast_fp16")]; - int32 var_9347 = const()[name = string("op_9347"), val = int32(2)]; - bool shifted_key_21_interleave_0 = const()[name = string("shifted_key_21_interleave_0"), val = bool(false)]; - tensor shifted_key_21_cast_fp16 = concat(axis = var_9347, interleave = shifted_key_21_interleave_0, values = (key_tail_21_cast_fp16, key_states_49_cast_fp16))[name = string("shifted_key_21_cast_fp16")]; - tensor concat_56 = const()[name = string("concat_56"), val = tensor([10, 0, 0, 0])]; - tensor concat_57 = const()[name = string("concat_57"), val = tensor([11, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_21_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_21_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_21_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_56, begin_mask = model_model_kv_cache_local_internal_tensor_assign_21_begin_mask_0, end = concat_57, end_mask = model_model_kv_cache_local_internal_tensor_assign_21_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_21_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_21_stride_0, update = shifted_key_21_cast_fp16, x = coreml_update_state_73)[name = string("model_model_kv_cache_local_internal_tensor_assign_21_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_21_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_76_write_state")]; - tensor coreml_update_state_76 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_76")]; - tensor value_slice_21_begin_0 = const()[name = string("value_slice_21_begin_0"), val = tensor([32, 0, 0, 0])]; - tensor value_slice_21_end_0 = const()[name = string("value_slice_21_end_0"), val = tensor([33, 1, 512, 256])]; - tensor value_slice_21_end_mask_0 = const()[name = string("value_slice_21_end_mask_0"), val = tensor([false, true, true, true])]; - tensor value_slice_21_cast_fp16 = slice_by_index(begin = value_slice_21_begin_0, end = value_slice_21_end_0, end_mask = value_slice_21_end_mask_0, x = coreml_update_state_76)[name = string("value_slice_21_cast_fp16")]; - tensor value_tail_21_begin_0 = const()[name = string("value_tail_21_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor value_tail_21_end_0 = const()[name = string("value_tail_21_end_0"), val = tensor([1, 1, 512, 256])]; - tensor value_tail_21_cast_fp16 = slice_by_index(begin = value_tail_21_begin_0, end = value_tail_21_end_0, x = value_slice_21_cast_fp16)[name = string("value_tail_21_cast_fp16")]; - int32 var_9381 = const()[name = string("op_9381"), val = int32(2)]; - bool shifted_value_21_interleave_0 = const()[name = string("shifted_value_21_interleave_0"), val = bool(false)]; - tensor shifted_value_21_cast_fp16 = concat(axis = var_9381, interleave = shifted_value_21_interleave_0, values = (value_tail_21_cast_fp16, var_9221))[name = string("shifted_value_21_cast_fp16")]; - tensor concat_58 = const()[name = string("concat_58"), val = tensor([32, 0, 0, 0])]; - tensor concat_59 = const()[name = string("concat_59"), val = tensor([33, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_22_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_22_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_22_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_58, begin_mask = model_model_kv_cache_local_internal_tensor_assign_22_begin_mask_0, end = concat_59, end_mask = model_model_kv_cache_local_internal_tensor_assign_22_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_22_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_22_stride_0, update = shifted_value_21_cast_fp16, x = coreml_update_state_76)[name = string("model_model_kv_cache_local_internal_tensor_assign_22_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_22_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_77_write_state")]; - tensor coreml_update_state_77 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_77")]; - tensor var_9409_begin_0 = const()[name = string("op_9409_begin_0"), val = tensor([10, 0, 0, 0])]; - tensor var_9409_end_0 = const()[name = string("op_9409_end_0"), val = tensor([11, 1, 512, 256])]; - tensor var_9409_end_mask_0 = const()[name = string("op_9409_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_9409_cast_fp16 = slice_by_index(begin = var_9409_begin_0, end = var_9409_end_0, end_mask = var_9409_end_mask_0, x = coreml_update_state_77)[name = string("op_9409_cast_fp16")]; - tensor var_9416_begin_0 = const()[name = string("op_9416_begin_0"), val = tensor([32, 0, 0, 0])]; - tensor var_9416_end_0 = const()[name = string("op_9416_end_0"), val = tensor([33, 1, 512, 256])]; - tensor var_9416_end_mask_0 = const()[name = string("op_9416_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_9416_cast_fp16 = slice_by_index(begin = var_9416_begin_0, end = var_9416_end_0, end_mask = var_9416_end_mask_0, x = coreml_update_state_77)[name = string("op_9416_cast_fp16")]; - tensor var_9453 = const()[name = string("op_9453"), val = tensor([1, 4, 1, 1])]; - tensor x_197_cast_fp16 = tile(reps = var_9453, x = var_9409_cast_fp16)[name = string("x_197_cast_fp16")]; - tensor var_9473 = const()[name = string("op_9473"), val = tensor([1, 4, 1, 1])]; - tensor x_203_cast_fp16 = tile(reps = var_9473, x = var_9416_cast_fp16)[name = string("x_203_cast_fp16")]; - bool var_9500_transpose_x_1 = const()[name = string("op_9500_transpose_x_1"), val = bool(false)]; - bool var_9500_transpose_y_1 = const()[name = string("op_9500_transpose_y_1"), val = bool(true)]; - tensor var_9500 = matmul(transpose_x = var_9500_transpose_x_1, transpose_y = var_9500_transpose_y_1, x = query_states_49_cast_fp16, y = x_197_cast_fp16)[name = string("op_9500")]; - fp16 var_9501_to_fp16 = const()[name = string("op_9501_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_73_cast_fp16 = mul(x = var_9500, y = var_9501_to_fp16)[name = string("attn_weights_73_cast_fp16")]; - tensor attn_weights_75_cast_fp16 = add(x = attn_weights_73_cast_fp16, y = var_2059)[name = string("attn_weights_75_cast_fp16")]; - int32 var_9536 = const()[name = string("op_9536"), val = int32(-1)]; - tensor attn_weights_77_cast_fp16 = softmax(axis = var_9536, x = attn_weights_75_cast_fp16)[name = string("attn_weights_77_cast_fp16")]; - bool attn_output_121_transpose_x_0 = const()[name = string("attn_output_121_transpose_x_0"), val = bool(false)]; - bool attn_output_121_transpose_y_0 = const()[name = string("attn_output_121_transpose_y_0"), val = bool(false)]; - tensor attn_output_121_cast_fp16 = matmul(transpose_x = attn_output_121_transpose_x_0, transpose_y = attn_output_121_transpose_y_0, x = attn_weights_77_cast_fp16, y = x_203_cast_fp16)[name = string("attn_output_121_cast_fp16")]; - tensor var_9547_perm_0 = const()[name = string("op_9547_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_9551 = const()[name = string("op_9551"), val = tensor([1, 1, 1024])]; - tensor var_9547_cast_fp16 = transpose(perm = var_9547_perm_0, x = attn_output_121_cast_fp16)[name = string("transpose_82")]; - tensor attn_output_125_cast_fp16 = reshape(shape = var_9551, x = var_9547_cast_fp16)[name = string("attn_output_125_cast_fp16")]; - tensor var_9556 = const()[name = string("op_9556"), val = tensor([0, 2, 1])]; - string var_9572_pad_type_0 = const()[name = string("op_9572_pad_type_0"), val = string("valid")]; - int32 var_9572_groups_0 = const()[name = string("op_9572_groups_0"), val = int32(1)]; - tensor var_9572_strides_0 = const()[name = string("op_9572_strides_0"), val = tensor([1])]; - tensor var_9572_pad_0 = const()[name = string("op_9572_pad_0"), val = tensor([0, 0])]; - tensor var_9572_dilations_0 = const()[name = string("op_9572_dilations_0"), val = tensor([1])]; - tensor squeeze_12_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280772608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(281657408))))[name = string("squeeze_12_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_9557_cast_fp16 = transpose(perm = var_9556, x = attn_output_125_cast_fp16)[name = string("transpose_81")]; - tensor var_9572_cast_fp16 = conv(dilations = var_9572_dilations_0, groups = var_9572_groups_0, pad = var_9572_pad_0, pad_type = var_9572_pad_type_0, strides = var_9572_strides_0, weight = squeeze_12_cast_fp16_to_fp32_to_fp16_palettized, x = var_9557_cast_fp16)[name = string("op_9572_cast_fp16")]; - tensor var_9576 = const()[name = string("op_9576"), val = tensor([0, 2, 1])]; - int32 var_9587 = const()[name = string("op_9587"), val = int32(-1)]; - fp16 const_482_promoted_to_fp16 = const()[name = string("const_482_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_201_cast_fp16 = transpose(perm = var_9576, x = var_9572_cast_fp16)[name = string("transpose_80")]; - tensor var_9589_cast_fp16 = mul(x = hidden_states_201_cast_fp16, y = const_482_promoted_to_fp16)[name = string("op_9589_cast_fp16")]; - bool input_251_interleave_0 = const()[name = string("input_251_interleave_0"), val = bool(false)]; - tensor input_251_cast_fp16 = concat(axis = var_9587, interleave = input_251_interleave_0, values = (hidden_states_201_cast_fp16, var_9589_cast_fp16))[name = string("input_251_cast_fp16")]; - tensor normed_301_axes_0 = const()[name = string("normed_301_axes_0"), val = tensor([-1])]; - fp16 var_9584_to_fp16 = const()[name = string("op_9584_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_301_cast_fp16 = layer_norm(axes = normed_301_axes_0, epsilon = var_9584_to_fp16, x = input_251_cast_fp16)[name = string("normed_301_cast_fp16")]; - tensor normed_303_begin_0 = const()[name = string("normed_303_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_303_end_0 = const()[name = string("normed_303_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_303_end_mask_0 = const()[name = string("normed_303_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_303_cast_fp16 = slice_by_index(begin = normed_303_begin_0, end = normed_303_end_0, end_mask = normed_303_end_mask_0, x = normed_301_cast_fp16)[name = string("normed_303_cast_fp16")]; - tensor var_9603_to_fp16 = const()[name = string("op_9603_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(281675904)))]; - tensor attn_output_129_cast_fp16 = mul(x = normed_303_cast_fp16, y = var_9603_to_fp16)[name = string("attn_output_129_cast_fp16")]; - tensor hidden_states_203_cast_fp16 = add(x = hidden_states_193_cast_fp16, y = attn_output_129_cast_fp16)[name = string("hidden_states_203_cast_fp16")]; - int32 var_9616 = const()[name = string("op_9616"), val = int32(-1)]; - fp16 const_486_promoted_to_fp16 = const()[name = string("const_486_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_9618_cast_fp16 = mul(x = hidden_states_203_cast_fp16, y = const_486_promoted_to_fp16)[name = string("op_9618_cast_fp16")]; - bool input_253_interleave_0 = const()[name = string("input_253_interleave_0"), val = bool(false)]; - tensor input_253_cast_fp16 = concat(axis = var_9616, interleave = input_253_interleave_0, values = (hidden_states_203_cast_fp16, var_9618_cast_fp16))[name = string("input_253_cast_fp16")]; - tensor normed_305_axes_0 = const()[name = string("normed_305_axes_0"), val = tensor([-1])]; - fp16 var_9613_to_fp16 = const()[name = string("op_9613_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_305_cast_fp16 = layer_norm(axes = normed_305_axes_0, epsilon = var_9613_to_fp16, x = input_253_cast_fp16)[name = string("normed_305_cast_fp16")]; - tensor normed_307_begin_0 = const()[name = string("normed_307_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_307_end_0 = const()[name = string("normed_307_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_307_end_mask_0 = const()[name = string("normed_307_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_307_cast_fp16 = slice_by_index(begin = normed_307_begin_0, end = normed_307_end_0, end_mask = normed_307_end_mask_0, x = normed_305_cast_fp16)[name = string("normed_307_cast_fp16")]; - tensor var_9632_to_fp16 = const()[name = string("op_9632_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(281678272)))]; - tensor x_205_cast_fp16 = mul(x = normed_307_cast_fp16, y = var_9632_to_fp16)[name = string("x_205_cast_fp16")]; - tensor var_9644 = const()[name = string("op_9644"), val = tensor([0, 2, 1])]; - tensor input_255_axes_0 = const()[name = string("input_255_axes_0"), val = tensor([2])]; - tensor var_9645_cast_fp16 = transpose(perm = var_9644, x = x_205_cast_fp16)[name = string("transpose_79")]; - tensor input_255_cast_fp16 = expand_dims(axes = input_255_axes_0, x = var_9645_cast_fp16)[name = string("input_255_cast_fp16")]; - string x_207_pad_type_0 = const()[name = string("x_207_pad_type_0"), val = string("valid")]; - tensor x_207_strides_0 = const()[name = string("x_207_strides_0"), val = tensor([1, 1])]; - tensor x_207_pad_0 = const()[name = string("x_207_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_207_dilations_0 = const()[name = string("x_207_dilations_0"), val = tensor([1, 1])]; - int32 x_207_groups_0 = const()[name = string("x_207_groups_0"), val = int32(1)]; - tensor model_model_layers_12_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(706952832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(712924864))))[name = string("model_model_layers_12_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_207_cast_fp16 = conv(dilations = x_207_dilations_0, groups = x_207_groups_0, pad = x_207_pad_0, pad_type = x_207_pad_type_0, strides = x_207_strides_0, weight = model_model_layers_12_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_255_cast_fp16)[name = string("x_207_cast_fp16")]; - string b_25_pad_type_0 = const()[name = string("b_25_pad_type_0"), val = string("valid")]; - tensor b_25_strides_0 = const()[name = string("b_25_strides_0"), val = tensor([1, 1])]; - tensor b_25_pad_0 = const()[name = string("b_25_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_25_dilations_0 = const()[name = string("b_25_dilations_0"), val = tensor([1, 1])]; - int32 b_25_groups_0 = const()[name = string("b_25_groups_0"), val = int32(1)]; - tensor model_model_layers_12_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(713035520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(719007552))))[name = string("model_model_layers_12_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_25_cast_fp16 = conv(dilations = b_25_dilations_0, groups = b_25_groups_0, pad = b_25_pad_0, pad_type = b_25_pad_type_0, strides = b_25_strides_0, weight = model_model_layers_12_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_255_cast_fp16)[name = string("b_25_cast_fp16")]; - string var_9670_mode_0 = const()[name = string("op_9670_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_9670_cast_fp16 = gelu(mode = var_9670_mode_0, x = x_207_cast_fp16)[name = string("op_9670_cast_fp16")]; - tensor input_257_cast_fp16 = mul(x = var_9670_cast_fp16, y = b_25_cast_fp16)[name = string("input_257_cast_fp16")]; - string e_25_pad_type_0 = const()[name = string("e_25_pad_type_0"), val = string("valid")]; - tensor e_25_strides_0 = const()[name = string("e_25_strides_0"), val = tensor([1, 1])]; - tensor e_25_pad_0 = const()[name = string("e_25_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_25_dilations_0 = const()[name = string("e_25_dilations_0"), val = tensor([1, 1])]; - int32 e_25_groups_0 = const()[name = string("e_25_groups_0"), val = int32(1)]; - tensor model_model_layers_12_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(293846016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299818048))))[name = string("model_model_layers_12_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_25_cast_fp16 = conv(dilations = e_25_dilations_0, groups = e_25_groups_0, pad = e_25_pad_0, pad_type = e_25_pad_type_0, strides = e_25_strides_0, weight = model_model_layers_12_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_257_cast_fp16)[name = string("e_25_cast_fp16")]; - tensor var_9678_axes_0 = const()[name = string("op_9678_axes_0"), val = tensor([2])]; - tensor var_9678_cast_fp16 = squeeze(axes = var_9678_axes_0, x = e_25_cast_fp16)[name = string("op_9678_cast_fp16")]; - tensor var_9679 = const()[name = string("op_9679"), val = tensor([0, 2, 1])]; - int32 var_9690 = const()[name = string("op_9690"), val = int32(-1)]; - fp16 const_490_promoted_to_fp16 = const()[name = string("const_490_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_205_cast_fp16 = transpose(perm = var_9679, x = var_9678_cast_fp16)[name = string("transpose_78")]; - tensor var_9692_cast_fp16 = mul(x = hidden_states_205_cast_fp16, y = const_490_promoted_to_fp16)[name = string("op_9692_cast_fp16")]; - bool input_259_interleave_0 = const()[name = string("input_259_interleave_0"), val = bool(false)]; - tensor input_259_cast_fp16 = concat(axis = var_9690, interleave = input_259_interleave_0, values = (hidden_states_205_cast_fp16, var_9692_cast_fp16))[name = string("input_259_cast_fp16")]; - tensor normed_309_axes_0 = const()[name = string("normed_309_axes_0"), val = tensor([-1])]; - fp16 var_9687_to_fp16 = const()[name = string("op_9687_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_309_cast_fp16 = layer_norm(axes = normed_309_axes_0, epsilon = var_9687_to_fp16, x = input_259_cast_fp16)[name = string("normed_309_cast_fp16")]; - tensor normed_311_begin_0 = const()[name = string("normed_311_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_311_end_0 = const()[name = string("normed_311_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_311_end_mask_0 = const()[name = string("normed_311_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_311_cast_fp16 = slice_by_index(begin = normed_311_begin_0, end = normed_311_end_0, end_mask = normed_311_end_mask_0, x = normed_309_cast_fp16)[name = string("normed_311_cast_fp16")]; - tensor var_9706_to_fp16 = const()[name = string("op_9706_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299836544)))]; - tensor hidden_states_207_cast_fp16 = mul(x = normed_311_cast_fp16, y = var_9706_to_fp16)[name = string("hidden_states_207_cast_fp16")]; - tensor hidden_states_209_cast_fp16 = add(x = hidden_states_203_cast_fp16, y = hidden_states_207_cast_fp16)[name = string("hidden_states_209_cast_fp16")]; - int32 var_9757 = const()[name = string("op_9757"), val = int32(-1)]; - fp16 const_494_promoted_to_fp16 = const()[name = string("const_494_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_9759_cast_fp16 = mul(x = hidden_states_209_cast_fp16, y = const_494_promoted_to_fp16)[name = string("op_9759_cast_fp16")]; - bool input_261_interleave_0 = const()[name = string("input_261_interleave_0"), val = bool(false)]; - tensor input_261_cast_fp16 = concat(axis = var_9757, interleave = input_261_interleave_0, values = (hidden_states_209_cast_fp16, var_9759_cast_fp16))[name = string("input_261_cast_fp16")]; - tensor normed_313_axes_0 = const()[name = string("normed_313_axes_0"), val = tensor([-1])]; - fp16 var_9754_to_fp16 = const()[name = string("op_9754_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_313_cast_fp16 = layer_norm(axes = normed_313_axes_0, epsilon = var_9754_to_fp16, x = input_261_cast_fp16)[name = string("normed_313_cast_fp16")]; - tensor normed_315_begin_0 = const()[name = string("normed_315_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_315_end_0 = const()[name = string("normed_315_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_315_end_mask_0 = const()[name = string("normed_315_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_315_cast_fp16 = slice_by_index(begin = normed_315_begin_0, end = normed_315_end_0, end_mask = normed_315_end_mask_0, x = normed_313_cast_fp16)[name = string("normed_315_cast_fp16")]; - tensor var_9773_to_fp16 = const()[name = string("op_9773_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299838912)))]; - tensor hidden_states_211_cast_fp16 = mul(x = normed_315_cast_fp16, y = var_9773_to_fp16)[name = string("hidden_states_211_cast_fp16")]; - tensor var_9778 = const()[name = string("op_9778"), val = tensor([0, 2, 1])]; - tensor var_9781_axes_0 = const()[name = string("op_9781_axes_0"), val = tensor([2])]; - tensor var_9779_cast_fp16 = transpose(perm = var_9778, x = hidden_states_211_cast_fp16)[name = string("transpose_77")]; - tensor var_9781_cast_fp16 = expand_dims(axes = var_9781_axes_0, x = var_9779_cast_fp16)[name = string("op_9781_cast_fp16")]; - string var_9797_pad_type_0 = const()[name = string("op_9797_pad_type_0"), val = string("valid")]; - tensor var_9797_strides_0 = const()[name = string("op_9797_strides_0"), val = tensor([1, 1])]; - tensor var_9797_pad_0 = const()[name = string("op_9797_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_9797_dilations_0 = const()[name = string("op_9797_dilations_0"), val = tensor([1, 1])]; - int32 var_9797_groups_0 = const()[name = string("op_9797_groups_0"), val = int32(1)]; - tensor var_9797 = conv(dilations = var_9797_dilations_0, groups = var_9797_groups_0, pad = var_9797_pad_0, pad_type = var_9797_pad_type_0, strides = var_9797_strides_0, weight = model_model_layers_13_self_attn_q_proj_weight_palettized, x = var_9781_cast_fp16)[name = string("op_9797")]; - tensor var_9802 = const()[name = string("op_9802"), val = tensor([1, 4, 1, 256])]; - tensor var_9803 = reshape(shape = var_9802, x = var_9797)[name = string("op_9803")]; - string var_9819_pad_type_0 = const()[name = string("op_9819_pad_type_0"), val = string("valid")]; - tensor var_9819_strides_0 = const()[name = string("op_9819_strides_0"), val = tensor([1, 1])]; - tensor var_9819_pad_0 = const()[name = string("op_9819_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_9819_dilations_0 = const()[name = string("op_9819_dilations_0"), val = tensor([1, 1])]; - int32 var_9819_groups_0 = const()[name = string("op_9819_groups_0"), val = int32(1)]; - tensor var_9819 = conv(dilations = var_9819_dilations_0, groups = var_9819_groups_0, pad = var_9819_pad_0, pad_type = var_9819_pad_type_0, strides = var_9819_strides_0, weight = model_model_layers_13_self_attn_k_proj_weight_palettized, x = var_9781_cast_fp16)[name = string("op_9819")]; - tensor var_9824 = const()[name = string("op_9824"), val = tensor([1, 1, 1, 256])]; - tensor var_9825 = reshape(shape = var_9824, x = var_9819)[name = string("op_9825")]; - string var_9841_pad_type_0 = const()[name = string("op_9841_pad_type_0"), val = string("valid")]; - tensor var_9841_strides_0 = const()[name = string("op_9841_strides_0"), val = tensor([1, 1])]; - tensor var_9841_pad_0 = const()[name = string("op_9841_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_9841_dilations_0 = const()[name = string("op_9841_dilations_0"), val = tensor([1, 1])]; - int32 var_9841_groups_0 = const()[name = string("op_9841_groups_0"), val = int32(1)]; - tensor var_9841 = conv(dilations = var_9841_dilations_0, groups = var_9841_groups_0, pad = var_9841_pad_0, pad_type = var_9841_pad_type_0, strides = var_9841_strides_0, weight = model_model_layers_13_self_attn_v_proj_weight_palettized, x = var_9781_cast_fp16)[name = string("op_9841")]; - tensor var_9846 = const()[name = string("op_9846"), val = tensor([1, 1, 1, 256])]; - tensor var_9847 = reshape(shape = var_9846, x = var_9841)[name = string("op_9847")]; - int32 var_9862 = const()[name = string("op_9862"), val = int32(-1)]; - fp16 const_498_promoted = const()[name = string("const_498_promoted"), val = fp16(-0x1p+0)]; - tensor var_9864 = mul(x = var_9803, y = const_498_promoted)[name = string("op_9864")]; - bool input_265_interleave_0 = const()[name = string("input_265_interleave_0"), val = bool(false)]; - tensor input_265 = concat(axis = var_9862, interleave = input_265_interleave_0, values = (var_9803, var_9864))[name = string("input_265")]; - tensor normed_317_axes_0 = const()[name = string("normed_317_axes_0"), val = tensor([-1])]; - fp16 var_9859_to_fp16 = const()[name = string("op_9859_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_317_cast_fp16 = layer_norm(axes = normed_317_axes_0, epsilon = var_9859_to_fp16, x = input_265)[name = string("normed_317_cast_fp16")]; - tensor normed_319_begin_0 = const()[name = string("normed_319_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_319_end_0 = const()[name = string("normed_319_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_319_end_mask_0 = const()[name = string("normed_319_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_319 = slice_by_index(begin = normed_319_begin_0, end = normed_319_end_0, end_mask = normed_319_end_mask_0, x = normed_317_cast_fp16)[name = string("normed_319")]; - tensor var_9878_to_fp16 = const()[name = string("op_9878_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299841280)))]; - tensor q_27_cast_fp16 = mul(x = normed_319, y = var_9878_to_fp16)[name = string("q_27_cast_fp16")]; - int32 var_9889 = const()[name = string("op_9889"), val = int32(-1)]; - fp16 const_502_promoted = const()[name = string("const_502_promoted"), val = fp16(-0x1p+0)]; - tensor var_9891 = mul(x = var_9825, y = const_502_promoted)[name = string("op_9891")]; - bool input_267_interleave_0 = const()[name = string("input_267_interleave_0"), val = bool(false)]; - tensor input_267 = concat(axis = var_9889, interleave = input_267_interleave_0, values = (var_9825, var_9891))[name = string("input_267")]; - tensor normed_321_axes_0 = const()[name = string("normed_321_axes_0"), val = tensor([-1])]; - fp16 var_9886_to_fp16 = const()[name = string("op_9886_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_321_cast_fp16 = layer_norm(axes = normed_321_axes_0, epsilon = var_9886_to_fp16, x = input_267)[name = string("normed_321_cast_fp16")]; - tensor normed_323_begin_0 = const()[name = string("normed_323_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_323_end_0 = const()[name = string("normed_323_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_323_end_mask_0 = const()[name = string("normed_323_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_323 = slice_by_index(begin = normed_323_begin_0, end = normed_323_end_0, end_mask = normed_323_end_mask_0, x = normed_321_cast_fp16)[name = string("normed_323")]; - tensor var_9905_to_fp16 = const()[name = string("op_9905_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299841856)))]; - tensor k_27_cast_fp16 = mul(x = normed_323, y = var_9905_to_fp16)[name = string("k_27_cast_fp16")]; - tensor var_9907_cast_fp16 = mul(x = q_27_cast_fp16, y = cos_1_cast_fp16)[name = string("op_9907_cast_fp16")]; - tensor x1_53_begin_0 = const()[name = string("x1_53_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_53_end_0 = const()[name = string("x1_53_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_53_end_mask_0 = const()[name = string("x1_53_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_53_cast_fp16 = slice_by_index(begin = x1_53_begin_0, end = x1_53_end_0, end_mask = x1_53_end_mask_0, x = q_27_cast_fp16)[name = string("x1_53_cast_fp16")]; - tensor x2_53_begin_0 = const()[name = string("x2_53_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_53_end_0 = const()[name = string("x2_53_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_53_end_mask_0 = const()[name = string("x2_53_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_53_cast_fp16 = slice_by_index(begin = x2_53_begin_0, end = x2_53_end_0, end_mask = x2_53_end_mask_0, x = q_27_cast_fp16)[name = string("x2_53_cast_fp16")]; - fp16 const_508_promoted_to_fp16 = const()[name = string("const_508_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_9928_cast_fp16 = mul(x = x2_53_cast_fp16, y = const_508_promoted_to_fp16)[name = string("op_9928_cast_fp16")]; - int32 var_9930 = const()[name = string("op_9930"), val = int32(-1)]; - bool var_9931_interleave_0 = const()[name = string("op_9931_interleave_0"), val = bool(false)]; - tensor var_9931_cast_fp16 = concat(axis = var_9930, interleave = var_9931_interleave_0, values = (var_9928_cast_fp16, x1_53_cast_fp16))[name = string("op_9931_cast_fp16")]; - tensor var_9932_cast_fp16 = mul(x = var_9931_cast_fp16, y = sin_1_cast_fp16)[name = string("op_9932_cast_fp16")]; - tensor query_states_53_cast_fp16 = add(x = var_9907_cast_fp16, y = var_9932_cast_fp16)[name = string("query_states_53_cast_fp16")]; - tensor var_9935_cast_fp16 = mul(x = k_27_cast_fp16, y = cos_1_cast_fp16)[name = string("op_9935_cast_fp16")]; - tensor x1_55_begin_0 = const()[name = string("x1_55_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_55_end_0 = const()[name = string("x1_55_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_55_end_mask_0 = const()[name = string("x1_55_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_55_cast_fp16 = slice_by_index(begin = x1_55_begin_0, end = x1_55_end_0, end_mask = x1_55_end_mask_0, x = k_27_cast_fp16)[name = string("x1_55_cast_fp16")]; - tensor x2_55_begin_0 = const()[name = string("x2_55_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_55_end_0 = const()[name = string("x2_55_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_55_end_mask_0 = const()[name = string("x2_55_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_55_cast_fp16 = slice_by_index(begin = x2_55_begin_0, end = x2_55_end_0, end_mask = x2_55_end_mask_0, x = k_27_cast_fp16)[name = string("x2_55_cast_fp16")]; - fp16 const_511_promoted_to_fp16 = const()[name = string("const_511_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_9956_cast_fp16 = mul(x = x2_55_cast_fp16, y = const_511_promoted_to_fp16)[name = string("op_9956_cast_fp16")]; - int32 var_9958 = const()[name = string("op_9958"), val = int32(-1)]; - bool var_9959_interleave_0 = const()[name = string("op_9959_interleave_0"), val = bool(false)]; - tensor var_9959_cast_fp16 = concat(axis = var_9958, interleave = var_9959_interleave_0, values = (var_9956_cast_fp16, x1_55_cast_fp16))[name = string("op_9959_cast_fp16")]; - tensor var_9960_cast_fp16 = mul(x = var_9959_cast_fp16, y = sin_1_cast_fp16)[name = string("op_9960_cast_fp16")]; - tensor key_states_53_cast_fp16 = add(x = var_9935_cast_fp16, y = var_9960_cast_fp16)[name = string("key_states_53_cast_fp16")]; - tensor key_slice_23_begin_0 = const()[name = string("key_slice_23_begin_0"), val = tensor([11, 0, 0, 0])]; - tensor key_slice_23_end_0 = const()[name = string("key_slice_23_end_0"), val = tensor([12, 1, 512, 256])]; - tensor key_slice_23_end_mask_0 = const()[name = string("key_slice_23_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_23_cast_fp16 = slice_by_index(begin = key_slice_23_begin_0, end = key_slice_23_end_0, end_mask = key_slice_23_end_mask_0, x = coreml_update_state_77)[name = string("key_slice_23_cast_fp16")]; - tensor key_tail_23_begin_0 = const()[name = string("key_tail_23_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor key_tail_23_end_0 = const()[name = string("key_tail_23_end_0"), val = tensor([1, 1, 512, 256])]; - tensor key_tail_23_cast_fp16 = slice_by_index(begin = key_tail_23_begin_0, end = key_tail_23_end_0, x = key_slice_23_cast_fp16)[name = string("key_tail_23_cast_fp16")]; - int32 var_9973 = const()[name = string("op_9973"), val = int32(2)]; - bool shifted_key_23_interleave_0 = const()[name = string("shifted_key_23_interleave_0"), val = bool(false)]; - tensor shifted_key_23_cast_fp16 = concat(axis = var_9973, interleave = shifted_key_23_interleave_0, values = (key_tail_23_cast_fp16, key_states_53_cast_fp16))[name = string("shifted_key_23_cast_fp16")]; - tensor concat_60 = const()[name = string("concat_60"), val = tensor([11, 0, 0, 0])]; - tensor concat_61 = const()[name = string("concat_61"), val = tensor([12, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_23_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_23_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_23_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_60, begin_mask = model_model_kv_cache_local_internal_tensor_assign_23_begin_mask_0, end = concat_61, end_mask = model_model_kv_cache_local_internal_tensor_assign_23_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_23_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_23_stride_0, update = shifted_key_23_cast_fp16, x = coreml_update_state_77)[name = string("model_model_kv_cache_local_internal_tensor_assign_23_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_23_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_78_write_state")]; - tensor coreml_update_state_78 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_78")]; - tensor value_slice_23_begin_0 = const()[name = string("value_slice_23_begin_0"), val = tensor([33, 0, 0, 0])]; - tensor value_slice_23_end_0 = const()[name = string("value_slice_23_end_0"), val = tensor([34, 1, 512, 256])]; - tensor value_slice_23_end_mask_0 = const()[name = string("value_slice_23_end_mask_0"), val = tensor([false, true, true, true])]; - tensor value_slice_23_cast_fp16 = slice_by_index(begin = value_slice_23_begin_0, end = value_slice_23_end_0, end_mask = value_slice_23_end_mask_0, x = coreml_update_state_78)[name = string("value_slice_23_cast_fp16")]; - tensor value_tail_23_begin_0 = const()[name = string("value_tail_23_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor value_tail_23_end_0 = const()[name = string("value_tail_23_end_0"), val = tensor([1, 1, 512, 256])]; - tensor value_tail_23_cast_fp16 = slice_by_index(begin = value_tail_23_begin_0, end = value_tail_23_end_0, x = value_slice_23_cast_fp16)[name = string("value_tail_23_cast_fp16")]; - int32 var_10007 = const()[name = string("op_10007"), val = int32(2)]; - bool shifted_value_23_interleave_0 = const()[name = string("shifted_value_23_interleave_0"), val = bool(false)]; - tensor shifted_value_23_cast_fp16 = concat(axis = var_10007, interleave = shifted_value_23_interleave_0, values = (value_tail_23_cast_fp16, var_9847))[name = string("shifted_value_23_cast_fp16")]; - tensor concat_62 = const()[name = string("concat_62"), val = tensor([33, 0, 0, 0])]; - tensor concat_63 = const()[name = string("concat_63"), val = tensor([34, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_24_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_24_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_24_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_62, begin_mask = model_model_kv_cache_local_internal_tensor_assign_24_begin_mask_0, end = concat_63, end_mask = model_model_kv_cache_local_internal_tensor_assign_24_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_24_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_24_stride_0, update = shifted_value_23_cast_fp16, x = coreml_update_state_78)[name = string("model_model_kv_cache_local_internal_tensor_assign_24_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_24_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_79_write_state")]; - tensor coreml_update_state_79 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_79")]; - tensor var_10035_begin_0 = const()[name = string("op_10035_begin_0"), val = tensor([11, 0, 0, 0])]; - tensor var_10035_end_0 = const()[name = string("op_10035_end_0"), val = tensor([12, 1, 512, 256])]; - tensor var_10035_end_mask_0 = const()[name = string("op_10035_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_10035_cast_fp16 = slice_by_index(begin = var_10035_begin_0, end = var_10035_end_0, end_mask = var_10035_end_mask_0, x = coreml_update_state_79)[name = string("op_10035_cast_fp16")]; - tensor var_10042_begin_0 = const()[name = string("op_10042_begin_0"), val = tensor([33, 0, 0, 0])]; - tensor var_10042_end_0 = const()[name = string("op_10042_end_0"), val = tensor([34, 1, 512, 256])]; - tensor var_10042_end_mask_0 = const()[name = string("op_10042_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_10042_cast_fp16 = slice_by_index(begin = var_10042_begin_0, end = var_10042_end_0, end_mask = var_10042_end_mask_0, x = coreml_update_state_79)[name = string("op_10042_cast_fp16")]; - tensor var_10079 = const()[name = string("op_10079"), val = tensor([1, 4, 1, 1])]; - tensor x_213_cast_fp16 = tile(reps = var_10079, x = var_10035_cast_fp16)[name = string("x_213_cast_fp16")]; - tensor var_10099 = const()[name = string("op_10099"), val = tensor([1, 4, 1, 1])]; - tensor x_219_cast_fp16 = tile(reps = var_10099, x = var_10042_cast_fp16)[name = string("x_219_cast_fp16")]; - bool var_10126_transpose_x_1 = const()[name = string("op_10126_transpose_x_1"), val = bool(false)]; - bool var_10126_transpose_y_1 = const()[name = string("op_10126_transpose_y_1"), val = bool(true)]; - tensor var_10126 = matmul(transpose_x = var_10126_transpose_x_1, transpose_y = var_10126_transpose_y_1, x = query_states_53_cast_fp16, y = x_213_cast_fp16)[name = string("op_10126")]; - fp16 var_10127_to_fp16 = const()[name = string("op_10127_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_79_cast_fp16 = mul(x = var_10126, y = var_10127_to_fp16)[name = string("attn_weights_79_cast_fp16")]; - tensor attn_weights_81_cast_fp16 = add(x = attn_weights_79_cast_fp16, y = var_2059)[name = string("attn_weights_81_cast_fp16")]; - int32 var_10162 = const()[name = string("op_10162"), val = int32(-1)]; - tensor attn_weights_83_cast_fp16 = softmax(axis = var_10162, x = attn_weights_81_cast_fp16)[name = string("attn_weights_83_cast_fp16")]; - bool attn_output_131_transpose_x_0 = const()[name = string("attn_output_131_transpose_x_0"), val = bool(false)]; - bool attn_output_131_transpose_y_0 = const()[name = string("attn_output_131_transpose_y_0"), val = bool(false)]; - tensor attn_output_131_cast_fp16 = matmul(transpose_x = attn_output_131_transpose_x_0, transpose_y = attn_output_131_transpose_y_0, x = attn_weights_83_cast_fp16, y = x_219_cast_fp16)[name = string("attn_output_131_cast_fp16")]; - tensor var_10173_perm_0 = const()[name = string("op_10173_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_10177 = const()[name = string("op_10177"), val = tensor([1, 1, 1024])]; - tensor var_10173_cast_fp16 = transpose(perm = var_10173_perm_0, x = attn_output_131_cast_fp16)[name = string("transpose_76")]; - tensor attn_output_135_cast_fp16 = reshape(shape = var_10177, x = var_10173_cast_fp16)[name = string("attn_output_135_cast_fp16")]; - tensor var_10182 = const()[name = string("op_10182"), val = tensor([0, 2, 1])]; - string var_10198_pad_type_0 = const()[name = string("op_10198_pad_type_0"), val = string("valid")]; - int32 var_10198_groups_0 = const()[name = string("op_10198_groups_0"), val = int32(1)]; - tensor var_10198_strides_0 = const()[name = string("op_10198_strides_0"), val = tensor([1])]; - tensor var_10198_pad_0 = const()[name = string("op_10198_pad_0"), val = tensor([0, 0])]; - tensor var_10198_dilations_0 = const()[name = string("op_10198_dilations_0"), val = tensor([1])]; - tensor squeeze_13_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299842432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(300727232))))[name = string("squeeze_13_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_10183_cast_fp16 = transpose(perm = var_10182, x = attn_output_135_cast_fp16)[name = string("transpose_75")]; - tensor var_10198_cast_fp16 = conv(dilations = var_10198_dilations_0, groups = var_10198_groups_0, pad = var_10198_pad_0, pad_type = var_10198_pad_type_0, strides = var_10198_strides_0, weight = squeeze_13_cast_fp16_to_fp32_to_fp16_palettized, x = var_10183_cast_fp16)[name = string("op_10198_cast_fp16")]; - tensor var_10202 = const()[name = string("op_10202"), val = tensor([0, 2, 1])]; - int32 var_10213 = const()[name = string("op_10213"), val = int32(-1)]; - fp16 const_520_promoted_to_fp16 = const()[name = string("const_520_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_217_cast_fp16 = transpose(perm = var_10202, x = var_10198_cast_fp16)[name = string("transpose_74")]; - tensor var_10215_cast_fp16 = mul(x = hidden_states_217_cast_fp16, y = const_520_promoted_to_fp16)[name = string("op_10215_cast_fp16")]; - bool input_271_interleave_0 = const()[name = string("input_271_interleave_0"), val = bool(false)]; - tensor input_271_cast_fp16 = concat(axis = var_10213, interleave = input_271_interleave_0, values = (hidden_states_217_cast_fp16, var_10215_cast_fp16))[name = string("input_271_cast_fp16")]; - tensor normed_325_axes_0 = const()[name = string("normed_325_axes_0"), val = tensor([-1])]; - fp16 var_10210_to_fp16 = const()[name = string("op_10210_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_325_cast_fp16 = layer_norm(axes = normed_325_axes_0, epsilon = var_10210_to_fp16, x = input_271_cast_fp16)[name = string("normed_325_cast_fp16")]; - tensor normed_327_begin_0 = const()[name = string("normed_327_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_327_end_0 = const()[name = string("normed_327_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_327_end_mask_0 = const()[name = string("normed_327_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_327_cast_fp16 = slice_by_index(begin = normed_327_begin_0, end = normed_327_end_0, end_mask = normed_327_end_mask_0, x = normed_325_cast_fp16)[name = string("normed_327_cast_fp16")]; - tensor var_10229_to_fp16 = const()[name = string("op_10229_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(300745728)))]; - tensor attn_output_139_cast_fp16 = mul(x = normed_327_cast_fp16, y = var_10229_to_fp16)[name = string("attn_output_139_cast_fp16")]; - tensor hidden_states_219_cast_fp16 = add(x = hidden_states_209_cast_fp16, y = attn_output_139_cast_fp16)[name = string("hidden_states_219_cast_fp16")]; - int32 var_10242 = const()[name = string("op_10242"), val = int32(-1)]; - fp16 const_524_promoted_to_fp16 = const()[name = string("const_524_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_10244_cast_fp16 = mul(x = hidden_states_219_cast_fp16, y = const_524_promoted_to_fp16)[name = string("op_10244_cast_fp16")]; - bool input_273_interleave_0 = const()[name = string("input_273_interleave_0"), val = bool(false)]; - tensor input_273_cast_fp16 = concat(axis = var_10242, interleave = input_273_interleave_0, values = (hidden_states_219_cast_fp16, var_10244_cast_fp16))[name = string("input_273_cast_fp16")]; - tensor normed_329_axes_0 = const()[name = string("normed_329_axes_0"), val = tensor([-1])]; - fp16 var_10239_to_fp16 = const()[name = string("op_10239_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_329_cast_fp16 = layer_norm(axes = normed_329_axes_0, epsilon = var_10239_to_fp16, x = input_273_cast_fp16)[name = string("normed_329_cast_fp16")]; - tensor normed_331_begin_0 = const()[name = string("normed_331_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_331_end_0 = const()[name = string("normed_331_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_331_end_mask_0 = const()[name = string("normed_331_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_331_cast_fp16 = slice_by_index(begin = normed_331_begin_0, end = normed_331_end_0, end_mask = normed_331_end_mask_0, x = normed_329_cast_fp16)[name = string("normed_331_cast_fp16")]; - tensor var_10258_to_fp16 = const()[name = string("op_10258_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(300748096)))]; - tensor x_221_cast_fp16 = mul(x = normed_331_cast_fp16, y = var_10258_to_fp16)[name = string("x_221_cast_fp16")]; - tensor var_10270 = const()[name = string("op_10270"), val = tensor([0, 2, 1])]; - tensor input_275_axes_0 = const()[name = string("input_275_axes_0"), val = tensor([2])]; - tensor var_10271_cast_fp16 = transpose(perm = var_10270, x = x_221_cast_fp16)[name = string("transpose_73")]; - tensor input_275_cast_fp16 = expand_dims(axes = input_275_axes_0, x = var_10271_cast_fp16)[name = string("input_275_cast_fp16")]; - string x_223_pad_type_0 = const()[name = string("x_223_pad_type_0"), val = string("valid")]; - tensor x_223_strides_0 = const()[name = string("x_223_strides_0"), val = tensor([1, 1])]; - tensor x_223_pad_0 = const()[name = string("x_223_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_223_dilations_0 = const()[name = string("x_223_dilations_0"), val = tensor([1, 1])]; - int32 x_223_groups_0 = const()[name = string("x_223_groups_0"), val = int32(1)]; - tensor model_model_layers_13_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(719118208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(725090240))))[name = string("model_model_layers_13_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_223_cast_fp16 = conv(dilations = x_223_dilations_0, groups = x_223_groups_0, pad = x_223_pad_0, pad_type = x_223_pad_type_0, strides = x_223_strides_0, weight = model_model_layers_13_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_275_cast_fp16)[name = string("x_223_cast_fp16")]; - string b_27_pad_type_0 = const()[name = string("b_27_pad_type_0"), val = string("valid")]; - tensor b_27_strides_0 = const()[name = string("b_27_strides_0"), val = tensor([1, 1])]; - tensor b_27_pad_0 = const()[name = string("b_27_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_27_dilations_0 = const()[name = string("b_27_dilations_0"), val = tensor([1, 1])]; - int32 b_27_groups_0 = const()[name = string("b_27_groups_0"), val = int32(1)]; - tensor model_model_layers_13_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(725200896))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(731172928))))[name = string("model_model_layers_13_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_27_cast_fp16 = conv(dilations = b_27_dilations_0, groups = b_27_groups_0, pad = b_27_pad_0, pad_type = b_27_pad_type_0, strides = b_27_strides_0, weight = model_model_layers_13_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_275_cast_fp16)[name = string("b_27_cast_fp16")]; - string var_10296_mode_0 = const()[name = string("op_10296_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_10296_cast_fp16 = gelu(mode = var_10296_mode_0, x = x_223_cast_fp16)[name = string("op_10296_cast_fp16")]; - tensor input_277_cast_fp16 = mul(x = var_10296_cast_fp16, y = b_27_cast_fp16)[name = string("input_277_cast_fp16")]; - string e_27_pad_type_0 = const()[name = string("e_27_pad_type_0"), val = string("valid")]; - tensor e_27_strides_0 = const()[name = string("e_27_strides_0"), val = tensor([1, 1])]; - tensor e_27_pad_0 = const()[name = string("e_27_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_27_dilations_0 = const()[name = string("e_27_dilations_0"), val = tensor([1, 1])]; - int32 e_27_groups_0 = const()[name = string("e_27_groups_0"), val = int32(1)]; - tensor model_model_layers_13_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(312915840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318887872))))[name = string("model_model_layers_13_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_27_cast_fp16 = conv(dilations = e_27_dilations_0, groups = e_27_groups_0, pad = e_27_pad_0, pad_type = e_27_pad_type_0, strides = e_27_strides_0, weight = model_model_layers_13_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_277_cast_fp16)[name = string("e_27_cast_fp16")]; - tensor var_10304_axes_0 = const()[name = string("op_10304_axes_0"), val = tensor([2])]; - tensor var_10304_cast_fp16 = squeeze(axes = var_10304_axes_0, x = e_27_cast_fp16)[name = string("op_10304_cast_fp16")]; - tensor var_10305 = const()[name = string("op_10305"), val = tensor([0, 2, 1])]; - int32 var_10316 = const()[name = string("op_10316"), val = int32(-1)]; - fp16 const_528_promoted_to_fp16 = const()[name = string("const_528_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_221_cast_fp16 = transpose(perm = var_10305, x = var_10304_cast_fp16)[name = string("transpose_72")]; - tensor var_10318_cast_fp16 = mul(x = hidden_states_221_cast_fp16, y = const_528_promoted_to_fp16)[name = string("op_10318_cast_fp16")]; - bool input_279_interleave_0 = const()[name = string("input_279_interleave_0"), val = bool(false)]; - tensor input_279_cast_fp16 = concat(axis = var_10316, interleave = input_279_interleave_0, values = (hidden_states_221_cast_fp16, var_10318_cast_fp16))[name = string("input_279_cast_fp16")]; - tensor normed_333_axes_0 = const()[name = string("normed_333_axes_0"), val = tensor([-1])]; - fp16 var_10313_to_fp16 = const()[name = string("op_10313_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_333_cast_fp16 = layer_norm(axes = normed_333_axes_0, epsilon = var_10313_to_fp16, x = input_279_cast_fp16)[name = string("normed_333_cast_fp16")]; - tensor normed_335_begin_0 = const()[name = string("normed_335_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_335_end_0 = const()[name = string("normed_335_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_335_end_mask_0 = const()[name = string("normed_335_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_335_cast_fp16 = slice_by_index(begin = normed_335_begin_0, end = normed_335_end_0, end_mask = normed_335_end_mask_0, x = normed_333_cast_fp16)[name = string("normed_335_cast_fp16")]; - tensor var_10332_to_fp16 = const()[name = string("op_10332_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318906368)))]; - tensor hidden_states_223_cast_fp16 = mul(x = normed_335_cast_fp16, y = var_10332_to_fp16)[name = string("hidden_states_223_cast_fp16")]; - tensor hidden_states_225_cast_fp16 = add(x = hidden_states_219_cast_fp16, y = hidden_states_223_cast_fp16)[name = string("hidden_states_225_cast_fp16")]; - int32 var_10383 = const()[name = string("op_10383"), val = int32(-1)]; - fp16 const_532_promoted_to_fp16 = const()[name = string("const_532_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_10385_cast_fp16 = mul(x = hidden_states_225_cast_fp16, y = const_532_promoted_to_fp16)[name = string("op_10385_cast_fp16")]; - bool input_281_interleave_0 = const()[name = string("input_281_interleave_0"), val = bool(false)]; - tensor input_281_cast_fp16 = concat(axis = var_10383, interleave = input_281_interleave_0, values = (hidden_states_225_cast_fp16, var_10385_cast_fp16))[name = string("input_281_cast_fp16")]; - tensor normed_337_axes_0 = const()[name = string("normed_337_axes_0"), val = tensor([-1])]; - fp16 var_10380_to_fp16 = const()[name = string("op_10380_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_337_cast_fp16 = layer_norm(axes = normed_337_axes_0, epsilon = var_10380_to_fp16, x = input_281_cast_fp16)[name = string("normed_337_cast_fp16")]; - tensor normed_339_begin_0 = const()[name = string("normed_339_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_339_end_0 = const()[name = string("normed_339_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_339_end_mask_0 = const()[name = string("normed_339_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_339_cast_fp16 = slice_by_index(begin = normed_339_begin_0, end = normed_339_end_0, end_mask = normed_339_end_mask_0, x = normed_337_cast_fp16)[name = string("normed_339_cast_fp16")]; - tensor var_10399_to_fp16 = const()[name = string("op_10399_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318908736)))]; - tensor hidden_states_227_cast_fp16 = mul(x = normed_339_cast_fp16, y = var_10399_to_fp16)[name = string("hidden_states_227_cast_fp16")]; - tensor var_10404 = const()[name = string("op_10404"), val = tensor([0, 2, 1])]; - tensor var_10407_axes_0 = const()[name = string("op_10407_axes_0"), val = tensor([2])]; - tensor var_10405_cast_fp16 = transpose(perm = var_10404, x = hidden_states_227_cast_fp16)[name = string("transpose_71")]; - tensor var_10407_cast_fp16 = expand_dims(axes = var_10407_axes_0, x = var_10405_cast_fp16)[name = string("op_10407_cast_fp16")]; - string var_10423_pad_type_0 = const()[name = string("op_10423_pad_type_0"), val = string("valid")]; - tensor var_10423_strides_0 = const()[name = string("op_10423_strides_0"), val = tensor([1, 1])]; - tensor var_10423_pad_0 = const()[name = string("op_10423_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_10423_dilations_0 = const()[name = string("op_10423_dilations_0"), val = tensor([1, 1])]; - int32 var_10423_groups_0 = const()[name = string("op_10423_groups_0"), val = int32(1)]; - tensor var_10423 = conv(dilations = var_10423_dilations_0, groups = var_10423_groups_0, pad = var_10423_pad_0, pad_type = var_10423_pad_type_0, strides = var_10423_strides_0, weight = model_model_layers_14_self_attn_q_proj_weight_palettized, x = var_10407_cast_fp16)[name = string("op_10423")]; - tensor var_10428 = const()[name = string("op_10428"), val = tensor([1, 4, 1, 256])]; - tensor var_10429 = reshape(shape = var_10428, x = var_10423)[name = string("op_10429")]; - string var_10445_pad_type_0 = const()[name = string("op_10445_pad_type_0"), val = string("valid")]; - tensor var_10445_strides_0 = const()[name = string("op_10445_strides_0"), val = tensor([1, 1])]; - tensor var_10445_pad_0 = const()[name = string("op_10445_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_10445_dilations_0 = const()[name = string("op_10445_dilations_0"), val = tensor([1, 1])]; - int32 var_10445_groups_0 = const()[name = string("op_10445_groups_0"), val = int32(1)]; - tensor var_10445 = conv(dilations = var_10445_dilations_0, groups = var_10445_groups_0, pad = var_10445_pad_0, pad_type = var_10445_pad_type_0, strides = var_10445_strides_0, weight = model_model_layers_14_self_attn_k_proj_weight_palettized, x = var_10407_cast_fp16)[name = string("op_10445")]; - tensor var_10450 = const()[name = string("op_10450"), val = tensor([1, 1, 1, 256])]; - tensor var_10451 = reshape(shape = var_10450, x = var_10445)[name = string("op_10451")]; - string var_10467_pad_type_0 = const()[name = string("op_10467_pad_type_0"), val = string("valid")]; - tensor var_10467_strides_0 = const()[name = string("op_10467_strides_0"), val = tensor([1, 1])]; - tensor var_10467_pad_0 = const()[name = string("op_10467_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_10467_dilations_0 = const()[name = string("op_10467_dilations_0"), val = tensor([1, 1])]; - int32 var_10467_groups_0 = const()[name = string("op_10467_groups_0"), val = int32(1)]; - tensor var_10467 = conv(dilations = var_10467_dilations_0, groups = var_10467_groups_0, pad = var_10467_pad_0, pad_type = var_10467_pad_type_0, strides = var_10467_strides_0, weight = model_model_layers_14_self_attn_v_proj_weight_palettized, x = var_10407_cast_fp16)[name = string("op_10467")]; - tensor var_10472 = const()[name = string("op_10472"), val = tensor([1, 1, 1, 256])]; - tensor var_10473 = reshape(shape = var_10472, x = var_10467)[name = string("op_10473")]; - int32 var_10488 = const()[name = string("op_10488"), val = int32(-1)]; - fp16 const_536_promoted = const()[name = string("const_536_promoted"), val = fp16(-0x1p+0)]; - tensor var_10490 = mul(x = var_10429, y = const_536_promoted)[name = string("op_10490")]; - bool input_285_interleave_0 = const()[name = string("input_285_interleave_0"), val = bool(false)]; - tensor input_285 = concat(axis = var_10488, interleave = input_285_interleave_0, values = (var_10429, var_10490))[name = string("input_285")]; - tensor normed_341_axes_0 = const()[name = string("normed_341_axes_0"), val = tensor([-1])]; - fp16 var_10485_to_fp16 = const()[name = string("op_10485_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_341_cast_fp16 = layer_norm(axes = normed_341_axes_0, epsilon = var_10485_to_fp16, x = input_285)[name = string("normed_341_cast_fp16")]; - tensor normed_343_begin_0 = const()[name = string("normed_343_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_343_end_0 = const()[name = string("normed_343_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_343_end_mask_0 = const()[name = string("normed_343_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_343 = slice_by_index(begin = normed_343_begin_0, end = normed_343_end_0, end_mask = normed_343_end_mask_0, x = normed_341_cast_fp16)[name = string("normed_343")]; - tensor var_10504_to_fp16 = const()[name = string("op_10504_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318911104)))]; - tensor q_29_cast_fp16 = mul(x = normed_343, y = var_10504_to_fp16)[name = string("q_29_cast_fp16")]; - int32 var_10515 = const()[name = string("op_10515"), val = int32(-1)]; - fp16 const_540_promoted = const()[name = string("const_540_promoted"), val = fp16(-0x1p+0)]; - tensor var_10517 = mul(x = var_10451, y = const_540_promoted)[name = string("op_10517")]; - bool input_287_interleave_0 = const()[name = string("input_287_interleave_0"), val = bool(false)]; - tensor input_287 = concat(axis = var_10515, interleave = input_287_interleave_0, values = (var_10451, var_10517))[name = string("input_287")]; - tensor normed_345_axes_0 = const()[name = string("normed_345_axes_0"), val = tensor([-1])]; - fp16 var_10512_to_fp16 = const()[name = string("op_10512_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_345_cast_fp16 = layer_norm(axes = normed_345_axes_0, epsilon = var_10512_to_fp16, x = input_287)[name = string("normed_345_cast_fp16")]; - tensor normed_347_begin_0 = const()[name = string("normed_347_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_347_end_0 = const()[name = string("normed_347_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_347_end_mask_0 = const()[name = string("normed_347_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_347 = slice_by_index(begin = normed_347_begin_0, end = normed_347_end_0, end_mask = normed_347_end_mask_0, x = normed_345_cast_fp16)[name = string("normed_347")]; - tensor var_10531_to_fp16 = const()[name = string("op_10531_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318911680)))]; - tensor k_29_cast_fp16 = mul(x = normed_347, y = var_10531_to_fp16)[name = string("k_29_cast_fp16")]; - tensor var_10533_cast_fp16 = mul(x = q_29_cast_fp16, y = cos_1_cast_fp16)[name = string("op_10533_cast_fp16")]; - tensor x1_57_begin_0 = const()[name = string("x1_57_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_57_end_0 = const()[name = string("x1_57_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_57_end_mask_0 = const()[name = string("x1_57_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_57_cast_fp16 = slice_by_index(begin = x1_57_begin_0, end = x1_57_end_0, end_mask = x1_57_end_mask_0, x = q_29_cast_fp16)[name = string("x1_57_cast_fp16")]; - tensor x2_57_begin_0 = const()[name = string("x2_57_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_57_end_0 = const()[name = string("x2_57_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_57_end_mask_0 = const()[name = string("x2_57_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_57_cast_fp16 = slice_by_index(begin = x2_57_begin_0, end = x2_57_end_0, end_mask = x2_57_end_mask_0, x = q_29_cast_fp16)[name = string("x2_57_cast_fp16")]; - fp16 const_546_promoted_to_fp16 = const()[name = string("const_546_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_10554_cast_fp16 = mul(x = x2_57_cast_fp16, y = const_546_promoted_to_fp16)[name = string("op_10554_cast_fp16")]; - int32 var_10556 = const()[name = string("op_10556"), val = int32(-1)]; - bool var_10557_interleave_0 = const()[name = string("op_10557_interleave_0"), val = bool(false)]; - tensor var_10557_cast_fp16 = concat(axis = var_10556, interleave = var_10557_interleave_0, values = (var_10554_cast_fp16, x1_57_cast_fp16))[name = string("op_10557_cast_fp16")]; - tensor var_10558_cast_fp16 = mul(x = var_10557_cast_fp16, y = sin_1_cast_fp16)[name = string("op_10558_cast_fp16")]; - tensor query_states_57_cast_fp16 = add(x = var_10533_cast_fp16, y = var_10558_cast_fp16)[name = string("query_states_57_cast_fp16")]; - tensor var_10561_cast_fp16 = mul(x = k_29_cast_fp16, y = cos_1_cast_fp16)[name = string("op_10561_cast_fp16")]; - tensor x1_59_begin_0 = const()[name = string("x1_59_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_59_end_0 = const()[name = string("x1_59_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_59_end_mask_0 = const()[name = string("x1_59_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_59_cast_fp16 = slice_by_index(begin = x1_59_begin_0, end = x1_59_end_0, end_mask = x1_59_end_mask_0, x = k_29_cast_fp16)[name = string("x1_59_cast_fp16")]; - tensor x2_59_begin_0 = const()[name = string("x2_59_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_59_end_0 = const()[name = string("x2_59_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_59_end_mask_0 = const()[name = string("x2_59_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_59_cast_fp16 = slice_by_index(begin = x2_59_begin_0, end = x2_59_end_0, end_mask = x2_59_end_mask_0, x = k_29_cast_fp16)[name = string("x2_59_cast_fp16")]; - fp16 const_549_promoted_to_fp16 = const()[name = string("const_549_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_10582_cast_fp16 = mul(x = x2_59_cast_fp16, y = const_549_promoted_to_fp16)[name = string("op_10582_cast_fp16")]; - int32 var_10584 = const()[name = string("op_10584"), val = int32(-1)]; - bool var_10585_interleave_0 = const()[name = string("op_10585_interleave_0"), val = bool(false)]; - tensor var_10585_cast_fp16 = concat(axis = var_10584, interleave = var_10585_interleave_0, values = (var_10582_cast_fp16, x1_59_cast_fp16))[name = string("op_10585_cast_fp16")]; - tensor var_10586_cast_fp16 = mul(x = var_10585_cast_fp16, y = sin_1_cast_fp16)[name = string("op_10586_cast_fp16")]; - tensor key_states_57_cast_fp16 = add(x = var_10561_cast_fp16, y = var_10586_cast_fp16)[name = string("key_states_57_cast_fp16")]; - tensor key_slice_25_begin_0 = const()[name = string("key_slice_25_begin_0"), val = tensor([12, 0, 0, 0])]; - tensor key_slice_25_end_0 = const()[name = string("key_slice_25_end_0"), val = tensor([13, 1, 512, 256])]; - tensor key_slice_25_end_mask_0 = const()[name = string("key_slice_25_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_25_cast_fp16 = slice_by_index(begin = key_slice_25_begin_0, end = key_slice_25_end_0, end_mask = key_slice_25_end_mask_0, x = coreml_update_state_79)[name = string("key_slice_25_cast_fp16")]; - tensor key_tail_25_begin_0 = const()[name = string("key_tail_25_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor key_tail_25_end_0 = const()[name = string("key_tail_25_end_0"), val = tensor([1, 1, 512, 256])]; - tensor key_tail_25_cast_fp16 = slice_by_index(begin = key_tail_25_begin_0, end = key_tail_25_end_0, x = key_slice_25_cast_fp16)[name = string("key_tail_25_cast_fp16")]; - int32 var_10599 = const()[name = string("op_10599"), val = int32(2)]; - bool shifted_key_25_interleave_0 = const()[name = string("shifted_key_25_interleave_0"), val = bool(false)]; - tensor shifted_key_25_cast_fp16 = concat(axis = var_10599, interleave = shifted_key_25_interleave_0, values = (key_tail_25_cast_fp16, key_states_57_cast_fp16))[name = string("shifted_key_25_cast_fp16")]; - tensor concat_64 = const()[name = string("concat_64"), val = tensor([12, 0, 0, 0])]; - tensor concat_65 = const()[name = string("concat_65"), val = tensor([13, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_25_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_25_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_25_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_25_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_25_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_25_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_25_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_64, begin_mask = model_model_kv_cache_local_internal_tensor_assign_25_begin_mask_0, end = concat_65, end_mask = model_model_kv_cache_local_internal_tensor_assign_25_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_25_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_25_stride_0, update = shifted_key_25_cast_fp16, x = coreml_update_state_79)[name = string("model_model_kv_cache_local_internal_tensor_assign_25_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_25_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_80_write_state")]; - tensor coreml_update_state_80 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_80")]; - tensor value_slice_25_begin_0 = const()[name = string("value_slice_25_begin_0"), val = tensor([34, 0, 0, 0])]; - tensor value_slice_25_end_0 = const()[name = string("value_slice_25_end_0"), val = tensor([35, 1, 512, 256])]; - tensor value_slice_25_end_mask_0 = const()[name = string("value_slice_25_end_mask_0"), val = tensor([false, true, true, true])]; - tensor value_slice_25_cast_fp16 = slice_by_index(begin = value_slice_25_begin_0, end = value_slice_25_end_0, end_mask = value_slice_25_end_mask_0, x = coreml_update_state_80)[name = string("value_slice_25_cast_fp16")]; - tensor value_tail_25_begin_0 = const()[name = string("value_tail_25_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor value_tail_25_end_0 = const()[name = string("value_tail_25_end_0"), val = tensor([1, 1, 512, 256])]; - tensor value_tail_25_cast_fp16 = slice_by_index(begin = value_tail_25_begin_0, end = value_tail_25_end_0, x = value_slice_25_cast_fp16)[name = string("value_tail_25_cast_fp16")]; - int32 var_10633 = const()[name = string("op_10633"), val = int32(2)]; - bool shifted_value_25_interleave_0 = const()[name = string("shifted_value_25_interleave_0"), val = bool(false)]; - tensor shifted_value_25_cast_fp16 = concat(axis = var_10633, interleave = shifted_value_25_interleave_0, values = (value_tail_25_cast_fp16, var_10473))[name = string("shifted_value_25_cast_fp16")]; - tensor concat_66 = const()[name = string("concat_66"), val = tensor([34, 0, 0, 0])]; - tensor concat_67 = const()[name = string("concat_67"), val = tensor([35, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_26_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_26_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_26_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_26_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_26_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_26_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_26_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_66, begin_mask = model_model_kv_cache_local_internal_tensor_assign_26_begin_mask_0, end = concat_67, end_mask = model_model_kv_cache_local_internal_tensor_assign_26_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_26_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_26_stride_0, update = shifted_value_25_cast_fp16, x = coreml_update_state_80)[name = string("model_model_kv_cache_local_internal_tensor_assign_26_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_26_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_81_write_state")]; - tensor coreml_update_state_81 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_81")]; - tensor var_10661_begin_0 = const()[name = string("op_10661_begin_0"), val = tensor([12, 0, 0, 0])]; - tensor var_10661_end_0 = const()[name = string("op_10661_end_0"), val = tensor([13, 1, 512, 256])]; - tensor var_10661_end_mask_0 = const()[name = string("op_10661_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_10661_cast_fp16 = slice_by_index(begin = var_10661_begin_0, end = var_10661_end_0, end_mask = var_10661_end_mask_0, x = coreml_update_state_81)[name = string("op_10661_cast_fp16")]; - tensor var_10668_begin_0 = const()[name = string("op_10668_begin_0"), val = tensor([34, 0, 0, 0])]; - tensor var_10668_end_0 = const()[name = string("op_10668_end_0"), val = tensor([35, 1, 512, 256])]; - tensor var_10668_end_mask_0 = const()[name = string("op_10668_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_10668_cast_fp16 = slice_by_index(begin = var_10668_begin_0, end = var_10668_end_0, end_mask = var_10668_end_mask_0, x = coreml_update_state_81)[name = string("op_10668_cast_fp16")]; - tensor var_10705 = const()[name = string("op_10705"), val = tensor([1, 4, 1, 1])]; - tensor x_229_cast_fp16 = tile(reps = var_10705, x = var_10661_cast_fp16)[name = string("x_229_cast_fp16")]; - tensor var_10725 = const()[name = string("op_10725"), val = tensor([1, 4, 1, 1])]; - tensor x_235_cast_fp16 = tile(reps = var_10725, x = var_10668_cast_fp16)[name = string("x_235_cast_fp16")]; - bool var_10752_transpose_x_1 = const()[name = string("op_10752_transpose_x_1"), val = bool(false)]; - bool var_10752_transpose_y_1 = const()[name = string("op_10752_transpose_y_1"), val = bool(true)]; - tensor var_10752 = matmul(transpose_x = var_10752_transpose_x_1, transpose_y = var_10752_transpose_y_1, x = query_states_57_cast_fp16, y = x_229_cast_fp16)[name = string("op_10752")]; - fp16 var_10753_to_fp16 = const()[name = string("op_10753_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_85_cast_fp16 = mul(x = var_10752, y = var_10753_to_fp16)[name = string("attn_weights_85_cast_fp16")]; - tensor attn_weights_87_cast_fp16 = add(x = attn_weights_85_cast_fp16, y = var_2059)[name = string("attn_weights_87_cast_fp16")]; - int32 var_10788 = const()[name = string("op_10788"), val = int32(-1)]; - tensor attn_weights_89_cast_fp16 = softmax(axis = var_10788, x = attn_weights_87_cast_fp16)[name = string("attn_weights_89_cast_fp16")]; - bool attn_output_141_transpose_x_0 = const()[name = string("attn_output_141_transpose_x_0"), val = bool(false)]; - bool attn_output_141_transpose_y_0 = const()[name = string("attn_output_141_transpose_y_0"), val = bool(false)]; - tensor attn_output_141_cast_fp16 = matmul(transpose_x = attn_output_141_transpose_x_0, transpose_y = attn_output_141_transpose_y_0, x = attn_weights_89_cast_fp16, y = x_235_cast_fp16)[name = string("attn_output_141_cast_fp16")]; - tensor var_10799_perm_0 = const()[name = string("op_10799_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_10803 = const()[name = string("op_10803"), val = tensor([1, 1, 1024])]; - tensor var_10799_cast_fp16 = transpose(perm = var_10799_perm_0, x = attn_output_141_cast_fp16)[name = string("transpose_70")]; - tensor attn_output_145_cast_fp16 = reshape(shape = var_10803, x = var_10799_cast_fp16)[name = string("attn_output_145_cast_fp16")]; - tensor var_10808 = const()[name = string("op_10808"), val = tensor([0, 2, 1])]; - string var_10824_pad_type_0 = const()[name = string("op_10824_pad_type_0"), val = string("valid")]; - int32 var_10824_groups_0 = const()[name = string("op_10824_groups_0"), val = int32(1)]; - tensor var_10824_strides_0 = const()[name = string("op_10824_strides_0"), val = tensor([1])]; - tensor var_10824_pad_0 = const()[name = string("op_10824_pad_0"), val = tensor([0, 0])]; - tensor var_10824_dilations_0 = const()[name = string("op_10824_dilations_0"), val = tensor([1])]; - tensor squeeze_14_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318912256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319797056))))[name = string("squeeze_14_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_10809_cast_fp16 = transpose(perm = var_10808, x = attn_output_145_cast_fp16)[name = string("transpose_69")]; - tensor var_10824_cast_fp16 = conv(dilations = var_10824_dilations_0, groups = var_10824_groups_0, pad = var_10824_pad_0, pad_type = var_10824_pad_type_0, strides = var_10824_strides_0, weight = squeeze_14_cast_fp16_to_fp32_to_fp16_palettized, x = var_10809_cast_fp16)[name = string("op_10824_cast_fp16")]; - tensor var_10828 = const()[name = string("op_10828"), val = tensor([0, 2, 1])]; - int32 var_10839 = const()[name = string("op_10839"), val = int32(-1)]; - fp16 const_558_promoted_to_fp16 = const()[name = string("const_558_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_233_cast_fp16 = transpose(perm = var_10828, x = var_10824_cast_fp16)[name = string("transpose_68")]; - tensor var_10841_cast_fp16 = mul(x = hidden_states_233_cast_fp16, y = const_558_promoted_to_fp16)[name = string("op_10841_cast_fp16")]; - bool input_291_interleave_0 = const()[name = string("input_291_interleave_0"), val = bool(false)]; - tensor input_291_cast_fp16 = concat(axis = var_10839, interleave = input_291_interleave_0, values = (hidden_states_233_cast_fp16, var_10841_cast_fp16))[name = string("input_291_cast_fp16")]; - tensor normed_349_axes_0 = const()[name = string("normed_349_axes_0"), val = tensor([-1])]; - fp16 var_10836_to_fp16 = const()[name = string("op_10836_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_349_cast_fp16 = layer_norm(axes = normed_349_axes_0, epsilon = var_10836_to_fp16, x = input_291_cast_fp16)[name = string("normed_349_cast_fp16")]; - tensor normed_351_begin_0 = const()[name = string("normed_351_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_351_end_0 = const()[name = string("normed_351_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_351_end_mask_0 = const()[name = string("normed_351_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_351_cast_fp16 = slice_by_index(begin = normed_351_begin_0, end = normed_351_end_0, end_mask = normed_351_end_mask_0, x = normed_349_cast_fp16)[name = string("normed_351_cast_fp16")]; - tensor var_10855_to_fp16 = const()[name = string("op_10855_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319815552)))]; - tensor attn_output_149_cast_fp16 = mul(x = normed_351_cast_fp16, y = var_10855_to_fp16)[name = string("attn_output_149_cast_fp16")]; - tensor hidden_states_235_cast_fp16 = add(x = hidden_states_225_cast_fp16, y = attn_output_149_cast_fp16)[name = string("hidden_states_235_cast_fp16")]; - int32 var_10868 = const()[name = string("op_10868"), val = int32(-1)]; - fp16 const_562_promoted_to_fp16 = const()[name = string("const_562_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_10870_cast_fp16 = mul(x = hidden_states_235_cast_fp16, y = const_562_promoted_to_fp16)[name = string("op_10870_cast_fp16")]; - bool input_293_interleave_0 = const()[name = string("input_293_interleave_0"), val = bool(false)]; - tensor input_293_cast_fp16 = concat(axis = var_10868, interleave = input_293_interleave_0, values = (hidden_states_235_cast_fp16, var_10870_cast_fp16))[name = string("input_293_cast_fp16")]; - tensor normed_353_axes_0 = const()[name = string("normed_353_axes_0"), val = tensor([-1])]; - fp16 var_10865_to_fp16 = const()[name = string("op_10865_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_353_cast_fp16 = layer_norm(axes = normed_353_axes_0, epsilon = var_10865_to_fp16, x = input_293_cast_fp16)[name = string("normed_353_cast_fp16")]; - tensor normed_355_begin_0 = const()[name = string("normed_355_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_355_end_0 = const()[name = string("normed_355_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_355_end_mask_0 = const()[name = string("normed_355_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_355_cast_fp16 = slice_by_index(begin = normed_355_begin_0, end = normed_355_end_0, end_mask = normed_355_end_mask_0, x = normed_353_cast_fp16)[name = string("normed_355_cast_fp16")]; - tensor var_10884_to_fp16 = const()[name = string("op_10884_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319817920)))]; - tensor x_237_cast_fp16 = mul(x = normed_355_cast_fp16, y = var_10884_to_fp16)[name = string("x_237_cast_fp16")]; - tensor var_10896 = const()[name = string("op_10896"), val = tensor([0, 2, 1])]; - tensor input_295_axes_0 = const()[name = string("input_295_axes_0"), val = tensor([2])]; - tensor var_10897_cast_fp16 = transpose(perm = var_10896, x = x_237_cast_fp16)[name = string("transpose_67")]; - tensor input_295_cast_fp16 = expand_dims(axes = input_295_axes_0, x = var_10897_cast_fp16)[name = string("input_295_cast_fp16")]; - string x_239_pad_type_0 = const()[name = string("x_239_pad_type_0"), val = string("valid")]; - tensor x_239_strides_0 = const()[name = string("x_239_strides_0"), val = tensor([1, 1])]; - tensor x_239_pad_0 = const()[name = string("x_239_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_239_dilations_0 = const()[name = string("x_239_dilations_0"), val = tensor([1, 1])]; - int32 x_239_groups_0 = const()[name = string("x_239_groups_0"), val = int32(1)]; - tensor model_model_layers_14_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(731283584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(737255616))))[name = string("model_model_layers_14_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_239_cast_fp16 = conv(dilations = x_239_dilations_0, groups = x_239_groups_0, pad = x_239_pad_0, pad_type = x_239_pad_type_0, strides = x_239_strides_0, weight = model_model_layers_14_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_295_cast_fp16)[name = string("x_239_cast_fp16")]; - string b_29_pad_type_0 = const()[name = string("b_29_pad_type_0"), val = string("valid")]; - tensor b_29_strides_0 = const()[name = string("b_29_strides_0"), val = tensor([1, 1])]; - tensor b_29_pad_0 = const()[name = string("b_29_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_29_dilations_0 = const()[name = string("b_29_dilations_0"), val = tensor([1, 1])]; - int32 b_29_groups_0 = const()[name = string("b_29_groups_0"), val = int32(1)]; - tensor model_model_layers_14_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(737366272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(743338304))))[name = string("model_model_layers_14_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_29_cast_fp16 = conv(dilations = b_29_dilations_0, groups = b_29_groups_0, pad = b_29_pad_0, pad_type = b_29_pad_type_0, strides = b_29_strides_0, weight = model_model_layers_14_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_295_cast_fp16)[name = string("b_29_cast_fp16")]; - string var_10922_mode_0 = const()[name = string("op_10922_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_10922_cast_fp16 = gelu(mode = var_10922_mode_0, x = x_239_cast_fp16)[name = string("op_10922_cast_fp16")]; - tensor input_297_cast_fp16 = mul(x = var_10922_cast_fp16, y = b_29_cast_fp16)[name = string("input_297_cast_fp16")]; - string e_29_pad_type_0 = const()[name = string("e_29_pad_type_0"), val = string("valid")]; - tensor e_29_strides_0 = const()[name = string("e_29_strides_0"), val = tensor([1, 1])]; - tensor e_29_pad_0 = const()[name = string("e_29_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_29_dilations_0 = const()[name = string("e_29_dilations_0"), val = tensor([1, 1])]; - int32 e_29_groups_0 = const()[name = string("e_29_groups_0"), val = int32(1)]; - tensor model_model_layers_14_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(331985664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337957696))))[name = string("model_model_layers_14_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_29_cast_fp16 = conv(dilations = e_29_dilations_0, groups = e_29_groups_0, pad = e_29_pad_0, pad_type = e_29_pad_type_0, strides = e_29_strides_0, weight = model_model_layers_14_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_297_cast_fp16)[name = string("e_29_cast_fp16")]; - tensor var_10930_axes_0 = const()[name = string("op_10930_axes_0"), val = tensor([2])]; - tensor var_10930_cast_fp16 = squeeze(axes = var_10930_axes_0, x = e_29_cast_fp16)[name = string("op_10930_cast_fp16")]; - tensor var_10931 = const()[name = string("op_10931"), val = tensor([0, 2, 1])]; - int32 var_10942 = const()[name = string("op_10942"), val = int32(-1)]; - fp16 const_566_promoted_to_fp16 = const()[name = string("const_566_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_237_cast_fp16 = transpose(perm = var_10931, x = var_10930_cast_fp16)[name = string("transpose_66")]; - tensor var_10944_cast_fp16 = mul(x = hidden_states_237_cast_fp16, y = const_566_promoted_to_fp16)[name = string("op_10944_cast_fp16")]; - bool input_299_interleave_0 = const()[name = string("input_299_interleave_0"), val = bool(false)]; - tensor input_299_cast_fp16 = concat(axis = var_10942, interleave = input_299_interleave_0, values = (hidden_states_237_cast_fp16, var_10944_cast_fp16))[name = string("input_299_cast_fp16")]; - tensor normed_357_axes_0 = const()[name = string("normed_357_axes_0"), val = tensor([-1])]; - fp16 var_10939_to_fp16 = const()[name = string("op_10939_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_357_cast_fp16 = layer_norm(axes = normed_357_axes_0, epsilon = var_10939_to_fp16, x = input_299_cast_fp16)[name = string("normed_357_cast_fp16")]; - tensor normed_359_begin_0 = const()[name = string("normed_359_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_359_end_0 = const()[name = string("normed_359_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_359_end_mask_0 = const()[name = string("normed_359_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_359_cast_fp16 = slice_by_index(begin = normed_359_begin_0, end = normed_359_end_0, end_mask = normed_359_end_mask_0, x = normed_357_cast_fp16)[name = string("normed_359_cast_fp16")]; - tensor var_10958_to_fp16 = const()[name = string("op_10958_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337976192)))]; - tensor hidden_states_239_cast_fp16 = mul(x = normed_359_cast_fp16, y = var_10958_to_fp16)[name = string("hidden_states_239_cast_fp16")]; - tensor hidden_states_241_cast_fp16 = add(x = hidden_states_235_cast_fp16, y = hidden_states_239_cast_fp16)[name = string("hidden_states_241_cast_fp16")]; - int32 var_11009 = const()[name = string("op_11009"), val = int32(-1)]; - fp16 const_570_promoted_to_fp16 = const()[name = string("const_570_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_11011_cast_fp16 = mul(x = hidden_states_241_cast_fp16, y = const_570_promoted_to_fp16)[name = string("op_11011_cast_fp16")]; - bool input_301_interleave_0 = const()[name = string("input_301_interleave_0"), val = bool(false)]; - tensor input_301_cast_fp16 = concat(axis = var_11009, interleave = input_301_interleave_0, values = (hidden_states_241_cast_fp16, var_11011_cast_fp16))[name = string("input_301_cast_fp16")]; - tensor normed_361_axes_0 = const()[name = string("normed_361_axes_0"), val = tensor([-1])]; - fp16 var_11006_to_fp16 = const()[name = string("op_11006_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_361_cast_fp16 = layer_norm(axes = normed_361_axes_0, epsilon = var_11006_to_fp16, x = input_301_cast_fp16)[name = string("normed_361_cast_fp16")]; - tensor normed_363_begin_0 = const()[name = string("normed_363_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_363_end_0 = const()[name = string("normed_363_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_363_end_mask_0 = const()[name = string("normed_363_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_363_cast_fp16 = slice_by_index(begin = normed_363_begin_0, end = normed_363_end_0, end_mask = normed_363_end_mask_0, x = normed_361_cast_fp16)[name = string("normed_363_cast_fp16")]; - tensor var_11025_to_fp16 = const()[name = string("op_11025_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337978560)))]; - tensor hidden_states_243_cast_fp16 = mul(x = normed_363_cast_fp16, y = var_11025_to_fp16)[name = string("hidden_states_243_cast_fp16")]; - tensor var_11030 = const()[name = string("op_11030"), val = tensor([0, 2, 1])]; - tensor var_11033_axes_0 = const()[name = string("op_11033_axes_0"), val = tensor([2])]; - tensor var_11031_cast_fp16 = transpose(perm = var_11030, x = hidden_states_243_cast_fp16)[name = string("transpose_65")]; - tensor var_11033_cast_fp16 = expand_dims(axes = var_11033_axes_0, x = var_11031_cast_fp16)[name = string("op_11033_cast_fp16")]; - string var_11049_pad_type_0 = const()[name = string("op_11049_pad_type_0"), val = string("valid")]; - tensor var_11049_strides_0 = const()[name = string("op_11049_strides_0"), val = tensor([1, 1])]; - tensor var_11049_pad_0 = const()[name = string("op_11049_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_11049_dilations_0 = const()[name = string("op_11049_dilations_0"), val = tensor([1, 1])]; - int32 var_11049_groups_0 = const()[name = string("op_11049_groups_0"), val = int32(1)]; - tensor var_11049 = conv(dilations = var_11049_dilations_0, groups = var_11049_groups_0, pad = var_11049_pad_0, pad_type = var_11049_pad_type_0, strides = var_11049_strides_0, weight = model_model_layers_15_self_attn_q_proj_weight_palettized, x = var_11033_cast_fp16)[name = string("op_11049")]; - tensor var_11054 = const()[name = string("op_11054"), val = tensor([1, 4, 1, 256])]; - tensor var_11055 = reshape(shape = var_11054, x = var_11049)[name = string("op_11055")]; - string var_11071_pad_type_0 = const()[name = string("op_11071_pad_type_0"), val = string("valid")]; - tensor var_11071_strides_0 = const()[name = string("op_11071_strides_0"), val = tensor([1, 1])]; - tensor var_11071_pad_0 = const()[name = string("op_11071_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_11071_dilations_0 = const()[name = string("op_11071_dilations_0"), val = tensor([1, 1])]; - int32 var_11071_groups_0 = const()[name = string("op_11071_groups_0"), val = int32(1)]; - tensor var_11071 = conv(dilations = var_11071_dilations_0, groups = var_11071_groups_0, pad = var_11071_pad_0, pad_type = var_11071_pad_type_0, strides = var_11071_strides_0, weight = model_model_layers_15_self_attn_k_proj_weight_palettized, x = var_11033_cast_fp16)[name = string("op_11071")]; - tensor var_11076 = const()[name = string("op_11076"), val = tensor([1, 1, 1, 256])]; - tensor var_11077 = reshape(shape = var_11076, x = var_11071)[name = string("op_11077")]; - string var_11093_pad_type_0 = const()[name = string("op_11093_pad_type_0"), val = string("valid")]; - tensor var_11093_strides_0 = const()[name = string("op_11093_strides_0"), val = tensor([1, 1])]; - tensor var_11093_pad_0 = const()[name = string("op_11093_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_11093_dilations_0 = const()[name = string("op_11093_dilations_0"), val = tensor([1, 1])]; - int32 var_11093_groups_0 = const()[name = string("op_11093_groups_0"), val = int32(1)]; - tensor var_11093 = conv(dilations = var_11093_dilations_0, groups = var_11093_groups_0, pad = var_11093_pad_0, pad_type = var_11093_pad_type_0, strides = var_11093_strides_0, weight = model_model_layers_15_self_attn_v_proj_weight_palettized, x = var_11033_cast_fp16)[name = string("op_11093")]; - tensor var_11098 = const()[name = string("op_11098"), val = tensor([1, 1, 1, 256])]; - tensor var_11099 = reshape(shape = var_11098, x = var_11093)[name = string("op_11099")]; - int32 var_11114 = const()[name = string("op_11114"), val = int32(-1)]; - fp16 const_574_promoted = const()[name = string("const_574_promoted"), val = fp16(-0x1p+0)]; - tensor var_11116 = mul(x = var_11055, y = const_574_promoted)[name = string("op_11116")]; - bool input_305_interleave_0 = const()[name = string("input_305_interleave_0"), val = bool(false)]; - tensor input_305 = concat(axis = var_11114, interleave = input_305_interleave_0, values = (var_11055, var_11116))[name = string("input_305")]; - tensor normed_365_axes_0 = const()[name = string("normed_365_axes_0"), val = tensor([-1])]; - fp16 var_11111_to_fp16 = const()[name = string("op_11111_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_365_cast_fp16 = layer_norm(axes = normed_365_axes_0, epsilon = var_11111_to_fp16, x = input_305)[name = string("normed_365_cast_fp16")]; - tensor normed_367_begin_0 = const()[name = string("normed_367_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_367_end_0 = const()[name = string("normed_367_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_367_end_mask_0 = const()[name = string("normed_367_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_367 = slice_by_index(begin = normed_367_begin_0, end = normed_367_end_0, end_mask = normed_367_end_mask_0, x = normed_365_cast_fp16)[name = string("normed_367")]; - tensor var_11130_to_fp16 = const()[name = string("op_11130_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337980928)))]; - tensor q_31_cast_fp16 = mul(x = normed_367, y = var_11130_to_fp16)[name = string("q_31_cast_fp16")]; - int32 var_11141 = const()[name = string("op_11141"), val = int32(-1)]; - fp16 const_578_promoted = const()[name = string("const_578_promoted"), val = fp16(-0x1p+0)]; - tensor var_11143 = mul(x = var_11077, y = const_578_promoted)[name = string("op_11143")]; - bool input_307_interleave_0 = const()[name = string("input_307_interleave_0"), val = bool(false)]; - tensor input_307 = concat(axis = var_11141, interleave = input_307_interleave_0, values = (var_11077, var_11143))[name = string("input_307")]; - tensor normed_369_axes_0 = const()[name = string("normed_369_axes_0"), val = tensor([-1])]; - fp16 var_11138_to_fp16 = const()[name = string("op_11138_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_369_cast_fp16 = layer_norm(axes = normed_369_axes_0, epsilon = var_11138_to_fp16, x = input_307)[name = string("normed_369_cast_fp16")]; - tensor normed_371_begin_0 = const()[name = string("normed_371_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_371_end_0 = const()[name = string("normed_371_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_371_end_mask_0 = const()[name = string("normed_371_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_371 = slice_by_index(begin = normed_371_begin_0, end = normed_371_end_0, end_mask = normed_371_end_mask_0, x = normed_369_cast_fp16)[name = string("normed_371")]; - tensor var_11157_to_fp16 = const()[name = string("op_11157_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337981504)))]; - tensor k_31_cast_fp16 = mul(x = normed_371, y = var_11157_to_fp16)[name = string("k_31_cast_fp16")]; - tensor var_11159_cast_fp16 = mul(x = q_31_cast_fp16, y = cos_1_cast_fp16)[name = string("op_11159_cast_fp16")]; - tensor x1_61_begin_0 = const()[name = string("x1_61_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_61_end_0 = const()[name = string("x1_61_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_61_end_mask_0 = const()[name = string("x1_61_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_61_cast_fp16 = slice_by_index(begin = x1_61_begin_0, end = x1_61_end_0, end_mask = x1_61_end_mask_0, x = q_31_cast_fp16)[name = string("x1_61_cast_fp16")]; - tensor x2_61_begin_0 = const()[name = string("x2_61_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_61_end_0 = const()[name = string("x2_61_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_61_end_mask_0 = const()[name = string("x2_61_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_61_cast_fp16 = slice_by_index(begin = x2_61_begin_0, end = x2_61_end_0, end_mask = x2_61_end_mask_0, x = q_31_cast_fp16)[name = string("x2_61_cast_fp16")]; - fp16 const_584_promoted_to_fp16 = const()[name = string("const_584_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_11180_cast_fp16 = mul(x = x2_61_cast_fp16, y = const_584_promoted_to_fp16)[name = string("op_11180_cast_fp16")]; - int32 var_11182 = const()[name = string("op_11182"), val = int32(-1)]; - bool var_11183_interleave_0 = const()[name = string("op_11183_interleave_0"), val = bool(false)]; - tensor var_11183_cast_fp16 = concat(axis = var_11182, interleave = var_11183_interleave_0, values = (var_11180_cast_fp16, x1_61_cast_fp16))[name = string("op_11183_cast_fp16")]; - tensor var_11184_cast_fp16 = mul(x = var_11183_cast_fp16, y = sin_1_cast_fp16)[name = string("op_11184_cast_fp16")]; - tensor query_states_61_cast_fp16 = add(x = var_11159_cast_fp16, y = var_11184_cast_fp16)[name = string("query_states_61_cast_fp16")]; - tensor var_11187_cast_fp16 = mul(x = k_31_cast_fp16, y = cos_1_cast_fp16)[name = string("op_11187_cast_fp16")]; - tensor x1_63_begin_0 = const()[name = string("x1_63_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_63_end_0 = const()[name = string("x1_63_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_63_end_mask_0 = const()[name = string("x1_63_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_63_cast_fp16 = slice_by_index(begin = x1_63_begin_0, end = x1_63_end_0, end_mask = x1_63_end_mask_0, x = k_31_cast_fp16)[name = string("x1_63_cast_fp16")]; - tensor x2_63_begin_0 = const()[name = string("x2_63_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_63_end_0 = const()[name = string("x2_63_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_63_end_mask_0 = const()[name = string("x2_63_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_63_cast_fp16 = slice_by_index(begin = x2_63_begin_0, end = x2_63_end_0, end_mask = x2_63_end_mask_0, x = k_31_cast_fp16)[name = string("x2_63_cast_fp16")]; - fp16 const_587_promoted_to_fp16 = const()[name = string("const_587_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_11208_cast_fp16 = mul(x = x2_63_cast_fp16, y = const_587_promoted_to_fp16)[name = string("op_11208_cast_fp16")]; - int32 var_11210 = const()[name = string("op_11210"), val = int32(-1)]; - bool var_11211_interleave_0 = const()[name = string("op_11211_interleave_0"), val = bool(false)]; - tensor var_11211_cast_fp16 = concat(axis = var_11210, interleave = var_11211_interleave_0, values = (var_11208_cast_fp16, x1_63_cast_fp16))[name = string("op_11211_cast_fp16")]; - tensor var_11212_cast_fp16 = mul(x = var_11211_cast_fp16, y = sin_1_cast_fp16)[name = string("op_11212_cast_fp16")]; - tensor key_states_61_cast_fp16 = add(x = var_11187_cast_fp16, y = var_11212_cast_fp16)[name = string("key_states_61_cast_fp16")]; - tensor key_slice_27_begin_0 = const()[name = string("key_slice_27_begin_0"), val = tensor([13, 0, 0, 0])]; - tensor key_slice_27_end_0 = const()[name = string("key_slice_27_end_0"), val = tensor([14, 1, 512, 256])]; - tensor key_slice_27_end_mask_0 = const()[name = string("key_slice_27_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_27_cast_fp16 = slice_by_index(begin = key_slice_27_begin_0, end = key_slice_27_end_0, end_mask = key_slice_27_end_mask_0, x = coreml_update_state_81)[name = string("key_slice_27_cast_fp16")]; - tensor key_tail_27_begin_0 = const()[name = string("key_tail_27_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor key_tail_27_end_0 = const()[name = string("key_tail_27_end_0"), val = tensor([1, 1, 512, 256])]; - tensor key_tail_27_cast_fp16 = slice_by_index(begin = key_tail_27_begin_0, end = key_tail_27_end_0, x = key_slice_27_cast_fp16)[name = string("key_tail_27_cast_fp16")]; - int32 var_11225 = const()[name = string("op_11225"), val = int32(2)]; - bool shifted_key_27_interleave_0 = const()[name = string("shifted_key_27_interleave_0"), val = bool(false)]; - tensor shifted_key_27_cast_fp16 = concat(axis = var_11225, interleave = shifted_key_27_interleave_0, values = (key_tail_27_cast_fp16, key_states_61_cast_fp16))[name = string("shifted_key_27_cast_fp16")]; - tensor concat_68 = const()[name = string("concat_68"), val = tensor([13, 0, 0, 0])]; - tensor concat_69 = const()[name = string("concat_69"), val = tensor([14, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_27_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_27_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_27_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_27_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_27_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_27_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_27_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_68, begin_mask = model_model_kv_cache_local_internal_tensor_assign_27_begin_mask_0, end = concat_69, end_mask = model_model_kv_cache_local_internal_tensor_assign_27_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_27_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_27_stride_0, update = shifted_key_27_cast_fp16, x = coreml_update_state_81)[name = string("model_model_kv_cache_local_internal_tensor_assign_27_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_27_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_82_write_state")]; - tensor coreml_update_state_82 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_82")]; - tensor value_slice_27_begin_0 = const()[name = string("value_slice_27_begin_0"), val = tensor([35, 0, 0, 0])]; - tensor value_slice_27_end_0 = const()[name = string("value_slice_27_end_0"), val = tensor([36, 1, 512, 256])]; - tensor value_slice_27_end_mask_0 = const()[name = string("value_slice_27_end_mask_0"), val = tensor([false, true, true, true])]; - tensor value_slice_27_cast_fp16 = slice_by_index(begin = value_slice_27_begin_0, end = value_slice_27_end_0, end_mask = value_slice_27_end_mask_0, x = coreml_update_state_82)[name = string("value_slice_27_cast_fp16")]; - tensor value_tail_27_begin_0 = const()[name = string("value_tail_27_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor value_tail_27_end_0 = const()[name = string("value_tail_27_end_0"), val = tensor([1, 1, 512, 256])]; - tensor value_tail_27_cast_fp16 = slice_by_index(begin = value_tail_27_begin_0, end = value_tail_27_end_0, x = value_slice_27_cast_fp16)[name = string("value_tail_27_cast_fp16")]; - int32 var_11259 = const()[name = string("op_11259"), val = int32(2)]; - bool shifted_value_27_interleave_0 = const()[name = string("shifted_value_27_interleave_0"), val = bool(false)]; - tensor shifted_value_27_cast_fp16 = concat(axis = var_11259, interleave = shifted_value_27_interleave_0, values = (value_tail_27_cast_fp16, var_11099))[name = string("shifted_value_27_cast_fp16")]; - tensor concat_70 = const()[name = string("concat_70"), val = tensor([35, 0, 0, 0])]; - tensor concat_71 = const()[name = string("concat_71"), val = tensor([36, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_28_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_28_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_28_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_28_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_28_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_28_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_28_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_70, begin_mask = model_model_kv_cache_local_internal_tensor_assign_28_begin_mask_0, end = concat_71, end_mask = model_model_kv_cache_local_internal_tensor_assign_28_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_28_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_28_stride_0, update = shifted_value_27_cast_fp16, x = coreml_update_state_82)[name = string("model_model_kv_cache_local_internal_tensor_assign_28_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_28_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_83_write_state")]; - tensor coreml_update_state_83 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_83")]; - tensor var_11287_begin_0 = const()[name = string("op_11287_begin_0"), val = tensor([13, 0, 0, 0])]; - tensor var_11287_end_0 = const()[name = string("op_11287_end_0"), val = tensor([14, 1, 512, 256])]; - tensor var_11287_end_mask_0 = const()[name = string("op_11287_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_11287_cast_fp16 = slice_by_index(begin = var_11287_begin_0, end = var_11287_end_0, end_mask = var_11287_end_mask_0, x = coreml_update_state_83)[name = string("op_11287_cast_fp16")]; - tensor var_11294_begin_0 = const()[name = string("op_11294_begin_0"), val = tensor([35, 0, 0, 0])]; - tensor var_11294_end_0 = const()[name = string("op_11294_end_0"), val = tensor([36, 1, 512, 256])]; - tensor var_11294_end_mask_0 = const()[name = string("op_11294_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_11294_cast_fp16 = slice_by_index(begin = var_11294_begin_0, end = var_11294_end_0, end_mask = var_11294_end_mask_0, x = coreml_update_state_83)[name = string("op_11294_cast_fp16")]; - tensor var_11331 = const()[name = string("op_11331"), val = tensor([1, 4, 1, 1])]; - tensor x_245_cast_fp16 = tile(reps = var_11331, x = var_11287_cast_fp16)[name = string("x_245_cast_fp16")]; - tensor var_11351 = const()[name = string("op_11351"), val = tensor([1, 4, 1, 1])]; - tensor x_251_cast_fp16 = tile(reps = var_11351, x = var_11294_cast_fp16)[name = string("x_251_cast_fp16")]; - bool var_11378_transpose_x_1 = const()[name = string("op_11378_transpose_x_1"), val = bool(false)]; - bool var_11378_transpose_y_1 = const()[name = string("op_11378_transpose_y_1"), val = bool(true)]; - tensor var_11378 = matmul(transpose_x = var_11378_transpose_x_1, transpose_y = var_11378_transpose_y_1, x = query_states_61_cast_fp16, y = x_245_cast_fp16)[name = string("op_11378")]; - fp16 var_11379_to_fp16 = const()[name = string("op_11379_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_91_cast_fp16 = mul(x = var_11378, y = var_11379_to_fp16)[name = string("attn_weights_91_cast_fp16")]; - tensor attn_weights_93_cast_fp16 = add(x = attn_weights_91_cast_fp16, y = var_2059)[name = string("attn_weights_93_cast_fp16")]; - int32 var_11414 = const()[name = string("op_11414"), val = int32(-1)]; - tensor attn_weights_95_cast_fp16 = softmax(axis = var_11414, x = attn_weights_93_cast_fp16)[name = string("attn_weights_95_cast_fp16")]; - bool attn_output_151_transpose_x_0 = const()[name = string("attn_output_151_transpose_x_0"), val = bool(false)]; - bool attn_output_151_transpose_y_0 = const()[name = string("attn_output_151_transpose_y_0"), val = bool(false)]; - tensor attn_output_151_cast_fp16 = matmul(transpose_x = attn_output_151_transpose_x_0, transpose_y = attn_output_151_transpose_y_0, x = attn_weights_95_cast_fp16, y = x_251_cast_fp16)[name = string("attn_output_151_cast_fp16")]; - tensor var_11425_perm_0 = const()[name = string("op_11425_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_11429 = const()[name = string("op_11429"), val = tensor([1, 1, 1024])]; - tensor var_11425_cast_fp16 = transpose(perm = var_11425_perm_0, x = attn_output_151_cast_fp16)[name = string("transpose_64")]; - tensor attn_output_155_cast_fp16 = reshape(shape = var_11429, x = var_11425_cast_fp16)[name = string("attn_output_155_cast_fp16")]; - tensor var_11434 = const()[name = string("op_11434"), val = tensor([0, 2, 1])]; - string var_11450_pad_type_0 = const()[name = string("op_11450_pad_type_0"), val = string("valid")]; - int32 var_11450_groups_0 = const()[name = string("op_11450_groups_0"), val = int32(1)]; - tensor var_11450_strides_0 = const()[name = string("op_11450_strides_0"), val = tensor([1])]; - tensor var_11450_pad_0 = const()[name = string("op_11450_pad_0"), val = tensor([0, 0])]; - tensor var_11450_dilations_0 = const()[name = string("op_11450_dilations_0"), val = tensor([1])]; - tensor squeeze_15_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337982080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338866880))))[name = string("squeeze_15_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_11435_cast_fp16 = transpose(perm = var_11434, x = attn_output_155_cast_fp16)[name = string("transpose_63")]; - tensor var_11450_cast_fp16 = conv(dilations = var_11450_dilations_0, groups = var_11450_groups_0, pad = var_11450_pad_0, pad_type = var_11450_pad_type_0, strides = var_11450_strides_0, weight = squeeze_15_cast_fp16_to_fp32_to_fp16_palettized, x = var_11435_cast_fp16)[name = string("op_11450_cast_fp16")]; - tensor var_11454 = const()[name = string("op_11454"), val = tensor([0, 2, 1])]; - int32 var_11465 = const()[name = string("op_11465"), val = int32(-1)]; - fp16 const_596_promoted_to_fp16 = const()[name = string("const_596_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_249_cast_fp16 = transpose(perm = var_11454, x = var_11450_cast_fp16)[name = string("transpose_62")]; - tensor var_11467_cast_fp16 = mul(x = hidden_states_249_cast_fp16, y = const_596_promoted_to_fp16)[name = string("op_11467_cast_fp16")]; - bool input_311_interleave_0 = const()[name = string("input_311_interleave_0"), val = bool(false)]; - tensor input_311_cast_fp16 = concat(axis = var_11465, interleave = input_311_interleave_0, values = (hidden_states_249_cast_fp16, var_11467_cast_fp16))[name = string("input_311_cast_fp16")]; - tensor normed_373_axes_0 = const()[name = string("normed_373_axes_0"), val = tensor([-1])]; - fp16 var_11462_to_fp16 = const()[name = string("op_11462_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_373_cast_fp16 = layer_norm(axes = normed_373_axes_0, epsilon = var_11462_to_fp16, x = input_311_cast_fp16)[name = string("normed_373_cast_fp16")]; - tensor normed_375_begin_0 = const()[name = string("normed_375_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_375_end_0 = const()[name = string("normed_375_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_375_end_mask_0 = const()[name = string("normed_375_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_375_cast_fp16 = slice_by_index(begin = normed_375_begin_0, end = normed_375_end_0, end_mask = normed_375_end_mask_0, x = normed_373_cast_fp16)[name = string("normed_375_cast_fp16")]; - tensor var_11481_to_fp16 = const()[name = string("op_11481_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338885376)))]; - tensor attn_output_159_cast_fp16 = mul(x = normed_375_cast_fp16, y = var_11481_to_fp16)[name = string("attn_output_159_cast_fp16")]; - tensor hidden_states_251_cast_fp16 = add(x = hidden_states_241_cast_fp16, y = attn_output_159_cast_fp16)[name = string("hidden_states_251_cast_fp16")]; - int32 var_11494 = const()[name = string("op_11494"), val = int32(-1)]; - fp16 const_600_promoted_to_fp16 = const()[name = string("const_600_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_11496_cast_fp16 = mul(x = hidden_states_251_cast_fp16, y = const_600_promoted_to_fp16)[name = string("op_11496_cast_fp16")]; - bool input_313_interleave_0 = const()[name = string("input_313_interleave_0"), val = bool(false)]; - tensor input_313_cast_fp16 = concat(axis = var_11494, interleave = input_313_interleave_0, values = (hidden_states_251_cast_fp16, var_11496_cast_fp16))[name = string("input_313_cast_fp16")]; - tensor normed_377_axes_0 = const()[name = string("normed_377_axes_0"), val = tensor([-1])]; - fp16 var_11491_to_fp16 = const()[name = string("op_11491_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_377_cast_fp16 = layer_norm(axes = normed_377_axes_0, epsilon = var_11491_to_fp16, x = input_313_cast_fp16)[name = string("normed_377_cast_fp16")]; - tensor normed_379_begin_0 = const()[name = string("normed_379_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_379_end_0 = const()[name = string("normed_379_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_379_end_mask_0 = const()[name = string("normed_379_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_379_cast_fp16 = slice_by_index(begin = normed_379_begin_0, end = normed_379_end_0, end_mask = normed_379_end_mask_0, x = normed_377_cast_fp16)[name = string("normed_379_cast_fp16")]; - tensor var_11510_to_fp16 = const()[name = string("op_11510_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338887744)))]; - tensor x_253_cast_fp16 = mul(x = normed_379_cast_fp16, y = var_11510_to_fp16)[name = string("x_253_cast_fp16")]; - tensor var_11522 = const()[name = string("op_11522"), val = tensor([0, 2, 1])]; - tensor input_315_axes_0 = const()[name = string("input_315_axes_0"), val = tensor([2])]; - tensor var_11523_cast_fp16 = transpose(perm = var_11522, x = x_253_cast_fp16)[name = string("transpose_61")]; - tensor input_315_cast_fp16 = expand_dims(axes = input_315_axes_0, x = var_11523_cast_fp16)[name = string("input_315_cast_fp16")]; - string x_255_pad_type_0 = const()[name = string("x_255_pad_type_0"), val = string("valid")]; - tensor x_255_strides_0 = const()[name = string("x_255_strides_0"), val = tensor([1, 1])]; - tensor x_255_pad_0 = const()[name = string("x_255_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_255_dilations_0 = const()[name = string("x_255_dilations_0"), val = tensor([1, 1])]; - int32 x_255_groups_0 = const()[name = string("x_255_groups_0"), val = int32(1)]; - tensor model_model_layers_15_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(743448960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(749420992))))[name = string("model_model_layers_15_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_255_cast_fp16 = conv(dilations = x_255_dilations_0, groups = x_255_groups_0, pad = x_255_pad_0, pad_type = x_255_pad_type_0, strides = x_255_strides_0, weight = model_model_layers_15_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_315_cast_fp16)[name = string("x_255_cast_fp16")]; - string b_31_pad_type_0 = const()[name = string("b_31_pad_type_0"), val = string("valid")]; - tensor b_31_strides_0 = const()[name = string("b_31_strides_0"), val = tensor([1, 1])]; - tensor b_31_pad_0 = const()[name = string("b_31_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_31_dilations_0 = const()[name = string("b_31_dilations_0"), val = tensor([1, 1])]; - int32 b_31_groups_0 = const()[name = string("b_31_groups_0"), val = int32(1)]; - tensor model_model_layers_15_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(749531648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755503680))))[name = string("model_model_layers_15_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_31_cast_fp16 = conv(dilations = b_31_dilations_0, groups = b_31_groups_0, pad = b_31_pad_0, pad_type = b_31_pad_type_0, strides = b_31_strides_0, weight = model_model_layers_15_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_315_cast_fp16)[name = string("b_31_cast_fp16")]; - string var_11548_mode_0 = const()[name = string("op_11548_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_11548_cast_fp16 = gelu(mode = var_11548_mode_0, x = x_255_cast_fp16)[name = string("op_11548_cast_fp16")]; - tensor input_317_cast_fp16 = mul(x = var_11548_cast_fp16, y = b_31_cast_fp16)[name = string("input_317_cast_fp16")]; - string e_31_pad_type_0 = const()[name = string("e_31_pad_type_0"), val = string("valid")]; - tensor e_31_strides_0 = const()[name = string("e_31_strides_0"), val = tensor([1, 1])]; - tensor e_31_pad_0 = const()[name = string("e_31_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_31_dilations_0 = const()[name = string("e_31_dilations_0"), val = tensor([1, 1])]; - int32 e_31_groups_0 = const()[name = string("e_31_groups_0"), val = int32(1)]; - tensor model_model_layers_15_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(351055488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357027520))))[name = string("model_model_layers_15_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_31_cast_fp16 = conv(dilations = e_31_dilations_0, groups = e_31_groups_0, pad = e_31_pad_0, pad_type = e_31_pad_type_0, strides = e_31_strides_0, weight = model_model_layers_15_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_317_cast_fp16)[name = string("e_31_cast_fp16")]; - tensor var_11556_axes_0 = const()[name = string("op_11556_axes_0"), val = tensor([2])]; - tensor var_11556_cast_fp16 = squeeze(axes = var_11556_axes_0, x = e_31_cast_fp16)[name = string("op_11556_cast_fp16")]; - tensor var_11557 = const()[name = string("op_11557"), val = tensor([0, 2, 1])]; - int32 var_11568 = const()[name = string("op_11568"), val = int32(-1)]; - fp16 const_604_promoted_to_fp16 = const()[name = string("const_604_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_253_cast_fp16 = transpose(perm = var_11557, x = var_11556_cast_fp16)[name = string("transpose_60")]; - tensor var_11570_cast_fp16 = mul(x = hidden_states_253_cast_fp16, y = const_604_promoted_to_fp16)[name = string("op_11570_cast_fp16")]; - bool input_319_interleave_0 = const()[name = string("input_319_interleave_0"), val = bool(false)]; - tensor input_319_cast_fp16 = concat(axis = var_11568, interleave = input_319_interleave_0, values = (hidden_states_253_cast_fp16, var_11570_cast_fp16))[name = string("input_319_cast_fp16")]; - tensor normed_381_axes_0 = const()[name = string("normed_381_axes_0"), val = tensor([-1])]; - fp16 var_11565_to_fp16 = const()[name = string("op_11565_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_381_cast_fp16 = layer_norm(axes = normed_381_axes_0, epsilon = var_11565_to_fp16, x = input_319_cast_fp16)[name = string("normed_381_cast_fp16")]; - tensor normed_383_begin_0 = const()[name = string("normed_383_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_383_end_0 = const()[name = string("normed_383_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_383_end_mask_0 = const()[name = string("normed_383_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_383_cast_fp16 = slice_by_index(begin = normed_383_begin_0, end = normed_383_end_0, end_mask = normed_383_end_mask_0, x = normed_381_cast_fp16)[name = string("normed_383_cast_fp16")]; - tensor var_11584_to_fp16 = const()[name = string("op_11584_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357046016)))]; - tensor hidden_states_255_cast_fp16 = mul(x = normed_383_cast_fp16, y = var_11584_to_fp16)[name = string("hidden_states_255_cast_fp16")]; - tensor hidden_states_257_cast_fp16 = add(x = hidden_states_251_cast_fp16, y = hidden_states_255_cast_fp16)[name = string("hidden_states_257_cast_fp16")]; - int32 var_11635 = const()[name = string("op_11635"), val = int32(-1)]; - fp16 const_608_promoted_to_fp16 = const()[name = string("const_608_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_11637_cast_fp16 = mul(x = hidden_states_257_cast_fp16, y = const_608_promoted_to_fp16)[name = string("op_11637_cast_fp16")]; - bool input_321_interleave_0 = const()[name = string("input_321_interleave_0"), val = bool(false)]; - tensor input_321_cast_fp16 = concat(axis = var_11635, interleave = input_321_interleave_0, values = (hidden_states_257_cast_fp16, var_11637_cast_fp16))[name = string("input_321_cast_fp16")]; - tensor normed_385_axes_0 = const()[name = string("normed_385_axes_0"), val = tensor([-1])]; - fp16 var_11632_to_fp16 = const()[name = string("op_11632_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_385_cast_fp16 = layer_norm(axes = normed_385_axes_0, epsilon = var_11632_to_fp16, x = input_321_cast_fp16)[name = string("normed_385_cast_fp16")]; - tensor normed_387_begin_0 = const()[name = string("normed_387_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_387_end_0 = const()[name = string("normed_387_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_387_end_mask_0 = const()[name = string("normed_387_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_387_cast_fp16 = slice_by_index(begin = normed_387_begin_0, end = normed_387_end_0, end_mask = normed_387_end_mask_0, x = normed_385_cast_fp16)[name = string("normed_387_cast_fp16")]; - tensor var_11651_to_fp16 = const()[name = string("op_11651_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357048384)))]; - tensor hidden_states_259_cast_fp16 = mul(x = normed_387_cast_fp16, y = var_11651_to_fp16)[name = string("hidden_states_259_cast_fp16")]; - tensor var_11656 = const()[name = string("op_11656"), val = tensor([0, 2, 1])]; - tensor var_11659_axes_0 = const()[name = string("op_11659_axes_0"), val = tensor([2])]; - tensor var_11657_cast_fp16 = transpose(perm = var_11656, x = hidden_states_259_cast_fp16)[name = string("transpose_59")]; - tensor var_11659_cast_fp16 = expand_dims(axes = var_11659_axes_0, x = var_11657_cast_fp16)[name = string("op_11659_cast_fp16")]; - string var_11675_pad_type_0 = const()[name = string("op_11675_pad_type_0"), val = string("valid")]; - tensor var_11675_strides_0 = const()[name = string("op_11675_strides_0"), val = tensor([1, 1])]; - tensor var_11675_pad_0 = const()[name = string("op_11675_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_11675_dilations_0 = const()[name = string("op_11675_dilations_0"), val = tensor([1, 1])]; - int32 var_11675_groups_0 = const()[name = string("op_11675_groups_0"), val = int32(1)]; - tensor var_11675 = conv(dilations = var_11675_dilations_0, groups = var_11675_groups_0, pad = var_11675_pad_0, pad_type = var_11675_pad_type_0, strides = var_11675_strides_0, weight = model_model_layers_16_self_attn_q_proj_weight_palettized, x = var_11659_cast_fp16)[name = string("op_11675")]; - tensor var_11680 = const()[name = string("op_11680"), val = tensor([1, 4, 1, 256])]; - tensor var_11681 = reshape(shape = var_11680, x = var_11675)[name = string("op_11681")]; - string var_11697_pad_type_0 = const()[name = string("op_11697_pad_type_0"), val = string("valid")]; - tensor var_11697_strides_0 = const()[name = string("op_11697_strides_0"), val = tensor([1, 1])]; - tensor var_11697_pad_0 = const()[name = string("op_11697_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_11697_dilations_0 = const()[name = string("op_11697_dilations_0"), val = tensor([1, 1])]; - int32 var_11697_groups_0 = const()[name = string("op_11697_groups_0"), val = int32(1)]; - tensor var_11697 = conv(dilations = var_11697_dilations_0, groups = var_11697_groups_0, pad = var_11697_pad_0, pad_type = var_11697_pad_type_0, strides = var_11697_strides_0, weight = model_model_layers_16_self_attn_k_proj_weight_palettized, x = var_11659_cast_fp16)[name = string("op_11697")]; - tensor var_11702 = const()[name = string("op_11702"), val = tensor([1, 1, 1, 256])]; - tensor var_11703 = reshape(shape = var_11702, x = var_11697)[name = string("op_11703")]; - string var_11719_pad_type_0 = const()[name = string("op_11719_pad_type_0"), val = string("valid")]; - tensor var_11719_strides_0 = const()[name = string("op_11719_strides_0"), val = tensor([1, 1])]; - tensor var_11719_pad_0 = const()[name = string("op_11719_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_11719_dilations_0 = const()[name = string("op_11719_dilations_0"), val = tensor([1, 1])]; - int32 var_11719_groups_0 = const()[name = string("op_11719_groups_0"), val = int32(1)]; - tensor var_11719 = conv(dilations = var_11719_dilations_0, groups = var_11719_groups_0, pad = var_11719_pad_0, pad_type = var_11719_pad_type_0, strides = var_11719_strides_0, weight = model_model_layers_16_self_attn_v_proj_weight_palettized, x = var_11659_cast_fp16)[name = string("op_11719")]; - tensor var_11724 = const()[name = string("op_11724"), val = tensor([1, 1, 1, 256])]; - tensor var_11725 = reshape(shape = var_11724, x = var_11719)[name = string("op_11725")]; - int32 var_11740 = const()[name = string("op_11740"), val = int32(-1)]; - fp16 const_612_promoted = const()[name = string("const_612_promoted"), val = fp16(-0x1p+0)]; - tensor var_11742 = mul(x = var_11681, y = const_612_promoted)[name = string("op_11742")]; - bool input_325_interleave_0 = const()[name = string("input_325_interleave_0"), val = bool(false)]; - tensor input_325 = concat(axis = var_11740, interleave = input_325_interleave_0, values = (var_11681, var_11742))[name = string("input_325")]; - tensor normed_389_axes_0 = const()[name = string("normed_389_axes_0"), val = tensor([-1])]; - fp16 var_11737_to_fp16 = const()[name = string("op_11737_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_389_cast_fp16 = layer_norm(axes = normed_389_axes_0, epsilon = var_11737_to_fp16, x = input_325)[name = string("normed_389_cast_fp16")]; - tensor normed_391_begin_0 = const()[name = string("normed_391_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_391_end_0 = const()[name = string("normed_391_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_391_end_mask_0 = const()[name = string("normed_391_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_391 = slice_by_index(begin = normed_391_begin_0, end = normed_391_end_0, end_mask = normed_391_end_mask_0, x = normed_389_cast_fp16)[name = string("normed_391")]; - tensor var_11756_to_fp16 = const()[name = string("op_11756_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357050752)))]; - tensor q_33_cast_fp16 = mul(x = normed_391, y = var_11756_to_fp16)[name = string("q_33_cast_fp16")]; - int32 var_11767 = const()[name = string("op_11767"), val = int32(-1)]; - fp16 const_616_promoted = const()[name = string("const_616_promoted"), val = fp16(-0x1p+0)]; - tensor var_11769 = mul(x = var_11703, y = const_616_promoted)[name = string("op_11769")]; - bool input_327_interleave_0 = const()[name = string("input_327_interleave_0"), val = bool(false)]; - tensor input_327 = concat(axis = var_11767, interleave = input_327_interleave_0, values = (var_11703, var_11769))[name = string("input_327")]; - tensor normed_393_axes_0 = const()[name = string("normed_393_axes_0"), val = tensor([-1])]; - fp16 var_11764_to_fp16 = const()[name = string("op_11764_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_393_cast_fp16 = layer_norm(axes = normed_393_axes_0, epsilon = var_11764_to_fp16, x = input_327)[name = string("normed_393_cast_fp16")]; - tensor normed_395_begin_0 = const()[name = string("normed_395_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_395_end_0 = const()[name = string("normed_395_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_395_end_mask_0 = const()[name = string("normed_395_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_395 = slice_by_index(begin = normed_395_begin_0, end = normed_395_end_0, end_mask = normed_395_end_mask_0, x = normed_393_cast_fp16)[name = string("normed_395")]; - tensor var_11783_to_fp16 = const()[name = string("op_11783_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357051328)))]; - tensor k_33_cast_fp16 = mul(x = normed_395, y = var_11783_to_fp16)[name = string("k_33_cast_fp16")]; - tensor var_11785_cast_fp16 = mul(x = q_33_cast_fp16, y = cos_1_cast_fp16)[name = string("op_11785_cast_fp16")]; - tensor x1_65_begin_0 = const()[name = string("x1_65_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_65_end_0 = const()[name = string("x1_65_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_65_end_mask_0 = const()[name = string("x1_65_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_65_cast_fp16 = slice_by_index(begin = x1_65_begin_0, end = x1_65_end_0, end_mask = x1_65_end_mask_0, x = q_33_cast_fp16)[name = string("x1_65_cast_fp16")]; - tensor x2_65_begin_0 = const()[name = string("x2_65_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_65_end_0 = const()[name = string("x2_65_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_65_end_mask_0 = const()[name = string("x2_65_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_65_cast_fp16 = slice_by_index(begin = x2_65_begin_0, end = x2_65_end_0, end_mask = x2_65_end_mask_0, x = q_33_cast_fp16)[name = string("x2_65_cast_fp16")]; - fp16 const_622_promoted_to_fp16 = const()[name = string("const_622_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_11806_cast_fp16 = mul(x = x2_65_cast_fp16, y = const_622_promoted_to_fp16)[name = string("op_11806_cast_fp16")]; - int32 var_11808 = const()[name = string("op_11808"), val = int32(-1)]; - bool var_11809_interleave_0 = const()[name = string("op_11809_interleave_0"), val = bool(false)]; - tensor var_11809_cast_fp16 = concat(axis = var_11808, interleave = var_11809_interleave_0, values = (var_11806_cast_fp16, x1_65_cast_fp16))[name = string("op_11809_cast_fp16")]; - tensor var_11810_cast_fp16 = mul(x = var_11809_cast_fp16, y = sin_1_cast_fp16)[name = string("op_11810_cast_fp16")]; - tensor query_states_65_cast_fp16 = add(x = var_11785_cast_fp16, y = var_11810_cast_fp16)[name = string("query_states_65_cast_fp16")]; - tensor var_11813_cast_fp16 = mul(x = k_33_cast_fp16, y = cos_1_cast_fp16)[name = string("op_11813_cast_fp16")]; - tensor x1_67_begin_0 = const()[name = string("x1_67_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_67_end_0 = const()[name = string("x1_67_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_67_end_mask_0 = const()[name = string("x1_67_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_67_cast_fp16 = slice_by_index(begin = x1_67_begin_0, end = x1_67_end_0, end_mask = x1_67_end_mask_0, x = k_33_cast_fp16)[name = string("x1_67_cast_fp16")]; - tensor x2_67_begin_0 = const()[name = string("x2_67_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_67_end_0 = const()[name = string("x2_67_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_67_end_mask_0 = const()[name = string("x2_67_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_67_cast_fp16 = slice_by_index(begin = x2_67_begin_0, end = x2_67_end_0, end_mask = x2_67_end_mask_0, x = k_33_cast_fp16)[name = string("x2_67_cast_fp16")]; - fp16 const_625_promoted_to_fp16 = const()[name = string("const_625_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_11834_cast_fp16 = mul(x = x2_67_cast_fp16, y = const_625_promoted_to_fp16)[name = string("op_11834_cast_fp16")]; - int32 var_11836 = const()[name = string("op_11836"), val = int32(-1)]; - bool var_11837_interleave_0 = const()[name = string("op_11837_interleave_0"), val = bool(false)]; - tensor var_11837_cast_fp16 = concat(axis = var_11836, interleave = var_11837_interleave_0, values = (var_11834_cast_fp16, x1_67_cast_fp16))[name = string("op_11837_cast_fp16")]; - tensor var_11838_cast_fp16 = mul(x = var_11837_cast_fp16, y = sin_1_cast_fp16)[name = string("op_11838_cast_fp16")]; - tensor key_states_65_cast_fp16 = add(x = var_11813_cast_fp16, y = var_11838_cast_fp16)[name = string("key_states_65_cast_fp16")]; - tensor key_slice_29_begin_0 = const()[name = string("key_slice_29_begin_0"), val = tensor([14, 0, 0, 0])]; - tensor key_slice_29_end_0 = const()[name = string("key_slice_29_end_0"), val = tensor([15, 1, 512, 256])]; - tensor key_slice_29_end_mask_0 = const()[name = string("key_slice_29_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_29_cast_fp16 = slice_by_index(begin = key_slice_29_begin_0, end = key_slice_29_end_0, end_mask = key_slice_29_end_mask_0, x = coreml_update_state_83)[name = string("key_slice_29_cast_fp16")]; - tensor key_tail_29_begin_0 = const()[name = string("key_tail_29_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor key_tail_29_end_0 = const()[name = string("key_tail_29_end_0"), val = tensor([1, 1, 512, 256])]; - tensor key_tail_29_cast_fp16 = slice_by_index(begin = key_tail_29_begin_0, end = key_tail_29_end_0, x = key_slice_29_cast_fp16)[name = string("key_tail_29_cast_fp16")]; - int32 var_11851 = const()[name = string("op_11851"), val = int32(2)]; - bool shifted_key_29_interleave_0 = const()[name = string("shifted_key_29_interleave_0"), val = bool(false)]; - tensor shifted_key_29_cast_fp16 = concat(axis = var_11851, interleave = shifted_key_29_interleave_0, values = (key_tail_29_cast_fp16, key_states_65_cast_fp16))[name = string("shifted_key_29_cast_fp16")]; - tensor concat_72 = const()[name = string("concat_72"), val = tensor([14, 0, 0, 0])]; - tensor concat_73 = const()[name = string("concat_73"), val = tensor([15, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_29_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_29_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_29_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_29_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_29_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_29_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_29_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_29_cast_fp16 = slice_update(begin = concat_72, begin_mask = model_model_kv_cache_local_internal_tensor_assign_29_begin_mask_0, end = concat_73, end_mask = model_model_kv_cache_local_internal_tensor_assign_29_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_29_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_29_stride_0, update = shifted_key_29_cast_fp16, x = coreml_update_state_83)[name = string("model_model_kv_cache_local_internal_tensor_assign_29_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_29_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_84_write_state")]; - tensor coreml_update_state_84 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_84")]; - tensor value_slice_29_begin_0 = const()[name = string("value_slice_29_begin_0"), val = tensor([36, 0, 0, 0])]; - tensor value_slice_29_end_0 = const()[name = string("value_slice_29_end_0"), val = tensor([37, 1, 512, 256])]; - tensor value_slice_29_end_mask_0 = const()[name = string("value_slice_29_end_mask_0"), val = tensor([false, true, true, true])]; - tensor value_slice_29_cast_fp16 = slice_by_index(begin = value_slice_29_begin_0, end = value_slice_29_end_0, end_mask = value_slice_29_end_mask_0, x = coreml_update_state_84)[name = string("value_slice_29_cast_fp16")]; - tensor value_tail_29_begin_0 = const()[name = string("value_tail_29_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor value_tail_29_end_0 = const()[name = string("value_tail_29_end_0"), val = tensor([1, 1, 512, 256])]; - tensor value_tail_29_cast_fp16 = slice_by_index(begin = value_tail_29_begin_0, end = value_tail_29_end_0, x = value_slice_29_cast_fp16)[name = string("value_tail_29_cast_fp16")]; - int32 var_11885 = const()[name = string("op_11885"), val = int32(2)]; - bool shifted_value_29_interleave_0 = const()[name = string("shifted_value_29_interleave_0"), val = bool(false)]; - tensor shifted_value_29_cast_fp16 = concat(axis = var_11885, interleave = shifted_value_29_interleave_0, values = (value_tail_29_cast_fp16, var_11725))[name = string("shifted_value_29_cast_fp16")]; - tensor concat_74 = const()[name = string("concat_74"), val = tensor([36, 0, 0, 0])]; - tensor concat_75 = const()[name = string("concat_75"), val = tensor([37, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_30_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_30_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_30_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_30_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_30_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_30_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_30_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_30_cast_fp16 = slice_update(begin = concat_74, begin_mask = model_model_kv_cache_local_internal_tensor_assign_30_begin_mask_0, end = concat_75, end_mask = model_model_kv_cache_local_internal_tensor_assign_30_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_30_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_30_stride_0, update = shifted_value_29_cast_fp16, x = coreml_update_state_84)[name = string("model_model_kv_cache_local_internal_tensor_assign_30_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_30_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_85_write_state")]; - tensor coreml_update_state_85 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_85")]; - tensor var_11913_begin_0 = const()[name = string("op_11913_begin_0"), val = tensor([14, 0, 0, 0])]; - tensor var_11913_end_0 = const()[name = string("op_11913_end_0"), val = tensor([15, 1, 512, 256])]; - tensor var_11913_end_mask_0 = const()[name = string("op_11913_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_11913_cast_fp16 = slice_by_index(begin = var_11913_begin_0, end = var_11913_end_0, end_mask = var_11913_end_mask_0, x = coreml_update_state_85)[name = string("op_11913_cast_fp16")]; - tensor var_11920_begin_0 = const()[name = string("op_11920_begin_0"), val = tensor([36, 0, 0, 0])]; - tensor var_11920_end_0 = const()[name = string("op_11920_end_0"), val = tensor([37, 1, 512, 256])]; - tensor var_11920_end_mask_0 = const()[name = string("op_11920_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_11920_cast_fp16 = slice_by_index(begin = var_11920_begin_0, end = var_11920_end_0, end_mask = var_11920_end_mask_0, x = coreml_update_state_85)[name = string("op_11920_cast_fp16")]; - tensor var_11957 = const()[name = string("op_11957"), val = tensor([1, 4, 1, 1])]; - tensor x_261_cast_fp16 = tile(reps = var_11957, x = var_11913_cast_fp16)[name = string("x_261_cast_fp16")]; - tensor var_11977 = const()[name = string("op_11977"), val = tensor([1, 4, 1, 1])]; - tensor x_267_cast_fp16 = tile(reps = var_11977, x = var_11920_cast_fp16)[name = string("x_267_cast_fp16")]; - bool var_12004_transpose_x_1 = const()[name = string("op_12004_transpose_x_1"), val = bool(false)]; - bool var_12004_transpose_y_1 = const()[name = string("op_12004_transpose_y_1"), val = bool(true)]; - tensor var_12004 = matmul(transpose_x = var_12004_transpose_x_1, transpose_y = var_12004_transpose_y_1, x = query_states_65_cast_fp16, y = x_261_cast_fp16)[name = string("op_12004")]; - fp16 var_12005_to_fp16 = const()[name = string("op_12005_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_97_cast_fp16 = mul(x = var_12004, y = var_12005_to_fp16)[name = string("attn_weights_97_cast_fp16")]; - tensor attn_weights_99_cast_fp16 = add(x = attn_weights_97_cast_fp16, y = var_2059)[name = string("attn_weights_99_cast_fp16")]; - int32 var_12040 = const()[name = string("op_12040"), val = int32(-1)]; - tensor attn_weights_101_cast_fp16 = softmax(axis = var_12040, x = attn_weights_99_cast_fp16)[name = string("attn_weights_101_cast_fp16")]; - bool attn_output_161_transpose_x_0 = const()[name = string("attn_output_161_transpose_x_0"), val = bool(false)]; - bool attn_output_161_transpose_y_0 = const()[name = string("attn_output_161_transpose_y_0"), val = bool(false)]; - tensor attn_output_161_cast_fp16 = matmul(transpose_x = attn_output_161_transpose_x_0, transpose_y = attn_output_161_transpose_y_0, x = attn_weights_101_cast_fp16, y = x_267_cast_fp16)[name = string("attn_output_161_cast_fp16")]; - tensor var_12051_perm_0 = const()[name = string("op_12051_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_12055 = const()[name = string("op_12055"), val = tensor([1, 1, 1024])]; - tensor var_12051_cast_fp16 = transpose(perm = var_12051_perm_0, x = attn_output_161_cast_fp16)[name = string("transpose_58")]; - tensor attn_output_165_cast_fp16 = reshape(shape = var_12055, x = var_12051_cast_fp16)[name = string("attn_output_165_cast_fp16")]; - tensor var_12060 = const()[name = string("op_12060"), val = tensor([0, 2, 1])]; - string var_12076_pad_type_0 = const()[name = string("op_12076_pad_type_0"), val = string("valid")]; - int32 var_12076_groups_0 = const()[name = string("op_12076_groups_0"), val = int32(1)]; - tensor var_12076_strides_0 = const()[name = string("op_12076_strides_0"), val = tensor([1])]; - tensor var_12076_pad_0 = const()[name = string("op_12076_pad_0"), val = tensor([0, 0])]; - tensor var_12076_dilations_0 = const()[name = string("op_12076_dilations_0"), val = tensor([1])]; - tensor squeeze_16_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357051904))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357936704))))[name = string("squeeze_16_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_12061_cast_fp16 = transpose(perm = var_12060, x = attn_output_165_cast_fp16)[name = string("transpose_57")]; - tensor var_12076_cast_fp16 = conv(dilations = var_12076_dilations_0, groups = var_12076_groups_0, pad = var_12076_pad_0, pad_type = var_12076_pad_type_0, strides = var_12076_strides_0, weight = squeeze_16_cast_fp16_to_fp32_to_fp16_palettized, x = var_12061_cast_fp16)[name = string("op_12076_cast_fp16")]; - tensor var_12080 = const()[name = string("op_12080"), val = tensor([0, 2, 1])]; - int32 var_12091 = const()[name = string("op_12091"), val = int32(-1)]; - fp16 const_634_promoted_to_fp16 = const()[name = string("const_634_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_265_cast_fp16 = transpose(perm = var_12080, x = var_12076_cast_fp16)[name = string("transpose_56")]; - tensor var_12093_cast_fp16 = mul(x = hidden_states_265_cast_fp16, y = const_634_promoted_to_fp16)[name = string("op_12093_cast_fp16")]; - bool input_331_interleave_0 = const()[name = string("input_331_interleave_0"), val = bool(false)]; - tensor input_331_cast_fp16 = concat(axis = var_12091, interleave = input_331_interleave_0, values = (hidden_states_265_cast_fp16, var_12093_cast_fp16))[name = string("input_331_cast_fp16")]; - tensor normed_397_axes_0 = const()[name = string("normed_397_axes_0"), val = tensor([-1])]; - fp16 var_12088_to_fp16 = const()[name = string("op_12088_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_397_cast_fp16 = layer_norm(axes = normed_397_axes_0, epsilon = var_12088_to_fp16, x = input_331_cast_fp16)[name = string("normed_397_cast_fp16")]; - tensor normed_399_begin_0 = const()[name = string("normed_399_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_399_end_0 = const()[name = string("normed_399_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_399_end_mask_0 = const()[name = string("normed_399_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_399_cast_fp16 = slice_by_index(begin = normed_399_begin_0, end = normed_399_end_0, end_mask = normed_399_end_mask_0, x = normed_397_cast_fp16)[name = string("normed_399_cast_fp16")]; - tensor var_12107_to_fp16 = const()[name = string("op_12107_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357955200)))]; - tensor attn_output_169_cast_fp16 = mul(x = normed_399_cast_fp16, y = var_12107_to_fp16)[name = string("attn_output_169_cast_fp16")]; - tensor hidden_states_267_cast_fp16 = add(x = hidden_states_257_cast_fp16, y = attn_output_169_cast_fp16)[name = string("hidden_states_267_cast_fp16")]; - int32 var_12120 = const()[name = string("op_12120"), val = int32(-1)]; - fp16 const_638_promoted_to_fp16 = const()[name = string("const_638_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_12122_cast_fp16 = mul(x = hidden_states_267_cast_fp16, y = const_638_promoted_to_fp16)[name = string("op_12122_cast_fp16")]; - bool input_333_interleave_0 = const()[name = string("input_333_interleave_0"), val = bool(false)]; - tensor input_333_cast_fp16 = concat(axis = var_12120, interleave = input_333_interleave_0, values = (hidden_states_267_cast_fp16, var_12122_cast_fp16))[name = string("input_333_cast_fp16")]; - tensor normed_401_axes_0 = const()[name = string("normed_401_axes_0"), val = tensor([-1])]; - fp16 var_12117_to_fp16 = const()[name = string("op_12117_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_401_cast_fp16 = layer_norm(axes = normed_401_axes_0, epsilon = var_12117_to_fp16, x = input_333_cast_fp16)[name = string("normed_401_cast_fp16")]; - tensor normed_403_begin_0 = const()[name = string("normed_403_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_403_end_0 = const()[name = string("normed_403_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_403_end_mask_0 = const()[name = string("normed_403_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_403_cast_fp16 = slice_by_index(begin = normed_403_begin_0, end = normed_403_end_0, end_mask = normed_403_end_mask_0, x = normed_401_cast_fp16)[name = string("normed_403_cast_fp16")]; - tensor var_12136_to_fp16 = const()[name = string("op_12136_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357957568)))]; - tensor x_269_cast_fp16 = mul(x = normed_403_cast_fp16, y = var_12136_to_fp16)[name = string("x_269_cast_fp16")]; - tensor var_12148 = const()[name = string("op_12148"), val = tensor([0, 2, 1])]; - tensor input_335_axes_0 = const()[name = string("input_335_axes_0"), val = tensor([2])]; - tensor var_12149_cast_fp16 = transpose(perm = var_12148, x = x_269_cast_fp16)[name = string("transpose_55")]; - tensor input_335_cast_fp16 = expand_dims(axes = input_335_axes_0, x = var_12149_cast_fp16)[name = string("input_335_cast_fp16")]; - string x_271_pad_type_0 = const()[name = string("x_271_pad_type_0"), val = string("valid")]; - tensor x_271_strides_0 = const()[name = string("x_271_strides_0"), val = tensor([1, 1])]; - tensor x_271_pad_0 = const()[name = string("x_271_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_271_dilations_0 = const()[name = string("x_271_dilations_0"), val = tensor([1, 1])]; - int32 x_271_groups_0 = const()[name = string("x_271_groups_0"), val = int32(1)]; - tensor model_model_layers_16_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755614336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(761586368))))[name = string("model_model_layers_16_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_271_cast_fp16 = conv(dilations = x_271_dilations_0, groups = x_271_groups_0, pad = x_271_pad_0, pad_type = x_271_pad_type_0, strides = x_271_strides_0, weight = model_model_layers_16_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_335_cast_fp16)[name = string("x_271_cast_fp16")]; - string b_33_pad_type_0 = const()[name = string("b_33_pad_type_0"), val = string("valid")]; - tensor b_33_strides_0 = const()[name = string("b_33_strides_0"), val = tensor([1, 1])]; - tensor b_33_pad_0 = const()[name = string("b_33_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_33_dilations_0 = const()[name = string("b_33_dilations_0"), val = tensor([1, 1])]; - int32 b_33_groups_0 = const()[name = string("b_33_groups_0"), val = int32(1)]; - tensor model_model_layers_16_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(761697024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(767669056))))[name = string("model_model_layers_16_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_33_cast_fp16 = conv(dilations = b_33_dilations_0, groups = b_33_groups_0, pad = b_33_pad_0, pad_type = b_33_pad_type_0, strides = b_33_strides_0, weight = model_model_layers_16_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_335_cast_fp16)[name = string("b_33_cast_fp16")]; - string var_12174_mode_0 = const()[name = string("op_12174_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_12174_cast_fp16 = gelu(mode = var_12174_mode_0, x = x_271_cast_fp16)[name = string("op_12174_cast_fp16")]; - tensor input_337_cast_fp16 = mul(x = var_12174_cast_fp16, y = b_33_cast_fp16)[name = string("input_337_cast_fp16")]; - string e_33_pad_type_0 = const()[name = string("e_33_pad_type_0"), val = string("valid")]; - tensor e_33_strides_0 = const()[name = string("e_33_strides_0"), val = tensor([1, 1])]; - tensor e_33_pad_0 = const()[name = string("e_33_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_33_dilations_0 = const()[name = string("e_33_dilations_0"), val = tensor([1, 1])]; - int32 e_33_groups_0 = const()[name = string("e_33_groups_0"), val = int32(1)]; - tensor model_model_layers_16_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(370125312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(376097344))))[name = string("model_model_layers_16_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_33_cast_fp16 = conv(dilations = e_33_dilations_0, groups = e_33_groups_0, pad = e_33_pad_0, pad_type = e_33_pad_type_0, strides = e_33_strides_0, weight = model_model_layers_16_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_337_cast_fp16)[name = string("e_33_cast_fp16")]; - tensor var_12182_axes_0 = const()[name = string("op_12182_axes_0"), val = tensor([2])]; - tensor var_12182_cast_fp16 = squeeze(axes = var_12182_axes_0, x = e_33_cast_fp16)[name = string("op_12182_cast_fp16")]; - tensor var_12183 = const()[name = string("op_12183"), val = tensor([0, 2, 1])]; - int32 var_12194 = const()[name = string("op_12194"), val = int32(-1)]; - fp16 const_642_promoted_to_fp16 = const()[name = string("const_642_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_269_cast_fp16 = transpose(perm = var_12183, x = var_12182_cast_fp16)[name = string("transpose_54")]; - tensor var_12196_cast_fp16 = mul(x = hidden_states_269_cast_fp16, y = const_642_promoted_to_fp16)[name = string("op_12196_cast_fp16")]; - bool input_339_interleave_0 = const()[name = string("input_339_interleave_0"), val = bool(false)]; - tensor input_339_cast_fp16 = concat(axis = var_12194, interleave = input_339_interleave_0, values = (hidden_states_269_cast_fp16, var_12196_cast_fp16))[name = string("input_339_cast_fp16")]; - tensor normed_405_axes_0 = const()[name = string("normed_405_axes_0"), val = tensor([-1])]; - fp16 var_12191_to_fp16 = const()[name = string("op_12191_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_405_cast_fp16 = layer_norm(axes = normed_405_axes_0, epsilon = var_12191_to_fp16, x = input_339_cast_fp16)[name = string("normed_405_cast_fp16")]; - tensor normed_407_begin_0 = const()[name = string("normed_407_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_407_end_0 = const()[name = string("normed_407_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_407_end_mask_0 = const()[name = string("normed_407_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_407_cast_fp16 = slice_by_index(begin = normed_407_begin_0, end = normed_407_end_0, end_mask = normed_407_end_mask_0, x = normed_405_cast_fp16)[name = string("normed_407_cast_fp16")]; - tensor var_12210_to_fp16 = const()[name = string("op_12210_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(376115840)))]; - tensor hidden_states_271_cast_fp16 = mul(x = normed_407_cast_fp16, y = var_12210_to_fp16)[name = string("hidden_states_271_cast_fp16")]; - tensor hidden_states_273_cast_fp16 = add(x = hidden_states_267_cast_fp16, y = hidden_states_271_cast_fp16)[name = string("hidden_states_273_cast_fp16")]; - int32 var_12261 = const()[name = string("op_12261"), val = int32(-1)]; - fp16 const_646_promoted_to_fp16 = const()[name = string("const_646_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_12263_cast_fp16 = mul(x = hidden_states_273_cast_fp16, y = const_646_promoted_to_fp16)[name = string("op_12263_cast_fp16")]; - bool input_341_interleave_0 = const()[name = string("input_341_interleave_0"), val = bool(false)]; - tensor input_341_cast_fp16 = concat(axis = var_12261, interleave = input_341_interleave_0, values = (hidden_states_273_cast_fp16, var_12263_cast_fp16))[name = string("input_341_cast_fp16")]; - tensor normed_409_axes_0 = const()[name = string("normed_409_axes_0"), val = tensor([-1])]; - fp16 var_12258_to_fp16 = const()[name = string("op_12258_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_409_cast_fp16 = layer_norm(axes = normed_409_axes_0, epsilon = var_12258_to_fp16, x = input_341_cast_fp16)[name = string("normed_409_cast_fp16")]; - tensor normed_411_begin_0 = const()[name = string("normed_411_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_411_end_0 = const()[name = string("normed_411_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_411_end_mask_0 = const()[name = string("normed_411_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_411_cast_fp16 = slice_by_index(begin = normed_411_begin_0, end = normed_411_end_0, end_mask = normed_411_end_mask_0, x = normed_409_cast_fp16)[name = string("normed_411_cast_fp16")]; - tensor var_12277_to_fp16 = const()[name = string("op_12277_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(376118208)))]; - tensor hidden_states_275_cast_fp16 = mul(x = normed_411_cast_fp16, y = var_12277_to_fp16)[name = string("hidden_states_275_cast_fp16")]; - tensor var_12282 = const()[name = string("op_12282"), val = tensor([0, 2, 1])]; - tensor var_12285_axes_0 = const()[name = string("op_12285_axes_0"), val = tensor([2])]; - tensor var_12283_cast_fp16 = transpose(perm = var_12282, x = hidden_states_275_cast_fp16)[name = string("transpose_53")]; - tensor var_12285_cast_fp16 = expand_dims(axes = var_12285_axes_0, x = var_12283_cast_fp16)[name = string("op_12285_cast_fp16")]; - string var_12301_pad_type_0 = const()[name = string("op_12301_pad_type_0"), val = string("valid")]; - tensor var_12301_strides_0 = const()[name = string("op_12301_strides_0"), val = tensor([1, 1])]; - tensor var_12301_pad_0 = const()[name = string("op_12301_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_12301_dilations_0 = const()[name = string("op_12301_dilations_0"), val = tensor([1, 1])]; - int32 var_12301_groups_0 = const()[name = string("op_12301_groups_0"), val = int32(1)]; - tensor var_12301 = conv(dilations = var_12301_dilations_0, groups = var_12301_groups_0, pad = var_12301_pad_0, pad_type = var_12301_pad_type_0, strides = var_12301_strides_0, weight = model_model_layers_17_self_attn_q_proj_weight_palettized, x = var_12285_cast_fp16)[name = string("op_12301")]; - tensor var_12306 = const()[name = string("op_12306"), val = tensor([1, 4, 1, 256])]; - tensor var_12307 = reshape(shape = var_12306, x = var_12301)[name = string("op_12307")]; - string var_12323_pad_type_0 = const()[name = string("op_12323_pad_type_0"), val = string("valid")]; - tensor var_12323_strides_0 = const()[name = string("op_12323_strides_0"), val = tensor([1, 1])]; - tensor var_12323_pad_0 = const()[name = string("op_12323_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_12323_dilations_0 = const()[name = string("op_12323_dilations_0"), val = tensor([1, 1])]; - int32 var_12323_groups_0 = const()[name = string("op_12323_groups_0"), val = int32(1)]; - tensor var_12323 = conv(dilations = var_12323_dilations_0, groups = var_12323_groups_0, pad = var_12323_pad_0, pad_type = var_12323_pad_type_0, strides = var_12323_strides_0, weight = model_model_layers_17_self_attn_k_proj_weight_palettized, x = var_12285_cast_fp16)[name = string("op_12323")]; - tensor var_12328 = const()[name = string("op_12328"), val = tensor([1, 1, 1, 256])]; - tensor var_12329 = reshape(shape = var_12328, x = var_12323)[name = string("op_12329")]; - string var_12345_pad_type_0 = const()[name = string("op_12345_pad_type_0"), val = string("valid")]; - tensor var_12345_strides_0 = const()[name = string("op_12345_strides_0"), val = tensor([1, 1])]; - tensor var_12345_pad_0 = const()[name = string("op_12345_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_12345_dilations_0 = const()[name = string("op_12345_dilations_0"), val = tensor([1, 1])]; - int32 var_12345_groups_0 = const()[name = string("op_12345_groups_0"), val = int32(1)]; - tensor var_12345 = conv(dilations = var_12345_dilations_0, groups = var_12345_groups_0, pad = var_12345_pad_0, pad_type = var_12345_pad_type_0, strides = var_12345_strides_0, weight = model_model_layers_17_self_attn_v_proj_weight_palettized, x = var_12285_cast_fp16)[name = string("op_12345")]; - tensor var_12350 = const()[name = string("op_12350"), val = tensor([1, 1, 1, 256])]; - tensor var_12351 = reshape(shape = var_12350, x = var_12345)[name = string("op_12351")]; - int32 var_12366 = const()[name = string("op_12366"), val = int32(-1)]; - fp16 const_650_promoted = const()[name = string("const_650_promoted"), val = fp16(-0x1p+0)]; - tensor var_12368 = mul(x = var_12307, y = const_650_promoted)[name = string("op_12368")]; - bool input_345_interleave_0 = const()[name = string("input_345_interleave_0"), val = bool(false)]; - tensor input_345 = concat(axis = var_12366, interleave = input_345_interleave_0, values = (var_12307, var_12368))[name = string("input_345")]; - tensor normed_413_axes_0 = const()[name = string("normed_413_axes_0"), val = tensor([-1])]; - fp16 var_12363_to_fp16 = const()[name = string("op_12363_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_413_cast_fp16 = layer_norm(axes = normed_413_axes_0, epsilon = var_12363_to_fp16, x = input_345)[name = string("normed_413_cast_fp16")]; - tensor normed_415_begin_0 = const()[name = string("normed_415_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_415_end_0 = const()[name = string("normed_415_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_415_end_mask_0 = const()[name = string("normed_415_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_415 = slice_by_index(begin = normed_415_begin_0, end = normed_415_end_0, end_mask = normed_415_end_mask_0, x = normed_413_cast_fp16)[name = string("normed_415")]; - tensor var_12382_to_fp16 = const()[name = string("op_12382_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(376120576)))]; - tensor q_35_cast_fp16 = mul(x = normed_415, y = var_12382_to_fp16)[name = string("q_35_cast_fp16")]; - int32 var_12393 = const()[name = string("op_12393"), val = int32(-1)]; - fp16 const_654_promoted = const()[name = string("const_654_promoted"), val = fp16(-0x1p+0)]; - tensor var_12395 = mul(x = var_12329, y = const_654_promoted)[name = string("op_12395")]; - bool input_347_interleave_0 = const()[name = string("input_347_interleave_0"), val = bool(false)]; - tensor input_347 = concat(axis = var_12393, interleave = input_347_interleave_0, values = (var_12329, var_12395))[name = string("input_347")]; - tensor normed_417_axes_0 = const()[name = string("normed_417_axes_0"), val = tensor([-1])]; - fp16 var_12390_to_fp16 = const()[name = string("op_12390_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_417_cast_fp16 = layer_norm(axes = normed_417_axes_0, epsilon = var_12390_to_fp16, x = input_347)[name = string("normed_417_cast_fp16")]; - tensor normed_419_begin_0 = const()[name = string("normed_419_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_419_end_0 = const()[name = string("normed_419_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_419_end_mask_0 = const()[name = string("normed_419_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_419 = slice_by_index(begin = normed_419_begin_0, end = normed_419_end_0, end_mask = normed_419_end_mask_0, x = normed_417_cast_fp16)[name = string("normed_419")]; - tensor var_12409_to_fp16 = const()[name = string("op_12409_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(376121152)))]; - tensor k_35_cast_fp16 = mul(x = normed_419, y = var_12409_to_fp16)[name = string("k_35_cast_fp16")]; - tensor var_12411_cast_fp16 = mul(x = q_35_cast_fp16, y = cos_21_cast_fp16)[name = string("op_12411_cast_fp16")]; - tensor x1_69_begin_0 = const()[name = string("x1_69_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_69_end_0 = const()[name = string("x1_69_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_69_end_mask_0 = const()[name = string("x1_69_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_69_cast_fp16 = slice_by_index(begin = x1_69_begin_0, end = x1_69_end_0, end_mask = x1_69_end_mask_0, x = q_35_cast_fp16)[name = string("x1_69_cast_fp16")]; - tensor x2_69_begin_0 = const()[name = string("x2_69_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_69_end_0 = const()[name = string("x2_69_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_69_end_mask_0 = const()[name = string("x2_69_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_69_cast_fp16 = slice_by_index(begin = x2_69_begin_0, end = x2_69_end_0, end_mask = x2_69_end_mask_0, x = q_35_cast_fp16)[name = string("x2_69_cast_fp16")]; - fp16 const_660_promoted_to_fp16 = const()[name = string("const_660_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_12432_cast_fp16 = mul(x = x2_69_cast_fp16, y = const_660_promoted_to_fp16)[name = string("op_12432_cast_fp16")]; - int32 var_12434 = const()[name = string("op_12434"), val = int32(-1)]; - bool var_12435_interleave_0 = const()[name = string("op_12435_interleave_0"), val = bool(false)]; - tensor var_12435_cast_fp16 = concat(axis = var_12434, interleave = var_12435_interleave_0, values = (var_12432_cast_fp16, x1_69_cast_fp16))[name = string("op_12435_cast_fp16")]; - tensor var_12436_cast_fp16 = mul(x = var_12435_cast_fp16, y = sin_21_cast_fp16)[name = string("op_12436_cast_fp16")]; - tensor query_states_69_cast_fp16 = add(x = var_12411_cast_fp16, y = var_12436_cast_fp16)[name = string("query_states_69_cast_fp16")]; - tensor var_12439_cast_fp16 = mul(x = k_35_cast_fp16, y = cos_21_cast_fp16)[name = string("op_12439_cast_fp16")]; - tensor x1_71_begin_0 = const()[name = string("x1_71_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_71_end_0 = const()[name = string("x1_71_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_71_end_mask_0 = const()[name = string("x1_71_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_71_cast_fp16 = slice_by_index(begin = x1_71_begin_0, end = x1_71_end_0, end_mask = x1_71_end_mask_0, x = k_35_cast_fp16)[name = string("x1_71_cast_fp16")]; - tensor x2_71_begin_0 = const()[name = string("x2_71_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_71_end_0 = const()[name = string("x2_71_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_71_end_mask_0 = const()[name = string("x2_71_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_71_cast_fp16 = slice_by_index(begin = x2_71_begin_0, end = x2_71_end_0, end_mask = x2_71_end_mask_0, x = k_35_cast_fp16)[name = string("x2_71_cast_fp16")]; - fp16 const_663_promoted_to_fp16 = const()[name = string("const_663_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_12460_cast_fp16 = mul(x = x2_71_cast_fp16, y = const_663_promoted_to_fp16)[name = string("op_12460_cast_fp16")]; - int32 var_12462 = const()[name = string("op_12462"), val = int32(-1)]; - bool var_12463_interleave_0 = const()[name = string("op_12463_interleave_0"), val = bool(false)]; - tensor var_12463_cast_fp16 = concat(axis = var_12462, interleave = var_12463_interleave_0, values = (var_12460_cast_fp16, x1_71_cast_fp16))[name = string("op_12463_cast_fp16")]; - tensor var_12464_cast_fp16 = mul(x = var_12463_cast_fp16, y = sin_21_cast_fp16)[name = string("op_12464_cast_fp16")]; - tensor key_states_69_cast_fp16 = add(x = var_12439_cast_fp16, y = var_12464_cast_fp16)[name = string("key_states_69_cast_fp16")]; - tensor expand_dims_174 = const()[name = string("expand_dims_174"), val = tensor([2])]; - tensor expand_dims_175 = const()[name = string("expand_dims_175"), val = tensor([0])]; - tensor expand_dims_177 = const()[name = string("expand_dims_177"), val = tensor([0])]; - tensor expand_dims_178 = const()[name = string("expand_dims_178"), val = tensor([3])]; - int32 concat_78_axis_0 = const()[name = string("concat_78_axis_0"), val = int32(0)]; - bool concat_78_interleave_0 = const()[name = string("concat_78_interleave_0"), val = bool(false)]; - tensor concat_78 = concat(axis = concat_78_axis_0, interleave = concat_78_interleave_0, values = (expand_dims_174, expand_dims_175, current_pos, expand_dims_177))[name = string("concat_78")]; - tensor concat_79_values1_0 = const()[name = string("concat_79_values1_0"), val = tensor([0])]; - tensor concat_79_values3_0 = const()[name = string("concat_79_values3_0"), val = tensor([0])]; - int32 concat_79_axis_0 = const()[name = string("concat_79_axis_0"), val = int32(0)]; - bool concat_79_interleave_0 = const()[name = string("concat_79_interleave_0"), val = bool(false)]; - tensor concat_79 = concat(axis = concat_79_axis_0, interleave = concat_79_interleave_0, values = (expand_dims_178, concat_79_values1_0, var_4997, concat_79_values3_0))[name = string("concat_79")]; - tensor model_model_kv_cache_global_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_global_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_global_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_78, begin_mask = model_model_kv_cache_global_internal_tensor_assign_5_begin_mask_0, end = concat_79, end_mask = model_model_kv_cache_global_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_5_stride_0, update = key_states_69_cast_fp16, x = coreml_update_state_75)[name = string("model_model_kv_cache_global_internal_tensor_assign_5_cast_fp16")]; - write_state(data = model_model_kv_cache_global_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_86_write_state")]; - tensor coreml_update_state_86 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_86")]; - tensor expand_dims_180 = const()[name = string("expand_dims_180"), val = tensor([6])]; - tensor expand_dims_181 = const()[name = string("expand_dims_181"), val = tensor([0])]; - tensor expand_dims_183 = const()[name = string("expand_dims_183"), val = tensor([0])]; - tensor expand_dims_184 = const()[name = string("expand_dims_184"), val = tensor([7])]; - int32 concat_82_axis_0 = const()[name = string("concat_82_axis_0"), val = int32(0)]; - bool concat_82_interleave_0 = const()[name = string("concat_82_interleave_0"), val = bool(false)]; - tensor concat_82 = concat(axis = concat_82_axis_0, interleave = concat_82_interleave_0, values = (expand_dims_180, expand_dims_181, current_pos, expand_dims_183))[name = string("concat_82")]; - tensor concat_83_values1_0 = const()[name = string("concat_83_values1_0"), val = tensor([0])]; - tensor concat_83_values3_0 = const()[name = string("concat_83_values3_0"), val = tensor([0])]; - int32 concat_83_axis_0 = const()[name = string("concat_83_axis_0"), val = int32(0)]; - bool concat_83_interleave_0 = const()[name = string("concat_83_interleave_0"), val = bool(false)]; - tensor concat_83 = concat(axis = concat_83_axis_0, interleave = concat_83_interleave_0, values = (expand_dims_184, concat_83_values1_0, var_4997, concat_83_values3_0))[name = string("concat_83")]; - tensor model_model_kv_cache_global_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_global_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_global_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_82, begin_mask = model_model_kv_cache_global_internal_tensor_assign_6_begin_mask_0, end = concat_83, end_mask = model_model_kv_cache_global_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_6_stride_0, update = var_12351, x = coreml_update_state_86)[name = string("model_model_kv_cache_global_internal_tensor_assign_6_cast_fp16")]; - write_state(data = model_model_kv_cache_global_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_87_write_state")]; - tensor coreml_update_state_87 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_87")]; - tensor var_12519_begin_0 = const()[name = string("op_12519_begin_0"), val = tensor([2, 0, 0, 0])]; - tensor var_12519_end_0 = const()[name = string("op_12519_end_0"), val = tensor([3, 1, 4096, 256])]; - tensor var_12519_end_mask_0 = const()[name = string("op_12519_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_12519_cast_fp16 = slice_by_index(begin = var_12519_begin_0, end = var_12519_end_0, end_mask = var_12519_end_mask_0, x = coreml_update_state_87)[name = string("op_12519_cast_fp16")]; - tensor var_12526_begin_0 = const()[name = string("op_12526_begin_0"), val = tensor([6, 0, 0, 0])]; - tensor var_12526_end_0 = const()[name = string("op_12526_end_0"), val = tensor([7, 1, 4096, 256])]; - tensor var_12526_end_mask_0 = const()[name = string("op_12526_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_12526_cast_fp16 = slice_by_index(begin = var_12526_begin_0, end = var_12526_end_0, end_mask = var_12526_end_mask_0, x = coreml_update_state_87)[name = string("op_12526_cast_fp16")]; - tensor var_12563 = const()[name = string("op_12563"), val = tensor([1, 4, 1, 1])]; - tensor x_277_cast_fp16 = tile(reps = var_12563, x = var_12519_cast_fp16)[name = string("x_277_cast_fp16")]; - tensor var_12583 = const()[name = string("op_12583"), val = tensor([1, 4, 1, 1])]; - tensor x_283_cast_fp16 = tile(reps = var_12583, x = var_12526_cast_fp16)[name = string("x_283_cast_fp16")]; - bool var_12610_transpose_x_1 = const()[name = string("op_12610_transpose_x_1"), val = bool(false)]; - bool var_12610_transpose_y_1 = const()[name = string("op_12610_transpose_y_1"), val = bool(true)]; - tensor var_12610 = matmul(transpose_x = var_12610_transpose_x_1, transpose_y = var_12610_transpose_y_1, x = query_states_69_cast_fp16, y = x_277_cast_fp16)[name = string("op_12610")]; - fp16 var_12611_to_fp16 = const()[name = string("op_12611_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_103_cast_fp16 = mul(x = var_12610, y = var_12611_to_fp16)[name = string("attn_weights_103_cast_fp16")]; - tensor attn_weights_105_cast_fp16 = add(x = attn_weights_103_cast_fp16, y = causal_mask)[name = string("attn_weights_105_cast_fp16")]; - int32 var_12646 = const()[name = string("op_12646"), val = int32(-1)]; - tensor attn_weights_107_cast_fp16 = softmax(axis = var_12646, x = attn_weights_105_cast_fp16)[name = string("attn_weights_107_cast_fp16")]; - bool attn_output_171_transpose_x_0 = const()[name = string("attn_output_171_transpose_x_0"), val = bool(false)]; - bool attn_output_171_transpose_y_0 = const()[name = string("attn_output_171_transpose_y_0"), val = bool(false)]; - tensor attn_output_171_cast_fp16 = matmul(transpose_x = attn_output_171_transpose_x_0, transpose_y = attn_output_171_transpose_y_0, x = attn_weights_107_cast_fp16, y = x_283_cast_fp16)[name = string("attn_output_171_cast_fp16")]; - tensor var_12657_perm_0 = const()[name = string("op_12657_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_12661 = const()[name = string("op_12661"), val = tensor([1, 1, 1024])]; - tensor var_12657_cast_fp16 = transpose(perm = var_12657_perm_0, x = attn_output_171_cast_fp16)[name = string("transpose_52")]; - tensor attn_output_175_cast_fp16 = reshape(shape = var_12661, x = var_12657_cast_fp16)[name = string("attn_output_175_cast_fp16")]; - tensor var_12666 = const()[name = string("op_12666"), val = tensor([0, 2, 1])]; - string var_12682_pad_type_0 = const()[name = string("op_12682_pad_type_0"), val = string("valid")]; - int32 var_12682_groups_0 = const()[name = string("op_12682_groups_0"), val = int32(1)]; - tensor var_12682_strides_0 = const()[name = string("op_12682_strides_0"), val = tensor([1])]; - tensor var_12682_pad_0 = const()[name = string("op_12682_pad_0"), val = tensor([0, 0])]; - tensor var_12682_dilations_0 = const()[name = string("op_12682_dilations_0"), val = tensor([1])]; - tensor squeeze_17_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(376121728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377006528))))[name = string("squeeze_17_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_12667_cast_fp16 = transpose(perm = var_12666, x = attn_output_175_cast_fp16)[name = string("transpose_51")]; - tensor var_12682_cast_fp16 = conv(dilations = var_12682_dilations_0, groups = var_12682_groups_0, pad = var_12682_pad_0, pad_type = var_12682_pad_type_0, strides = var_12682_strides_0, weight = squeeze_17_cast_fp16_to_fp32_to_fp16_palettized, x = var_12667_cast_fp16)[name = string("op_12682_cast_fp16")]; - tensor var_12686 = const()[name = string("op_12686"), val = tensor([0, 2, 1])]; - int32 var_12697 = const()[name = string("op_12697"), val = int32(-1)]; - fp16 const_672_promoted_to_fp16 = const()[name = string("const_672_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_281_cast_fp16 = transpose(perm = var_12686, x = var_12682_cast_fp16)[name = string("transpose_50")]; - tensor var_12699_cast_fp16 = mul(x = hidden_states_281_cast_fp16, y = const_672_promoted_to_fp16)[name = string("op_12699_cast_fp16")]; - bool input_351_interleave_0 = const()[name = string("input_351_interleave_0"), val = bool(false)]; - tensor input_351_cast_fp16 = concat(axis = var_12697, interleave = input_351_interleave_0, values = (hidden_states_281_cast_fp16, var_12699_cast_fp16))[name = string("input_351_cast_fp16")]; - tensor normed_421_axes_0 = const()[name = string("normed_421_axes_0"), val = tensor([-1])]; - fp16 var_12694_to_fp16 = const()[name = string("op_12694_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_421_cast_fp16 = layer_norm(axes = normed_421_axes_0, epsilon = var_12694_to_fp16, x = input_351_cast_fp16)[name = string("normed_421_cast_fp16")]; - tensor normed_423_begin_0 = const()[name = string("normed_423_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_423_end_0 = const()[name = string("normed_423_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_423_end_mask_0 = const()[name = string("normed_423_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_423_cast_fp16 = slice_by_index(begin = normed_423_begin_0, end = normed_423_end_0, end_mask = normed_423_end_mask_0, x = normed_421_cast_fp16)[name = string("normed_423_cast_fp16")]; - tensor var_12713_to_fp16 = const()[name = string("op_12713_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377025024)))]; - tensor attn_output_179_cast_fp16 = mul(x = normed_423_cast_fp16, y = var_12713_to_fp16)[name = string("attn_output_179_cast_fp16")]; - tensor hidden_states_283_cast_fp16 = add(x = hidden_states_273_cast_fp16, y = attn_output_179_cast_fp16)[name = string("hidden_states_283_cast_fp16")]; - int32 var_12726 = const()[name = string("op_12726"), val = int32(-1)]; - fp16 const_676_promoted_to_fp16 = const()[name = string("const_676_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_12728_cast_fp16 = mul(x = hidden_states_283_cast_fp16, y = const_676_promoted_to_fp16)[name = string("op_12728_cast_fp16")]; - bool input_353_interleave_0 = const()[name = string("input_353_interleave_0"), val = bool(false)]; - tensor input_353_cast_fp16 = concat(axis = var_12726, interleave = input_353_interleave_0, values = (hidden_states_283_cast_fp16, var_12728_cast_fp16))[name = string("input_353_cast_fp16")]; - tensor normed_425_axes_0 = const()[name = string("normed_425_axes_0"), val = tensor([-1])]; - fp16 var_12723_to_fp16 = const()[name = string("op_12723_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_425_cast_fp16 = layer_norm(axes = normed_425_axes_0, epsilon = var_12723_to_fp16, x = input_353_cast_fp16)[name = string("normed_425_cast_fp16")]; - tensor normed_427_begin_0 = const()[name = string("normed_427_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_427_end_0 = const()[name = string("normed_427_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_427_end_mask_0 = const()[name = string("normed_427_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_427_cast_fp16 = slice_by_index(begin = normed_427_begin_0, end = normed_427_end_0, end_mask = normed_427_end_mask_0, x = normed_425_cast_fp16)[name = string("normed_427_cast_fp16")]; - tensor var_12742_to_fp16 = const()[name = string("op_12742_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377027392)))]; - tensor x_285_cast_fp16 = mul(x = normed_427_cast_fp16, y = var_12742_to_fp16)[name = string("x_285_cast_fp16")]; - tensor var_12754 = const()[name = string("op_12754"), val = tensor([0, 2, 1])]; - tensor input_355_axes_0 = const()[name = string("input_355_axes_0"), val = tensor([2])]; - tensor var_12755_cast_fp16 = transpose(perm = var_12754, x = x_285_cast_fp16)[name = string("transpose_49")]; - tensor input_355_cast_fp16 = expand_dims(axes = input_355_axes_0, x = var_12755_cast_fp16)[name = string("input_355_cast_fp16")]; - string x_287_pad_type_0 = const()[name = string("x_287_pad_type_0"), val = string("valid")]; - tensor x_287_strides_0 = const()[name = string("x_287_strides_0"), val = tensor([1, 1])]; - tensor x_287_pad_0 = const()[name = string("x_287_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_287_dilations_0 = const()[name = string("x_287_dilations_0"), val = tensor([1, 1])]; - int32 x_287_groups_0 = const()[name = string("x_287_groups_0"), val = int32(1)]; - tensor model_model_layers_17_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(767779712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(773751744))))[name = string("model_model_layers_17_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_287_cast_fp16 = conv(dilations = x_287_dilations_0, groups = x_287_groups_0, pad = x_287_pad_0, pad_type = x_287_pad_type_0, strides = x_287_strides_0, weight = model_model_layers_17_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_355_cast_fp16)[name = string("x_287_cast_fp16")]; - string b_35_pad_type_0 = const()[name = string("b_35_pad_type_0"), val = string("valid")]; - tensor b_35_strides_0 = const()[name = string("b_35_strides_0"), val = tensor([1, 1])]; - tensor b_35_pad_0 = const()[name = string("b_35_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_35_dilations_0 = const()[name = string("b_35_dilations_0"), val = tensor([1, 1])]; - int32 b_35_groups_0 = const()[name = string("b_35_groups_0"), val = int32(1)]; - tensor model_model_layers_17_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(773862400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(779834432))))[name = string("model_model_layers_17_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_35_cast_fp16 = conv(dilations = b_35_dilations_0, groups = b_35_groups_0, pad = b_35_pad_0, pad_type = b_35_pad_type_0, strides = b_35_strides_0, weight = model_model_layers_17_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_355_cast_fp16)[name = string("b_35_cast_fp16")]; - string var_12780_mode_0 = const()[name = string("op_12780_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_12780_cast_fp16 = gelu(mode = var_12780_mode_0, x = x_287_cast_fp16)[name = string("op_12780_cast_fp16")]; - tensor input_357_cast_fp16 = mul(x = var_12780_cast_fp16, y = b_35_cast_fp16)[name = string("input_357_cast_fp16")]; - string e_35_pad_type_0 = const()[name = string("e_35_pad_type_0"), val = string("valid")]; - tensor e_35_strides_0 = const()[name = string("e_35_strides_0"), val = tensor([1, 1])]; - tensor e_35_pad_0 = const()[name = string("e_35_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_35_dilations_0 = const()[name = string("e_35_dilations_0"), val = tensor([1, 1])]; - int32 e_35_groups_0 = const()[name = string("e_35_groups_0"), val = int32(1)]; - tensor model_model_layers_17_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(389195136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395167168))))[name = string("model_model_layers_17_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_35_cast_fp16 = conv(dilations = e_35_dilations_0, groups = e_35_groups_0, pad = e_35_pad_0, pad_type = e_35_pad_type_0, strides = e_35_strides_0, weight = model_model_layers_17_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_357_cast_fp16)[name = string("e_35_cast_fp16")]; - tensor var_12788_axes_0 = const()[name = string("op_12788_axes_0"), val = tensor([2])]; - tensor var_12788_cast_fp16 = squeeze(axes = var_12788_axes_0, x = e_35_cast_fp16)[name = string("op_12788_cast_fp16")]; - tensor var_12789 = const()[name = string("op_12789"), val = tensor([0, 2, 1])]; - int32 var_12800 = const()[name = string("op_12800"), val = int32(-1)]; - fp16 const_680_promoted_to_fp16 = const()[name = string("const_680_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_285_cast_fp16 = transpose(perm = var_12789, x = var_12788_cast_fp16)[name = string("transpose_48")]; - tensor var_12802_cast_fp16 = mul(x = hidden_states_285_cast_fp16, y = const_680_promoted_to_fp16)[name = string("op_12802_cast_fp16")]; - bool input_359_interleave_0 = const()[name = string("input_359_interleave_0"), val = bool(false)]; - tensor input_359_cast_fp16 = concat(axis = var_12800, interleave = input_359_interleave_0, values = (hidden_states_285_cast_fp16, var_12802_cast_fp16))[name = string("input_359_cast_fp16")]; - tensor normed_429_axes_0 = const()[name = string("normed_429_axes_0"), val = tensor([-1])]; - fp16 var_12797_to_fp16 = const()[name = string("op_12797_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_429_cast_fp16 = layer_norm(axes = normed_429_axes_0, epsilon = var_12797_to_fp16, x = input_359_cast_fp16)[name = string("normed_429_cast_fp16")]; - tensor normed_431_begin_0 = const()[name = string("normed_431_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_431_end_0 = const()[name = string("normed_431_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_431_end_mask_0 = const()[name = string("normed_431_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_431_cast_fp16 = slice_by_index(begin = normed_431_begin_0, end = normed_431_end_0, end_mask = normed_431_end_mask_0, x = normed_429_cast_fp16)[name = string("normed_431_cast_fp16")]; - tensor var_12816_to_fp16 = const()[name = string("op_12816_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395185664)))]; - tensor hidden_states_287_cast_fp16 = mul(x = normed_431_cast_fp16, y = var_12816_to_fp16)[name = string("hidden_states_287_cast_fp16")]; - tensor hidden_states_289_cast_fp16 = add(x = hidden_states_283_cast_fp16, y = hidden_states_287_cast_fp16)[name = string("hidden_states_289_cast_fp16")]; - int32 var_12867 = const()[name = string("op_12867"), val = int32(-1)]; - fp16 const_684_promoted_to_fp16 = const()[name = string("const_684_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_12869_cast_fp16 = mul(x = hidden_states_289_cast_fp16, y = const_684_promoted_to_fp16)[name = string("op_12869_cast_fp16")]; - bool input_361_interleave_0 = const()[name = string("input_361_interleave_0"), val = bool(false)]; - tensor input_361_cast_fp16 = concat(axis = var_12867, interleave = input_361_interleave_0, values = (hidden_states_289_cast_fp16, var_12869_cast_fp16))[name = string("input_361_cast_fp16")]; - tensor normed_433_axes_0 = const()[name = string("normed_433_axes_0"), val = tensor([-1])]; - fp16 var_12864_to_fp16 = const()[name = string("op_12864_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_433_cast_fp16 = layer_norm(axes = normed_433_axes_0, epsilon = var_12864_to_fp16, x = input_361_cast_fp16)[name = string("normed_433_cast_fp16")]; - tensor normed_435_begin_0 = const()[name = string("normed_435_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_435_end_0 = const()[name = string("normed_435_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_435_end_mask_0 = const()[name = string("normed_435_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_435_cast_fp16 = slice_by_index(begin = normed_435_begin_0, end = normed_435_end_0, end_mask = normed_435_end_mask_0, x = normed_433_cast_fp16)[name = string("normed_435_cast_fp16")]; - tensor var_12883_to_fp16 = const()[name = string("op_12883_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395188032)))]; - tensor hidden_states_291_cast_fp16 = mul(x = normed_435_cast_fp16, y = var_12883_to_fp16)[name = string("hidden_states_291_cast_fp16")]; - tensor var_12888 = const()[name = string("op_12888"), val = tensor([0, 2, 1])]; - tensor var_12891_axes_0 = const()[name = string("op_12891_axes_0"), val = tensor([2])]; - tensor var_12889_cast_fp16 = transpose(perm = var_12888, x = hidden_states_291_cast_fp16)[name = string("transpose_47")]; - tensor var_12891_cast_fp16 = expand_dims(axes = var_12891_axes_0, x = var_12889_cast_fp16)[name = string("op_12891_cast_fp16")]; - string var_12907_pad_type_0 = const()[name = string("op_12907_pad_type_0"), val = string("valid")]; - tensor var_12907_strides_0 = const()[name = string("op_12907_strides_0"), val = tensor([1, 1])]; - tensor var_12907_pad_0 = const()[name = string("op_12907_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_12907_dilations_0 = const()[name = string("op_12907_dilations_0"), val = tensor([1, 1])]; - int32 var_12907_groups_0 = const()[name = string("op_12907_groups_0"), val = int32(1)]; - tensor var_12907 = conv(dilations = var_12907_dilations_0, groups = var_12907_groups_0, pad = var_12907_pad_0, pad_type = var_12907_pad_type_0, strides = var_12907_strides_0, weight = model_model_layers_18_self_attn_q_proj_weight_palettized, x = var_12891_cast_fp16)[name = string("op_12907")]; - tensor var_12912 = const()[name = string("op_12912"), val = tensor([1, 4, 1, 256])]; - tensor var_12913 = reshape(shape = var_12912, x = var_12907)[name = string("op_12913")]; - string var_12929_pad_type_0 = const()[name = string("op_12929_pad_type_0"), val = string("valid")]; - tensor var_12929_strides_0 = const()[name = string("op_12929_strides_0"), val = tensor([1, 1])]; - tensor var_12929_pad_0 = const()[name = string("op_12929_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_12929_dilations_0 = const()[name = string("op_12929_dilations_0"), val = tensor([1, 1])]; - int32 var_12929_groups_0 = const()[name = string("op_12929_groups_0"), val = int32(1)]; - tensor var_12929 = conv(dilations = var_12929_dilations_0, groups = var_12929_groups_0, pad = var_12929_pad_0, pad_type = var_12929_pad_type_0, strides = var_12929_strides_0, weight = model_model_layers_18_self_attn_k_proj_weight_palettized, x = var_12891_cast_fp16)[name = string("op_12929")]; - tensor var_12934 = const()[name = string("op_12934"), val = tensor([1, 1, 1, 256])]; - tensor var_12935 = reshape(shape = var_12934, x = var_12929)[name = string("op_12935")]; - string var_12951_pad_type_0 = const()[name = string("op_12951_pad_type_0"), val = string("valid")]; - tensor var_12951_strides_0 = const()[name = string("op_12951_strides_0"), val = tensor([1, 1])]; - tensor var_12951_pad_0 = const()[name = string("op_12951_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_12951_dilations_0 = const()[name = string("op_12951_dilations_0"), val = tensor([1, 1])]; - int32 var_12951_groups_0 = const()[name = string("op_12951_groups_0"), val = int32(1)]; - tensor var_12951 = conv(dilations = var_12951_dilations_0, groups = var_12951_groups_0, pad = var_12951_pad_0, pad_type = var_12951_pad_type_0, strides = var_12951_strides_0, weight = model_model_layers_18_self_attn_v_proj_weight_palettized, x = var_12891_cast_fp16)[name = string("op_12951")]; - tensor var_12956 = const()[name = string("op_12956"), val = tensor([1, 1, 1, 256])]; - tensor var_12957 = reshape(shape = var_12956, x = var_12951)[name = string("op_12957")]; - int32 var_12972 = const()[name = string("op_12972"), val = int32(-1)]; - fp16 const_688_promoted = const()[name = string("const_688_promoted"), val = fp16(-0x1p+0)]; - tensor var_12974 = mul(x = var_12913, y = const_688_promoted)[name = string("op_12974")]; - bool input_365_interleave_0 = const()[name = string("input_365_interleave_0"), val = bool(false)]; - tensor input_365 = concat(axis = var_12972, interleave = input_365_interleave_0, values = (var_12913, var_12974))[name = string("input_365")]; - tensor normed_437_axes_0 = const()[name = string("normed_437_axes_0"), val = tensor([-1])]; - fp16 var_12969_to_fp16 = const()[name = string("op_12969_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_437_cast_fp16 = layer_norm(axes = normed_437_axes_0, epsilon = var_12969_to_fp16, x = input_365)[name = string("normed_437_cast_fp16")]; - tensor normed_439_begin_0 = const()[name = string("normed_439_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_439_end_0 = const()[name = string("normed_439_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_439_end_mask_0 = const()[name = string("normed_439_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_439 = slice_by_index(begin = normed_439_begin_0, end = normed_439_end_0, end_mask = normed_439_end_mask_0, x = normed_437_cast_fp16)[name = string("normed_439")]; - tensor var_12988_to_fp16 = const()[name = string("op_12988_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395190400)))]; - tensor q_37_cast_fp16 = mul(x = normed_439, y = var_12988_to_fp16)[name = string("q_37_cast_fp16")]; - int32 var_12999 = const()[name = string("op_12999"), val = int32(-1)]; - fp16 const_692_promoted = const()[name = string("const_692_promoted"), val = fp16(-0x1p+0)]; - tensor var_13001 = mul(x = var_12935, y = const_692_promoted)[name = string("op_13001")]; - bool input_367_interleave_0 = const()[name = string("input_367_interleave_0"), val = bool(false)]; - tensor input_367 = concat(axis = var_12999, interleave = input_367_interleave_0, values = (var_12935, var_13001))[name = string("input_367")]; - tensor normed_441_axes_0 = const()[name = string("normed_441_axes_0"), val = tensor([-1])]; - fp16 var_12996_to_fp16 = const()[name = string("op_12996_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_441_cast_fp16 = layer_norm(axes = normed_441_axes_0, epsilon = var_12996_to_fp16, x = input_367)[name = string("normed_441_cast_fp16")]; - tensor normed_443_begin_0 = const()[name = string("normed_443_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_443_end_0 = const()[name = string("normed_443_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_443_end_mask_0 = const()[name = string("normed_443_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_443 = slice_by_index(begin = normed_443_begin_0, end = normed_443_end_0, end_mask = normed_443_end_mask_0, x = normed_441_cast_fp16)[name = string("normed_443")]; - tensor var_13015_to_fp16 = const()[name = string("op_13015_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395190976)))]; - tensor k_37_cast_fp16 = mul(x = normed_443, y = var_13015_to_fp16)[name = string("k_37_cast_fp16")]; - tensor var_13017_cast_fp16 = mul(x = q_37_cast_fp16, y = cos_1_cast_fp16)[name = string("op_13017_cast_fp16")]; - tensor x1_73_begin_0 = const()[name = string("x1_73_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_73_end_0 = const()[name = string("x1_73_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_73_end_mask_0 = const()[name = string("x1_73_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_73_cast_fp16 = slice_by_index(begin = x1_73_begin_0, end = x1_73_end_0, end_mask = x1_73_end_mask_0, x = q_37_cast_fp16)[name = string("x1_73_cast_fp16")]; - tensor x2_73_begin_0 = const()[name = string("x2_73_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_73_end_0 = const()[name = string("x2_73_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_73_end_mask_0 = const()[name = string("x2_73_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_73_cast_fp16 = slice_by_index(begin = x2_73_begin_0, end = x2_73_end_0, end_mask = x2_73_end_mask_0, x = q_37_cast_fp16)[name = string("x2_73_cast_fp16")]; - fp16 const_698_promoted_to_fp16 = const()[name = string("const_698_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_13038_cast_fp16 = mul(x = x2_73_cast_fp16, y = const_698_promoted_to_fp16)[name = string("op_13038_cast_fp16")]; - int32 var_13040 = const()[name = string("op_13040"), val = int32(-1)]; - bool var_13041_interleave_0 = const()[name = string("op_13041_interleave_0"), val = bool(false)]; - tensor var_13041_cast_fp16 = concat(axis = var_13040, interleave = var_13041_interleave_0, values = (var_13038_cast_fp16, x1_73_cast_fp16))[name = string("op_13041_cast_fp16")]; - tensor var_13042_cast_fp16 = mul(x = var_13041_cast_fp16, y = sin_1_cast_fp16)[name = string("op_13042_cast_fp16")]; - tensor query_states_73_cast_fp16 = add(x = var_13017_cast_fp16, y = var_13042_cast_fp16)[name = string("query_states_73_cast_fp16")]; - tensor var_13045_cast_fp16 = mul(x = k_37_cast_fp16, y = cos_1_cast_fp16)[name = string("op_13045_cast_fp16")]; - tensor x1_75_begin_0 = const()[name = string("x1_75_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_75_end_0 = const()[name = string("x1_75_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_75_end_mask_0 = const()[name = string("x1_75_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_75_cast_fp16 = slice_by_index(begin = x1_75_begin_0, end = x1_75_end_0, end_mask = x1_75_end_mask_0, x = k_37_cast_fp16)[name = string("x1_75_cast_fp16")]; - tensor x2_75_begin_0 = const()[name = string("x2_75_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_75_end_0 = const()[name = string("x2_75_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_75_end_mask_0 = const()[name = string("x2_75_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_75_cast_fp16 = slice_by_index(begin = x2_75_begin_0, end = x2_75_end_0, end_mask = x2_75_end_mask_0, x = k_37_cast_fp16)[name = string("x2_75_cast_fp16")]; - fp16 const_701_promoted_to_fp16 = const()[name = string("const_701_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_13066_cast_fp16 = mul(x = x2_75_cast_fp16, y = const_701_promoted_to_fp16)[name = string("op_13066_cast_fp16")]; - int32 var_13068 = const()[name = string("op_13068"), val = int32(-1)]; - bool var_13069_interleave_0 = const()[name = string("op_13069_interleave_0"), val = bool(false)]; - tensor var_13069_cast_fp16 = concat(axis = var_13068, interleave = var_13069_interleave_0, values = (var_13066_cast_fp16, x1_75_cast_fp16))[name = string("op_13069_cast_fp16")]; - tensor var_13070_cast_fp16 = mul(x = var_13069_cast_fp16, y = sin_1_cast_fp16)[name = string("op_13070_cast_fp16")]; - tensor key_states_73_cast_fp16 = add(x = var_13045_cast_fp16, y = var_13070_cast_fp16)[name = string("key_states_73_cast_fp16")]; - tensor key_slice_31_begin_0 = const()[name = string("key_slice_31_begin_0"), val = tensor([15, 0, 0, 0])]; - tensor key_slice_31_end_0 = const()[name = string("key_slice_31_end_0"), val = tensor([16, 1, 512, 256])]; - tensor key_slice_31_end_mask_0 = const()[name = string("key_slice_31_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_31_cast_fp16 = slice_by_index(begin = key_slice_31_begin_0, end = key_slice_31_end_0, end_mask = key_slice_31_end_mask_0, x = coreml_update_state_85)[name = string("key_slice_31_cast_fp16")]; - tensor key_tail_31_begin_0 = const()[name = string("key_tail_31_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor key_tail_31_end_0 = const()[name = string("key_tail_31_end_0"), val = tensor([1, 1, 512, 256])]; - tensor key_tail_31_cast_fp16 = slice_by_index(begin = key_tail_31_begin_0, end = key_tail_31_end_0, x = key_slice_31_cast_fp16)[name = string("key_tail_31_cast_fp16")]; - int32 var_13083 = const()[name = string("op_13083"), val = int32(2)]; - bool shifted_key_31_interleave_0 = const()[name = string("shifted_key_31_interleave_0"), val = bool(false)]; - tensor shifted_key_31_cast_fp16 = concat(axis = var_13083, interleave = shifted_key_31_interleave_0, values = (key_tail_31_cast_fp16, key_states_73_cast_fp16))[name = string("shifted_key_31_cast_fp16")]; - tensor concat_84 = const()[name = string("concat_84"), val = tensor([15, 0, 0, 0])]; - tensor concat_85 = const()[name = string("concat_85"), val = tensor([16, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_31_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_31_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_31_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_31_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_31_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_31_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_31_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_31_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_31_cast_fp16 = slice_update(begin = concat_84, begin_mask = model_model_kv_cache_local_internal_tensor_assign_31_begin_mask_0, end = concat_85, end_mask = model_model_kv_cache_local_internal_tensor_assign_31_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_31_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_31_stride_0, update = shifted_key_31_cast_fp16, x = coreml_update_state_85)[name = string("model_model_kv_cache_local_internal_tensor_assign_31_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_31_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_88_write_state")]; - tensor coreml_update_state_88 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_88")]; - tensor value_slice_31_begin_0 = const()[name = string("value_slice_31_begin_0"), val = tensor([37, 0, 0, 0])]; - tensor value_slice_31_end_0 = const()[name = string("value_slice_31_end_0"), val = tensor([38, 1, 512, 256])]; - tensor value_slice_31_end_mask_0 = const()[name = string("value_slice_31_end_mask_0"), val = tensor([false, true, true, true])]; - tensor value_slice_31_cast_fp16 = slice_by_index(begin = value_slice_31_begin_0, end = value_slice_31_end_0, end_mask = value_slice_31_end_mask_0, x = coreml_update_state_88)[name = string("value_slice_31_cast_fp16")]; - tensor value_tail_31_begin_0 = const()[name = string("value_tail_31_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor value_tail_31_end_0 = const()[name = string("value_tail_31_end_0"), val = tensor([1, 1, 512, 256])]; - tensor value_tail_31_cast_fp16 = slice_by_index(begin = value_tail_31_begin_0, end = value_tail_31_end_0, x = value_slice_31_cast_fp16)[name = string("value_tail_31_cast_fp16")]; - int32 var_13117 = const()[name = string("op_13117"), val = int32(2)]; - bool shifted_value_31_interleave_0 = const()[name = string("shifted_value_31_interleave_0"), val = bool(false)]; - tensor shifted_value_31_cast_fp16 = concat(axis = var_13117, interleave = shifted_value_31_interleave_0, values = (value_tail_31_cast_fp16, var_12957))[name = string("shifted_value_31_cast_fp16")]; - tensor concat_86 = const()[name = string("concat_86"), val = tensor([37, 0, 0, 0])]; - tensor concat_87 = const()[name = string("concat_87"), val = tensor([38, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_32_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_32_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_32_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_32_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_32_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_32_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_32_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_32_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_32_cast_fp16 = slice_update(begin = concat_86, begin_mask = model_model_kv_cache_local_internal_tensor_assign_32_begin_mask_0, end = concat_87, end_mask = model_model_kv_cache_local_internal_tensor_assign_32_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_32_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_32_stride_0, update = shifted_value_31_cast_fp16, x = coreml_update_state_88)[name = string("model_model_kv_cache_local_internal_tensor_assign_32_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_32_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_89_write_state")]; - tensor coreml_update_state_89 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_89")]; - tensor var_13145_begin_0 = const()[name = string("op_13145_begin_0"), val = tensor([15, 0, 0, 0])]; - tensor var_13145_end_0 = const()[name = string("op_13145_end_0"), val = tensor([16, 1, 512, 256])]; - tensor var_13145_end_mask_0 = const()[name = string("op_13145_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_13145_cast_fp16 = slice_by_index(begin = var_13145_begin_0, end = var_13145_end_0, end_mask = var_13145_end_mask_0, x = coreml_update_state_89)[name = string("op_13145_cast_fp16")]; - tensor var_13152_begin_0 = const()[name = string("op_13152_begin_0"), val = tensor([37, 0, 0, 0])]; - tensor var_13152_end_0 = const()[name = string("op_13152_end_0"), val = tensor([38, 1, 512, 256])]; - tensor var_13152_end_mask_0 = const()[name = string("op_13152_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_13152_cast_fp16 = slice_by_index(begin = var_13152_begin_0, end = var_13152_end_0, end_mask = var_13152_end_mask_0, x = coreml_update_state_89)[name = string("op_13152_cast_fp16")]; - tensor var_13189 = const()[name = string("op_13189"), val = tensor([1, 4, 1, 1])]; - tensor x_293_cast_fp16 = tile(reps = var_13189, x = var_13145_cast_fp16)[name = string("x_293_cast_fp16")]; - tensor var_13209 = const()[name = string("op_13209"), val = tensor([1, 4, 1, 1])]; - tensor x_299_cast_fp16 = tile(reps = var_13209, x = var_13152_cast_fp16)[name = string("x_299_cast_fp16")]; - bool var_13236_transpose_x_1 = const()[name = string("op_13236_transpose_x_1"), val = bool(false)]; - bool var_13236_transpose_y_1 = const()[name = string("op_13236_transpose_y_1"), val = bool(true)]; - tensor var_13236 = matmul(transpose_x = var_13236_transpose_x_1, transpose_y = var_13236_transpose_y_1, x = query_states_73_cast_fp16, y = x_293_cast_fp16)[name = string("op_13236")]; - fp16 var_13237_to_fp16 = const()[name = string("op_13237_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_109_cast_fp16 = mul(x = var_13236, y = var_13237_to_fp16)[name = string("attn_weights_109_cast_fp16")]; - tensor attn_weights_111_cast_fp16 = add(x = attn_weights_109_cast_fp16, y = var_2059)[name = string("attn_weights_111_cast_fp16")]; - int32 var_13272 = const()[name = string("op_13272"), val = int32(-1)]; - tensor attn_weights_113_cast_fp16 = softmax(axis = var_13272, x = attn_weights_111_cast_fp16)[name = string("attn_weights_113_cast_fp16")]; - bool attn_output_181_transpose_x_0 = const()[name = string("attn_output_181_transpose_x_0"), val = bool(false)]; - bool attn_output_181_transpose_y_0 = const()[name = string("attn_output_181_transpose_y_0"), val = bool(false)]; - tensor attn_output_181_cast_fp16 = matmul(transpose_x = attn_output_181_transpose_x_0, transpose_y = attn_output_181_transpose_y_0, x = attn_weights_113_cast_fp16, y = x_299_cast_fp16)[name = string("attn_output_181_cast_fp16")]; - tensor var_13283_perm_0 = const()[name = string("op_13283_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_13287 = const()[name = string("op_13287"), val = tensor([1, 1, 1024])]; - tensor var_13283_cast_fp16 = transpose(perm = var_13283_perm_0, x = attn_output_181_cast_fp16)[name = string("transpose_46")]; - tensor attn_output_185_cast_fp16 = reshape(shape = var_13287, x = var_13283_cast_fp16)[name = string("attn_output_185_cast_fp16")]; - tensor var_13292 = const()[name = string("op_13292"), val = tensor([0, 2, 1])]; - string var_13308_pad_type_0 = const()[name = string("op_13308_pad_type_0"), val = string("valid")]; - int32 var_13308_groups_0 = const()[name = string("op_13308_groups_0"), val = int32(1)]; - tensor var_13308_strides_0 = const()[name = string("op_13308_strides_0"), val = tensor([1])]; - tensor var_13308_pad_0 = const()[name = string("op_13308_pad_0"), val = tensor([0, 0])]; - tensor var_13308_dilations_0 = const()[name = string("op_13308_dilations_0"), val = tensor([1])]; - tensor squeeze_18_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395191552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396076352))))[name = string("squeeze_18_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_13293_cast_fp16 = transpose(perm = var_13292, x = attn_output_185_cast_fp16)[name = string("transpose_45")]; - tensor var_13308_cast_fp16 = conv(dilations = var_13308_dilations_0, groups = var_13308_groups_0, pad = var_13308_pad_0, pad_type = var_13308_pad_type_0, strides = var_13308_strides_0, weight = squeeze_18_cast_fp16_to_fp32_to_fp16_palettized, x = var_13293_cast_fp16)[name = string("op_13308_cast_fp16")]; - tensor var_13312 = const()[name = string("op_13312"), val = tensor([0, 2, 1])]; - int32 var_13323 = const()[name = string("op_13323"), val = int32(-1)]; - fp16 const_710_promoted_to_fp16 = const()[name = string("const_710_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_297_cast_fp16 = transpose(perm = var_13312, x = var_13308_cast_fp16)[name = string("transpose_44")]; - tensor var_13325_cast_fp16 = mul(x = hidden_states_297_cast_fp16, y = const_710_promoted_to_fp16)[name = string("op_13325_cast_fp16")]; - bool input_371_interleave_0 = const()[name = string("input_371_interleave_0"), val = bool(false)]; - tensor input_371_cast_fp16 = concat(axis = var_13323, interleave = input_371_interleave_0, values = (hidden_states_297_cast_fp16, var_13325_cast_fp16))[name = string("input_371_cast_fp16")]; - tensor normed_445_axes_0 = const()[name = string("normed_445_axes_0"), val = tensor([-1])]; - fp16 var_13320_to_fp16 = const()[name = string("op_13320_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_445_cast_fp16 = layer_norm(axes = normed_445_axes_0, epsilon = var_13320_to_fp16, x = input_371_cast_fp16)[name = string("normed_445_cast_fp16")]; - tensor normed_447_begin_0 = const()[name = string("normed_447_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_447_end_0 = const()[name = string("normed_447_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_447_end_mask_0 = const()[name = string("normed_447_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_447_cast_fp16 = slice_by_index(begin = normed_447_begin_0, end = normed_447_end_0, end_mask = normed_447_end_mask_0, x = normed_445_cast_fp16)[name = string("normed_447_cast_fp16")]; - tensor var_13339_to_fp16 = const()[name = string("op_13339_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396094848)))]; - tensor attn_output_189_cast_fp16 = mul(x = normed_447_cast_fp16, y = var_13339_to_fp16)[name = string("attn_output_189_cast_fp16")]; - tensor hidden_states_299_cast_fp16 = add(x = hidden_states_289_cast_fp16, y = attn_output_189_cast_fp16)[name = string("hidden_states_299_cast_fp16")]; - int32 var_13352 = const()[name = string("op_13352"), val = int32(-1)]; - fp16 const_714_promoted_to_fp16 = const()[name = string("const_714_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_13354_cast_fp16 = mul(x = hidden_states_299_cast_fp16, y = const_714_promoted_to_fp16)[name = string("op_13354_cast_fp16")]; - bool input_373_interleave_0 = const()[name = string("input_373_interleave_0"), val = bool(false)]; - tensor input_373_cast_fp16 = concat(axis = var_13352, interleave = input_373_interleave_0, values = (hidden_states_299_cast_fp16, var_13354_cast_fp16))[name = string("input_373_cast_fp16")]; - tensor normed_449_axes_0 = const()[name = string("normed_449_axes_0"), val = tensor([-1])]; - fp16 var_13349_to_fp16 = const()[name = string("op_13349_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_449_cast_fp16 = layer_norm(axes = normed_449_axes_0, epsilon = var_13349_to_fp16, x = input_373_cast_fp16)[name = string("normed_449_cast_fp16")]; - tensor normed_451_begin_0 = const()[name = string("normed_451_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_451_end_0 = const()[name = string("normed_451_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_451_end_mask_0 = const()[name = string("normed_451_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_451_cast_fp16 = slice_by_index(begin = normed_451_begin_0, end = normed_451_end_0, end_mask = normed_451_end_mask_0, x = normed_449_cast_fp16)[name = string("normed_451_cast_fp16")]; - tensor var_13368_to_fp16 = const()[name = string("op_13368_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396097216)))]; - tensor x_301_cast_fp16 = mul(x = normed_451_cast_fp16, y = var_13368_to_fp16)[name = string("x_301_cast_fp16")]; - tensor var_13380 = const()[name = string("op_13380"), val = tensor([0, 2, 1])]; - tensor input_375_axes_0 = const()[name = string("input_375_axes_0"), val = tensor([2])]; - tensor var_13381_cast_fp16 = transpose(perm = var_13380, x = x_301_cast_fp16)[name = string("transpose_43")]; - tensor input_375_cast_fp16 = expand_dims(axes = input_375_axes_0, x = var_13381_cast_fp16)[name = string("input_375_cast_fp16")]; - string x_303_pad_type_0 = const()[name = string("x_303_pad_type_0"), val = string("valid")]; - tensor x_303_strides_0 = const()[name = string("x_303_strides_0"), val = tensor([1, 1])]; - tensor x_303_pad_0 = const()[name = string("x_303_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_303_dilations_0 = const()[name = string("x_303_dilations_0"), val = tensor([1, 1])]; - int32 x_303_groups_0 = const()[name = string("x_303_groups_0"), val = int32(1)]; - tensor model_model_layers_18_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(779945088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(785917120))))[name = string("model_model_layers_18_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_303_cast_fp16 = conv(dilations = x_303_dilations_0, groups = x_303_groups_0, pad = x_303_pad_0, pad_type = x_303_pad_type_0, strides = x_303_strides_0, weight = model_model_layers_18_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_375_cast_fp16)[name = string("x_303_cast_fp16")]; - string b_37_pad_type_0 = const()[name = string("b_37_pad_type_0"), val = string("valid")]; - tensor b_37_strides_0 = const()[name = string("b_37_strides_0"), val = tensor([1, 1])]; - tensor b_37_pad_0 = const()[name = string("b_37_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_37_dilations_0 = const()[name = string("b_37_dilations_0"), val = tensor([1, 1])]; - int32 b_37_groups_0 = const()[name = string("b_37_groups_0"), val = int32(1)]; - tensor model_model_layers_18_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(786027776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(791999808))))[name = string("model_model_layers_18_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_37_cast_fp16 = conv(dilations = b_37_dilations_0, groups = b_37_groups_0, pad = b_37_pad_0, pad_type = b_37_pad_type_0, strides = b_37_strides_0, weight = model_model_layers_18_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_375_cast_fp16)[name = string("b_37_cast_fp16")]; - string var_13406_mode_0 = const()[name = string("op_13406_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_13406_cast_fp16 = gelu(mode = var_13406_mode_0, x = x_303_cast_fp16)[name = string("op_13406_cast_fp16")]; - tensor input_377_cast_fp16 = mul(x = var_13406_cast_fp16, y = b_37_cast_fp16)[name = string("input_377_cast_fp16")]; - string e_37_pad_type_0 = const()[name = string("e_37_pad_type_0"), val = string("valid")]; - tensor e_37_strides_0 = const()[name = string("e_37_strides_0"), val = tensor([1, 1])]; - tensor e_37_pad_0 = const()[name = string("e_37_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_37_dilations_0 = const()[name = string("e_37_dilations_0"), val = tensor([1, 1])]; - int32 e_37_groups_0 = const()[name = string("e_37_groups_0"), val = int32(1)]; - tensor model_model_layers_18_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408264960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414236992))))[name = string("model_model_layers_18_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_37_cast_fp16 = conv(dilations = e_37_dilations_0, groups = e_37_groups_0, pad = e_37_pad_0, pad_type = e_37_pad_type_0, strides = e_37_strides_0, weight = model_model_layers_18_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_377_cast_fp16)[name = string("e_37_cast_fp16")]; - tensor var_13414_axes_0 = const()[name = string("op_13414_axes_0"), val = tensor([2])]; - tensor var_13414_cast_fp16 = squeeze(axes = var_13414_axes_0, x = e_37_cast_fp16)[name = string("op_13414_cast_fp16")]; - tensor var_13415 = const()[name = string("op_13415"), val = tensor([0, 2, 1])]; - int32 var_13426 = const()[name = string("op_13426"), val = int32(-1)]; - fp16 const_718_promoted_to_fp16 = const()[name = string("const_718_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_301_cast_fp16 = transpose(perm = var_13415, x = var_13414_cast_fp16)[name = string("transpose_42")]; - tensor var_13428_cast_fp16 = mul(x = hidden_states_301_cast_fp16, y = const_718_promoted_to_fp16)[name = string("op_13428_cast_fp16")]; - bool input_379_interleave_0 = const()[name = string("input_379_interleave_0"), val = bool(false)]; - tensor input_379_cast_fp16 = concat(axis = var_13426, interleave = input_379_interleave_0, values = (hidden_states_301_cast_fp16, var_13428_cast_fp16))[name = string("input_379_cast_fp16")]; - tensor normed_453_axes_0 = const()[name = string("normed_453_axes_0"), val = tensor([-1])]; - fp16 var_13423_to_fp16 = const()[name = string("op_13423_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_453_cast_fp16 = layer_norm(axes = normed_453_axes_0, epsilon = var_13423_to_fp16, x = input_379_cast_fp16)[name = string("normed_453_cast_fp16")]; - tensor normed_455_begin_0 = const()[name = string("normed_455_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_455_end_0 = const()[name = string("normed_455_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_455_end_mask_0 = const()[name = string("normed_455_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_455_cast_fp16 = slice_by_index(begin = normed_455_begin_0, end = normed_455_end_0, end_mask = normed_455_end_mask_0, x = normed_453_cast_fp16)[name = string("normed_455_cast_fp16")]; - tensor var_13442_to_fp16 = const()[name = string("op_13442_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414255488)))]; - tensor hidden_states_303_cast_fp16 = mul(x = normed_455_cast_fp16, y = var_13442_to_fp16)[name = string("hidden_states_303_cast_fp16")]; - tensor hidden_states_305_cast_fp16 = add(x = hidden_states_299_cast_fp16, y = hidden_states_303_cast_fp16)[name = string("hidden_states_305_cast_fp16")]; - int32 var_13493 = const()[name = string("op_13493"), val = int32(-1)]; - fp16 const_722_promoted_to_fp16 = const()[name = string("const_722_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_13495_cast_fp16 = mul(x = hidden_states_305_cast_fp16, y = const_722_promoted_to_fp16)[name = string("op_13495_cast_fp16")]; - bool input_381_interleave_0 = const()[name = string("input_381_interleave_0"), val = bool(false)]; - tensor input_381_cast_fp16 = concat(axis = var_13493, interleave = input_381_interleave_0, values = (hidden_states_305_cast_fp16, var_13495_cast_fp16))[name = string("input_381_cast_fp16")]; - tensor normed_457_axes_0 = const()[name = string("normed_457_axes_0"), val = tensor([-1])]; - fp16 var_13490_to_fp16 = const()[name = string("op_13490_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_457_cast_fp16 = layer_norm(axes = normed_457_axes_0, epsilon = var_13490_to_fp16, x = input_381_cast_fp16)[name = string("normed_457_cast_fp16")]; - tensor normed_459_begin_0 = const()[name = string("normed_459_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_459_end_0 = const()[name = string("normed_459_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_459_end_mask_0 = const()[name = string("normed_459_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_459_cast_fp16 = slice_by_index(begin = normed_459_begin_0, end = normed_459_end_0, end_mask = normed_459_end_mask_0, x = normed_457_cast_fp16)[name = string("normed_459_cast_fp16")]; - tensor var_13509_to_fp16 = const()[name = string("op_13509_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414257856)))]; - tensor hidden_states_307_cast_fp16 = mul(x = normed_459_cast_fp16, y = var_13509_to_fp16)[name = string("hidden_states_307_cast_fp16")]; - tensor var_13514 = const()[name = string("op_13514"), val = tensor([0, 2, 1])]; - tensor var_13517_axes_0 = const()[name = string("op_13517_axes_0"), val = tensor([2])]; - tensor var_13515_cast_fp16 = transpose(perm = var_13514, x = hidden_states_307_cast_fp16)[name = string("transpose_41")]; - tensor var_13517_cast_fp16 = expand_dims(axes = var_13517_axes_0, x = var_13515_cast_fp16)[name = string("op_13517_cast_fp16")]; - string var_13533_pad_type_0 = const()[name = string("op_13533_pad_type_0"), val = string("valid")]; - tensor var_13533_strides_0 = const()[name = string("op_13533_strides_0"), val = tensor([1, 1])]; - tensor var_13533_pad_0 = const()[name = string("op_13533_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_13533_dilations_0 = const()[name = string("op_13533_dilations_0"), val = tensor([1, 1])]; - int32 var_13533_groups_0 = const()[name = string("op_13533_groups_0"), val = int32(1)]; - tensor var_13533 = conv(dilations = var_13533_dilations_0, groups = var_13533_groups_0, pad = var_13533_pad_0, pad_type = var_13533_pad_type_0, strides = var_13533_strides_0, weight = model_model_layers_19_self_attn_q_proj_weight_palettized, x = var_13517_cast_fp16)[name = string("op_13533")]; - tensor var_13538 = const()[name = string("op_13538"), val = tensor([1, 4, 1, 256])]; - tensor var_13539 = reshape(shape = var_13538, x = var_13533)[name = string("op_13539")]; - string var_13555_pad_type_0 = const()[name = string("op_13555_pad_type_0"), val = string("valid")]; - tensor var_13555_strides_0 = const()[name = string("op_13555_strides_0"), val = tensor([1, 1])]; - tensor var_13555_pad_0 = const()[name = string("op_13555_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_13555_dilations_0 = const()[name = string("op_13555_dilations_0"), val = tensor([1, 1])]; - int32 var_13555_groups_0 = const()[name = string("op_13555_groups_0"), val = int32(1)]; - tensor var_13555 = conv(dilations = var_13555_dilations_0, groups = var_13555_groups_0, pad = var_13555_pad_0, pad_type = var_13555_pad_type_0, strides = var_13555_strides_0, weight = model_model_layers_19_self_attn_k_proj_weight_palettized, x = var_13517_cast_fp16)[name = string("op_13555")]; - tensor var_13560 = const()[name = string("op_13560"), val = tensor([1, 1, 1, 256])]; - tensor var_13561 = reshape(shape = var_13560, x = var_13555)[name = string("op_13561")]; - string var_13577_pad_type_0 = const()[name = string("op_13577_pad_type_0"), val = string("valid")]; - tensor var_13577_strides_0 = const()[name = string("op_13577_strides_0"), val = tensor([1, 1])]; - tensor var_13577_pad_0 = const()[name = string("op_13577_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_13577_dilations_0 = const()[name = string("op_13577_dilations_0"), val = tensor([1, 1])]; - int32 var_13577_groups_0 = const()[name = string("op_13577_groups_0"), val = int32(1)]; - tensor var_13577 = conv(dilations = var_13577_dilations_0, groups = var_13577_groups_0, pad = var_13577_pad_0, pad_type = var_13577_pad_type_0, strides = var_13577_strides_0, weight = model_model_layers_19_self_attn_v_proj_weight_palettized, x = var_13517_cast_fp16)[name = string("op_13577")]; - tensor var_13582 = const()[name = string("op_13582"), val = tensor([1, 1, 1, 256])]; - tensor var_13583 = reshape(shape = var_13582, x = var_13577)[name = string("op_13583")]; - int32 var_13598 = const()[name = string("op_13598"), val = int32(-1)]; - fp16 const_726_promoted = const()[name = string("const_726_promoted"), val = fp16(-0x1p+0)]; - tensor var_13600 = mul(x = var_13539, y = const_726_promoted)[name = string("op_13600")]; - bool input_385_interleave_0 = const()[name = string("input_385_interleave_0"), val = bool(false)]; - tensor input_385 = concat(axis = var_13598, interleave = input_385_interleave_0, values = (var_13539, var_13600))[name = string("input_385")]; - tensor normed_461_axes_0 = const()[name = string("normed_461_axes_0"), val = tensor([-1])]; - fp16 var_13595_to_fp16 = const()[name = string("op_13595_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_461_cast_fp16 = layer_norm(axes = normed_461_axes_0, epsilon = var_13595_to_fp16, x = input_385)[name = string("normed_461_cast_fp16")]; - tensor normed_463_begin_0 = const()[name = string("normed_463_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_463_end_0 = const()[name = string("normed_463_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_463_end_mask_0 = const()[name = string("normed_463_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_463 = slice_by_index(begin = normed_463_begin_0, end = normed_463_end_0, end_mask = normed_463_end_mask_0, x = normed_461_cast_fp16)[name = string("normed_463")]; - tensor var_13614_to_fp16 = const()[name = string("op_13614_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414260224)))]; - tensor q_39_cast_fp16 = mul(x = normed_463, y = var_13614_to_fp16)[name = string("q_39_cast_fp16")]; - int32 var_13625 = const()[name = string("op_13625"), val = int32(-1)]; - fp16 const_730_promoted = const()[name = string("const_730_promoted"), val = fp16(-0x1p+0)]; - tensor var_13627 = mul(x = var_13561, y = const_730_promoted)[name = string("op_13627")]; - bool input_387_interleave_0 = const()[name = string("input_387_interleave_0"), val = bool(false)]; - tensor input_387 = concat(axis = var_13625, interleave = input_387_interleave_0, values = (var_13561, var_13627))[name = string("input_387")]; - tensor normed_465_axes_0 = const()[name = string("normed_465_axes_0"), val = tensor([-1])]; - fp16 var_13622_to_fp16 = const()[name = string("op_13622_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_465_cast_fp16 = layer_norm(axes = normed_465_axes_0, epsilon = var_13622_to_fp16, x = input_387)[name = string("normed_465_cast_fp16")]; - tensor normed_467_begin_0 = const()[name = string("normed_467_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_467_end_0 = const()[name = string("normed_467_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_467_end_mask_0 = const()[name = string("normed_467_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_467 = slice_by_index(begin = normed_467_begin_0, end = normed_467_end_0, end_mask = normed_467_end_mask_0, x = normed_465_cast_fp16)[name = string("normed_467")]; - tensor var_13641_to_fp16 = const()[name = string("op_13641_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414260800)))]; - tensor k_39_cast_fp16 = mul(x = normed_467, y = var_13641_to_fp16)[name = string("k_39_cast_fp16")]; - tensor var_13643_cast_fp16 = mul(x = q_39_cast_fp16, y = cos_1_cast_fp16)[name = string("op_13643_cast_fp16")]; - tensor x1_77_begin_0 = const()[name = string("x1_77_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_77_end_0 = const()[name = string("x1_77_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_77_end_mask_0 = const()[name = string("x1_77_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_77_cast_fp16 = slice_by_index(begin = x1_77_begin_0, end = x1_77_end_0, end_mask = x1_77_end_mask_0, x = q_39_cast_fp16)[name = string("x1_77_cast_fp16")]; - tensor x2_77_begin_0 = const()[name = string("x2_77_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_77_end_0 = const()[name = string("x2_77_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_77_end_mask_0 = const()[name = string("x2_77_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_77_cast_fp16 = slice_by_index(begin = x2_77_begin_0, end = x2_77_end_0, end_mask = x2_77_end_mask_0, x = q_39_cast_fp16)[name = string("x2_77_cast_fp16")]; - fp16 const_736_promoted_to_fp16 = const()[name = string("const_736_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_13664_cast_fp16 = mul(x = x2_77_cast_fp16, y = const_736_promoted_to_fp16)[name = string("op_13664_cast_fp16")]; - int32 var_13666 = const()[name = string("op_13666"), val = int32(-1)]; - bool var_13667_interleave_0 = const()[name = string("op_13667_interleave_0"), val = bool(false)]; - tensor var_13667_cast_fp16 = concat(axis = var_13666, interleave = var_13667_interleave_0, values = (var_13664_cast_fp16, x1_77_cast_fp16))[name = string("op_13667_cast_fp16")]; - tensor var_13668_cast_fp16 = mul(x = var_13667_cast_fp16, y = sin_1_cast_fp16)[name = string("op_13668_cast_fp16")]; - tensor query_states_77_cast_fp16 = add(x = var_13643_cast_fp16, y = var_13668_cast_fp16)[name = string("query_states_77_cast_fp16")]; - tensor var_13671_cast_fp16 = mul(x = k_39_cast_fp16, y = cos_1_cast_fp16)[name = string("op_13671_cast_fp16")]; - tensor x1_79_begin_0 = const()[name = string("x1_79_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_79_end_0 = const()[name = string("x1_79_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_79_end_mask_0 = const()[name = string("x1_79_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_79_cast_fp16 = slice_by_index(begin = x1_79_begin_0, end = x1_79_end_0, end_mask = x1_79_end_mask_0, x = k_39_cast_fp16)[name = string("x1_79_cast_fp16")]; - tensor x2_79_begin_0 = const()[name = string("x2_79_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_79_end_0 = const()[name = string("x2_79_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_79_end_mask_0 = const()[name = string("x2_79_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_79_cast_fp16 = slice_by_index(begin = x2_79_begin_0, end = x2_79_end_0, end_mask = x2_79_end_mask_0, x = k_39_cast_fp16)[name = string("x2_79_cast_fp16")]; - fp16 const_739_promoted_to_fp16 = const()[name = string("const_739_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_13692_cast_fp16 = mul(x = x2_79_cast_fp16, y = const_739_promoted_to_fp16)[name = string("op_13692_cast_fp16")]; - int32 var_13694 = const()[name = string("op_13694"), val = int32(-1)]; - bool var_13695_interleave_0 = const()[name = string("op_13695_interleave_0"), val = bool(false)]; - tensor var_13695_cast_fp16 = concat(axis = var_13694, interleave = var_13695_interleave_0, values = (var_13692_cast_fp16, x1_79_cast_fp16))[name = string("op_13695_cast_fp16")]; - tensor var_13696_cast_fp16 = mul(x = var_13695_cast_fp16, y = sin_1_cast_fp16)[name = string("op_13696_cast_fp16")]; - tensor key_states_77_cast_fp16 = add(x = var_13671_cast_fp16, y = var_13696_cast_fp16)[name = string("key_states_77_cast_fp16")]; - tensor key_slice_33_begin_0 = const()[name = string("key_slice_33_begin_0"), val = tensor([16, 0, 0, 0])]; - tensor key_slice_33_end_0 = const()[name = string("key_slice_33_end_0"), val = tensor([17, 1, 512, 256])]; - tensor key_slice_33_end_mask_0 = const()[name = string("key_slice_33_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_33_cast_fp16 = slice_by_index(begin = key_slice_33_begin_0, end = key_slice_33_end_0, end_mask = key_slice_33_end_mask_0, x = coreml_update_state_89)[name = string("key_slice_33_cast_fp16")]; - tensor key_tail_33_begin_0 = const()[name = string("key_tail_33_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor key_tail_33_end_0 = const()[name = string("key_tail_33_end_0"), val = tensor([1, 1, 512, 256])]; - tensor key_tail_33_cast_fp16 = slice_by_index(begin = key_tail_33_begin_0, end = key_tail_33_end_0, x = key_slice_33_cast_fp16)[name = string("key_tail_33_cast_fp16")]; - int32 var_13709 = const()[name = string("op_13709"), val = int32(2)]; - bool shifted_key_33_interleave_0 = const()[name = string("shifted_key_33_interleave_0"), val = bool(false)]; - tensor shifted_key_33_cast_fp16 = concat(axis = var_13709, interleave = shifted_key_33_interleave_0, values = (key_tail_33_cast_fp16, key_states_77_cast_fp16))[name = string("shifted_key_33_cast_fp16")]; - tensor concat_88 = const()[name = string("concat_88"), val = tensor([16, 0, 0, 0])]; - tensor concat_89 = const()[name = string("concat_89"), val = tensor([17, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_33_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_33_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_33_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_33_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_33_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_33_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_33_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_33_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_33_cast_fp16 = slice_update(begin = concat_88, begin_mask = model_model_kv_cache_local_internal_tensor_assign_33_begin_mask_0, end = concat_89, end_mask = model_model_kv_cache_local_internal_tensor_assign_33_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_33_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_33_stride_0, update = shifted_key_33_cast_fp16, x = coreml_update_state_89)[name = string("model_model_kv_cache_local_internal_tensor_assign_33_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_33_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_90_write_state")]; - tensor coreml_update_state_90 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_90")]; - tensor value_slice_33_begin_0 = const()[name = string("value_slice_33_begin_0"), val = tensor([38, 0, 0, 0])]; - tensor value_slice_33_end_0 = const()[name = string("value_slice_33_end_0"), val = tensor([39, 1, 512, 256])]; - tensor value_slice_33_end_mask_0 = const()[name = string("value_slice_33_end_mask_0"), val = tensor([false, true, true, true])]; - tensor value_slice_33_cast_fp16 = slice_by_index(begin = value_slice_33_begin_0, end = value_slice_33_end_0, end_mask = value_slice_33_end_mask_0, x = coreml_update_state_90)[name = string("value_slice_33_cast_fp16")]; - tensor value_tail_33_begin_0 = const()[name = string("value_tail_33_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor value_tail_33_end_0 = const()[name = string("value_tail_33_end_0"), val = tensor([1, 1, 512, 256])]; - tensor value_tail_33_cast_fp16 = slice_by_index(begin = value_tail_33_begin_0, end = value_tail_33_end_0, x = value_slice_33_cast_fp16)[name = string("value_tail_33_cast_fp16")]; - int32 var_13743 = const()[name = string("op_13743"), val = int32(2)]; - bool shifted_value_33_interleave_0 = const()[name = string("shifted_value_33_interleave_0"), val = bool(false)]; - tensor shifted_value_33_cast_fp16 = concat(axis = var_13743, interleave = shifted_value_33_interleave_0, values = (value_tail_33_cast_fp16, var_13583))[name = string("shifted_value_33_cast_fp16")]; - tensor concat_90 = const()[name = string("concat_90"), val = tensor([38, 0, 0, 0])]; - tensor concat_91 = const()[name = string("concat_91"), val = tensor([39, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_34_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_34_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_34_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_34_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_34_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_34_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_34_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_34_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_34_cast_fp16 = slice_update(begin = concat_90, begin_mask = model_model_kv_cache_local_internal_tensor_assign_34_begin_mask_0, end = concat_91, end_mask = model_model_kv_cache_local_internal_tensor_assign_34_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_34_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_34_stride_0, update = shifted_value_33_cast_fp16, x = coreml_update_state_90)[name = string("model_model_kv_cache_local_internal_tensor_assign_34_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_34_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_91_write_state")]; - tensor coreml_update_state_91 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_91")]; - tensor var_13771_begin_0 = const()[name = string("op_13771_begin_0"), val = tensor([16, 0, 0, 0])]; - tensor var_13771_end_0 = const()[name = string("op_13771_end_0"), val = tensor([17, 1, 512, 256])]; - tensor var_13771_end_mask_0 = const()[name = string("op_13771_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_13771_cast_fp16 = slice_by_index(begin = var_13771_begin_0, end = var_13771_end_0, end_mask = var_13771_end_mask_0, x = coreml_update_state_91)[name = string("op_13771_cast_fp16")]; - tensor var_13778_begin_0 = const()[name = string("op_13778_begin_0"), val = tensor([38, 0, 0, 0])]; - tensor var_13778_end_0 = const()[name = string("op_13778_end_0"), val = tensor([39, 1, 512, 256])]; - tensor var_13778_end_mask_0 = const()[name = string("op_13778_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_13778_cast_fp16 = slice_by_index(begin = var_13778_begin_0, end = var_13778_end_0, end_mask = var_13778_end_mask_0, x = coreml_update_state_91)[name = string("op_13778_cast_fp16")]; - tensor var_13815 = const()[name = string("op_13815"), val = tensor([1, 4, 1, 1])]; - tensor x_309_cast_fp16 = tile(reps = var_13815, x = var_13771_cast_fp16)[name = string("x_309_cast_fp16")]; - tensor var_13835 = const()[name = string("op_13835"), val = tensor([1, 4, 1, 1])]; - tensor x_315_cast_fp16 = tile(reps = var_13835, x = var_13778_cast_fp16)[name = string("x_315_cast_fp16")]; - bool var_13862_transpose_x_1 = const()[name = string("op_13862_transpose_x_1"), val = bool(false)]; - bool var_13862_transpose_y_1 = const()[name = string("op_13862_transpose_y_1"), val = bool(true)]; - tensor var_13862 = matmul(transpose_x = var_13862_transpose_x_1, transpose_y = var_13862_transpose_y_1, x = query_states_77_cast_fp16, y = x_309_cast_fp16)[name = string("op_13862")]; - fp16 var_13863_to_fp16 = const()[name = string("op_13863_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_115_cast_fp16 = mul(x = var_13862, y = var_13863_to_fp16)[name = string("attn_weights_115_cast_fp16")]; - tensor attn_weights_117_cast_fp16 = add(x = attn_weights_115_cast_fp16, y = var_2059)[name = string("attn_weights_117_cast_fp16")]; - int32 var_13898 = const()[name = string("op_13898"), val = int32(-1)]; - tensor attn_weights_119_cast_fp16 = softmax(axis = var_13898, x = attn_weights_117_cast_fp16)[name = string("attn_weights_119_cast_fp16")]; - bool attn_output_191_transpose_x_0 = const()[name = string("attn_output_191_transpose_x_0"), val = bool(false)]; - bool attn_output_191_transpose_y_0 = const()[name = string("attn_output_191_transpose_y_0"), val = bool(false)]; - tensor attn_output_191_cast_fp16 = matmul(transpose_x = attn_output_191_transpose_x_0, transpose_y = attn_output_191_transpose_y_0, x = attn_weights_119_cast_fp16, y = x_315_cast_fp16)[name = string("attn_output_191_cast_fp16")]; - tensor var_13909_perm_0 = const()[name = string("op_13909_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_13913 = const()[name = string("op_13913"), val = tensor([1, 1, 1024])]; - tensor var_13909_cast_fp16 = transpose(perm = var_13909_perm_0, x = attn_output_191_cast_fp16)[name = string("transpose_40")]; - tensor attn_output_195_cast_fp16 = reshape(shape = var_13913, x = var_13909_cast_fp16)[name = string("attn_output_195_cast_fp16")]; - tensor var_13918 = const()[name = string("op_13918"), val = tensor([0, 2, 1])]; - string var_13934_pad_type_0 = const()[name = string("op_13934_pad_type_0"), val = string("valid")]; - int32 var_13934_groups_0 = const()[name = string("op_13934_groups_0"), val = int32(1)]; - tensor var_13934_strides_0 = const()[name = string("op_13934_strides_0"), val = tensor([1])]; - tensor var_13934_pad_0 = const()[name = string("op_13934_pad_0"), val = tensor([0, 0])]; - tensor var_13934_dilations_0 = const()[name = string("op_13934_dilations_0"), val = tensor([1])]; - tensor squeeze_19_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414261376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415146176))))[name = string("squeeze_19_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_13919_cast_fp16 = transpose(perm = var_13918, x = attn_output_195_cast_fp16)[name = string("transpose_39")]; - tensor var_13934_cast_fp16 = conv(dilations = var_13934_dilations_0, groups = var_13934_groups_0, pad = var_13934_pad_0, pad_type = var_13934_pad_type_0, strides = var_13934_strides_0, weight = squeeze_19_cast_fp16_to_fp32_to_fp16_palettized, x = var_13919_cast_fp16)[name = string("op_13934_cast_fp16")]; - tensor var_13938 = const()[name = string("op_13938"), val = tensor([0, 2, 1])]; - int32 var_13949 = const()[name = string("op_13949"), val = int32(-1)]; - fp16 const_748_promoted_to_fp16 = const()[name = string("const_748_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_313_cast_fp16 = transpose(perm = var_13938, x = var_13934_cast_fp16)[name = string("transpose_38")]; - tensor var_13951_cast_fp16 = mul(x = hidden_states_313_cast_fp16, y = const_748_promoted_to_fp16)[name = string("op_13951_cast_fp16")]; - bool input_391_interleave_0 = const()[name = string("input_391_interleave_0"), val = bool(false)]; - tensor input_391_cast_fp16 = concat(axis = var_13949, interleave = input_391_interleave_0, values = (hidden_states_313_cast_fp16, var_13951_cast_fp16))[name = string("input_391_cast_fp16")]; - tensor normed_469_axes_0 = const()[name = string("normed_469_axes_0"), val = tensor([-1])]; - fp16 var_13946_to_fp16 = const()[name = string("op_13946_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_469_cast_fp16 = layer_norm(axes = normed_469_axes_0, epsilon = var_13946_to_fp16, x = input_391_cast_fp16)[name = string("normed_469_cast_fp16")]; - tensor normed_471_begin_0 = const()[name = string("normed_471_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_471_end_0 = const()[name = string("normed_471_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_471_end_mask_0 = const()[name = string("normed_471_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_471_cast_fp16 = slice_by_index(begin = normed_471_begin_0, end = normed_471_end_0, end_mask = normed_471_end_mask_0, x = normed_469_cast_fp16)[name = string("normed_471_cast_fp16")]; - tensor var_13965_to_fp16 = const()[name = string("op_13965_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415164672)))]; - tensor attn_output_199_cast_fp16 = mul(x = normed_471_cast_fp16, y = var_13965_to_fp16)[name = string("attn_output_199_cast_fp16")]; - tensor hidden_states_315_cast_fp16 = add(x = hidden_states_305_cast_fp16, y = attn_output_199_cast_fp16)[name = string("hidden_states_315_cast_fp16")]; - int32 var_13978 = const()[name = string("op_13978"), val = int32(-1)]; - fp16 const_752_promoted_to_fp16 = const()[name = string("const_752_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_13980_cast_fp16 = mul(x = hidden_states_315_cast_fp16, y = const_752_promoted_to_fp16)[name = string("op_13980_cast_fp16")]; - bool input_393_interleave_0 = const()[name = string("input_393_interleave_0"), val = bool(false)]; - tensor input_393_cast_fp16 = concat(axis = var_13978, interleave = input_393_interleave_0, values = (hidden_states_315_cast_fp16, var_13980_cast_fp16))[name = string("input_393_cast_fp16")]; - tensor normed_473_axes_0 = const()[name = string("normed_473_axes_0"), val = tensor([-1])]; - fp16 var_13975_to_fp16 = const()[name = string("op_13975_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_473_cast_fp16 = layer_norm(axes = normed_473_axes_0, epsilon = var_13975_to_fp16, x = input_393_cast_fp16)[name = string("normed_473_cast_fp16")]; - tensor normed_475_begin_0 = const()[name = string("normed_475_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_475_end_0 = const()[name = string("normed_475_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_475_end_mask_0 = const()[name = string("normed_475_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_475_cast_fp16 = slice_by_index(begin = normed_475_begin_0, end = normed_475_end_0, end_mask = normed_475_end_mask_0, x = normed_473_cast_fp16)[name = string("normed_475_cast_fp16")]; - tensor var_13994_to_fp16 = const()[name = string("op_13994_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415167040)))]; - tensor x_317_cast_fp16 = mul(x = normed_475_cast_fp16, y = var_13994_to_fp16)[name = string("x_317_cast_fp16")]; - tensor var_14006 = const()[name = string("op_14006"), val = tensor([0, 2, 1])]; - tensor input_395_axes_0 = const()[name = string("input_395_axes_0"), val = tensor([2])]; - tensor var_14007_cast_fp16 = transpose(perm = var_14006, x = x_317_cast_fp16)[name = string("transpose_37")]; - tensor input_395_cast_fp16 = expand_dims(axes = input_395_axes_0, x = var_14007_cast_fp16)[name = string("input_395_cast_fp16")]; - string x_319_pad_type_0 = const()[name = string("x_319_pad_type_0"), val = string("valid")]; - tensor x_319_strides_0 = const()[name = string("x_319_strides_0"), val = tensor([1, 1])]; - tensor x_319_pad_0 = const()[name = string("x_319_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_319_dilations_0 = const()[name = string("x_319_dilations_0"), val = tensor([1, 1])]; - int32 x_319_groups_0 = const()[name = string("x_319_groups_0"), val = int32(1)]; - tensor model_model_layers_19_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(792110464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(798082496))))[name = string("model_model_layers_19_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_319_cast_fp16 = conv(dilations = x_319_dilations_0, groups = x_319_groups_0, pad = x_319_pad_0, pad_type = x_319_pad_type_0, strides = x_319_strides_0, weight = model_model_layers_19_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_395_cast_fp16)[name = string("x_319_cast_fp16")]; - string b_39_pad_type_0 = const()[name = string("b_39_pad_type_0"), val = string("valid")]; - tensor b_39_strides_0 = const()[name = string("b_39_strides_0"), val = tensor([1, 1])]; - tensor b_39_pad_0 = const()[name = string("b_39_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_39_dilations_0 = const()[name = string("b_39_dilations_0"), val = tensor([1, 1])]; - int32 b_39_groups_0 = const()[name = string("b_39_groups_0"), val = int32(1)]; - tensor model_model_layers_19_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(798193152))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(804165184))))[name = string("model_model_layers_19_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_39_cast_fp16 = conv(dilations = b_39_dilations_0, groups = b_39_groups_0, pad = b_39_pad_0, pad_type = b_39_pad_type_0, strides = b_39_strides_0, weight = model_model_layers_19_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_395_cast_fp16)[name = string("b_39_cast_fp16")]; - string var_14032_mode_0 = const()[name = string("op_14032_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_14032_cast_fp16 = gelu(mode = var_14032_mode_0, x = x_319_cast_fp16)[name = string("op_14032_cast_fp16")]; - tensor input_397_cast_fp16 = mul(x = var_14032_cast_fp16, y = b_39_cast_fp16)[name = string("input_397_cast_fp16")]; - string e_39_pad_type_0 = const()[name = string("e_39_pad_type_0"), val = string("valid")]; - tensor e_39_strides_0 = const()[name = string("e_39_strides_0"), val = tensor([1, 1])]; - tensor e_39_pad_0 = const()[name = string("e_39_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_39_dilations_0 = const()[name = string("e_39_dilations_0"), val = tensor([1, 1])]; - int32 e_39_groups_0 = const()[name = string("e_39_groups_0"), val = int32(1)]; - tensor model_model_layers_19_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427334784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433306816))))[name = string("model_model_layers_19_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_39_cast_fp16 = conv(dilations = e_39_dilations_0, groups = e_39_groups_0, pad = e_39_pad_0, pad_type = e_39_pad_type_0, strides = e_39_strides_0, weight = model_model_layers_19_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_397_cast_fp16)[name = string("e_39_cast_fp16")]; - tensor var_14040_axes_0 = const()[name = string("op_14040_axes_0"), val = tensor([2])]; - tensor var_14040_cast_fp16 = squeeze(axes = var_14040_axes_0, x = e_39_cast_fp16)[name = string("op_14040_cast_fp16")]; - tensor var_14041 = const()[name = string("op_14041"), val = tensor([0, 2, 1])]; - int32 var_14052 = const()[name = string("op_14052"), val = int32(-1)]; - fp16 const_756_promoted_to_fp16 = const()[name = string("const_756_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_317_cast_fp16 = transpose(perm = var_14041, x = var_14040_cast_fp16)[name = string("transpose_36")]; - tensor var_14054_cast_fp16 = mul(x = hidden_states_317_cast_fp16, y = const_756_promoted_to_fp16)[name = string("op_14054_cast_fp16")]; - bool input_399_interleave_0 = const()[name = string("input_399_interleave_0"), val = bool(false)]; - tensor input_399_cast_fp16 = concat(axis = var_14052, interleave = input_399_interleave_0, values = (hidden_states_317_cast_fp16, var_14054_cast_fp16))[name = string("input_399_cast_fp16")]; - tensor normed_477_axes_0 = const()[name = string("normed_477_axes_0"), val = tensor([-1])]; - fp16 var_14049_to_fp16 = const()[name = string("op_14049_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_477_cast_fp16 = layer_norm(axes = normed_477_axes_0, epsilon = var_14049_to_fp16, x = input_399_cast_fp16)[name = string("normed_477_cast_fp16")]; - tensor normed_479_begin_0 = const()[name = string("normed_479_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_479_end_0 = const()[name = string("normed_479_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_479_end_mask_0 = const()[name = string("normed_479_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_479_cast_fp16 = slice_by_index(begin = normed_479_begin_0, end = normed_479_end_0, end_mask = normed_479_end_mask_0, x = normed_477_cast_fp16)[name = string("normed_479_cast_fp16")]; - tensor var_14068_to_fp16 = const()[name = string("op_14068_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433325312)))]; - tensor hidden_states_319_cast_fp16 = mul(x = normed_479_cast_fp16, y = var_14068_to_fp16)[name = string("hidden_states_319_cast_fp16")]; - tensor hidden_states_321_cast_fp16 = add(x = hidden_states_315_cast_fp16, y = hidden_states_319_cast_fp16)[name = string("hidden_states_321_cast_fp16")]; - int32 var_14119 = const()[name = string("op_14119"), val = int32(-1)]; - fp16 const_760_promoted_to_fp16 = const()[name = string("const_760_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_14121_cast_fp16 = mul(x = hidden_states_321_cast_fp16, y = const_760_promoted_to_fp16)[name = string("op_14121_cast_fp16")]; - bool input_401_interleave_0 = const()[name = string("input_401_interleave_0"), val = bool(false)]; - tensor input_401_cast_fp16 = concat(axis = var_14119, interleave = input_401_interleave_0, values = (hidden_states_321_cast_fp16, var_14121_cast_fp16))[name = string("input_401_cast_fp16")]; - tensor normed_481_axes_0 = const()[name = string("normed_481_axes_0"), val = tensor([-1])]; - fp16 var_14116_to_fp16 = const()[name = string("op_14116_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_481_cast_fp16 = layer_norm(axes = normed_481_axes_0, epsilon = var_14116_to_fp16, x = input_401_cast_fp16)[name = string("normed_481_cast_fp16")]; - tensor normed_483_begin_0 = const()[name = string("normed_483_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_483_end_0 = const()[name = string("normed_483_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_483_end_mask_0 = const()[name = string("normed_483_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_483_cast_fp16 = slice_by_index(begin = normed_483_begin_0, end = normed_483_end_0, end_mask = normed_483_end_mask_0, x = normed_481_cast_fp16)[name = string("normed_483_cast_fp16")]; - tensor var_14135_to_fp16 = const()[name = string("op_14135_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433327680)))]; - tensor hidden_states_323_cast_fp16 = mul(x = normed_483_cast_fp16, y = var_14135_to_fp16)[name = string("hidden_states_323_cast_fp16")]; - tensor var_14140 = const()[name = string("op_14140"), val = tensor([0, 2, 1])]; - tensor var_14143_axes_0 = const()[name = string("op_14143_axes_0"), val = tensor([2])]; - tensor var_14141_cast_fp16 = transpose(perm = var_14140, x = hidden_states_323_cast_fp16)[name = string("transpose_35")]; - tensor var_14143_cast_fp16 = expand_dims(axes = var_14143_axes_0, x = var_14141_cast_fp16)[name = string("op_14143_cast_fp16")]; - string var_14159_pad_type_0 = const()[name = string("op_14159_pad_type_0"), val = string("valid")]; - tensor var_14159_strides_0 = const()[name = string("op_14159_strides_0"), val = tensor([1, 1])]; - tensor var_14159_pad_0 = const()[name = string("op_14159_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_14159_dilations_0 = const()[name = string("op_14159_dilations_0"), val = tensor([1, 1])]; - int32 var_14159_groups_0 = const()[name = string("op_14159_groups_0"), val = int32(1)]; - tensor var_14159 = conv(dilations = var_14159_dilations_0, groups = var_14159_groups_0, pad = var_14159_pad_0, pad_type = var_14159_pad_type_0, strides = var_14159_strides_0, weight = model_model_layers_20_self_attn_q_proj_weight_palettized, x = var_14143_cast_fp16)[name = string("op_14159")]; - tensor var_14164 = const()[name = string("op_14164"), val = tensor([1, 4, 1, 256])]; - tensor var_14165 = reshape(shape = var_14164, x = var_14159)[name = string("op_14165")]; - string var_14181_pad_type_0 = const()[name = string("op_14181_pad_type_0"), val = string("valid")]; - tensor var_14181_strides_0 = const()[name = string("op_14181_strides_0"), val = tensor([1, 1])]; - tensor var_14181_pad_0 = const()[name = string("op_14181_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_14181_dilations_0 = const()[name = string("op_14181_dilations_0"), val = tensor([1, 1])]; - int32 var_14181_groups_0 = const()[name = string("op_14181_groups_0"), val = int32(1)]; - tensor var_14181 = conv(dilations = var_14181_dilations_0, groups = var_14181_groups_0, pad = var_14181_pad_0, pad_type = var_14181_pad_type_0, strides = var_14181_strides_0, weight = model_model_layers_20_self_attn_k_proj_weight_palettized, x = var_14143_cast_fp16)[name = string("op_14181")]; - tensor var_14186 = const()[name = string("op_14186"), val = tensor([1, 1, 1, 256])]; - tensor var_14187 = reshape(shape = var_14186, x = var_14181)[name = string("op_14187")]; - string var_14203_pad_type_0 = const()[name = string("op_14203_pad_type_0"), val = string("valid")]; - tensor var_14203_strides_0 = const()[name = string("op_14203_strides_0"), val = tensor([1, 1])]; - tensor var_14203_pad_0 = const()[name = string("op_14203_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_14203_dilations_0 = const()[name = string("op_14203_dilations_0"), val = tensor([1, 1])]; - int32 var_14203_groups_0 = const()[name = string("op_14203_groups_0"), val = int32(1)]; - tensor var_14203 = conv(dilations = var_14203_dilations_0, groups = var_14203_groups_0, pad = var_14203_pad_0, pad_type = var_14203_pad_type_0, strides = var_14203_strides_0, weight = model_model_layers_20_self_attn_v_proj_weight_palettized, x = var_14143_cast_fp16)[name = string("op_14203")]; - tensor var_14208 = const()[name = string("op_14208"), val = tensor([1, 1, 1, 256])]; - tensor var_14209 = reshape(shape = var_14208, x = var_14203)[name = string("op_14209")]; - int32 var_14224 = const()[name = string("op_14224"), val = int32(-1)]; - fp16 const_764_promoted = const()[name = string("const_764_promoted"), val = fp16(-0x1p+0)]; - tensor var_14226 = mul(x = var_14165, y = const_764_promoted)[name = string("op_14226")]; - bool input_405_interleave_0 = const()[name = string("input_405_interleave_0"), val = bool(false)]; - tensor input_405 = concat(axis = var_14224, interleave = input_405_interleave_0, values = (var_14165, var_14226))[name = string("input_405")]; - tensor normed_485_axes_0 = const()[name = string("normed_485_axes_0"), val = tensor([-1])]; - fp16 var_14221_to_fp16 = const()[name = string("op_14221_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_485_cast_fp16 = layer_norm(axes = normed_485_axes_0, epsilon = var_14221_to_fp16, x = input_405)[name = string("normed_485_cast_fp16")]; - tensor normed_487_begin_0 = const()[name = string("normed_487_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_487_end_0 = const()[name = string("normed_487_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_487_end_mask_0 = const()[name = string("normed_487_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_487 = slice_by_index(begin = normed_487_begin_0, end = normed_487_end_0, end_mask = normed_487_end_mask_0, x = normed_485_cast_fp16)[name = string("normed_487")]; - tensor var_14240_to_fp16 = const()[name = string("op_14240_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433330048)))]; - tensor q_41_cast_fp16 = mul(x = normed_487, y = var_14240_to_fp16)[name = string("q_41_cast_fp16")]; - int32 var_14251 = const()[name = string("op_14251"), val = int32(-1)]; - fp16 const_768_promoted = const()[name = string("const_768_promoted"), val = fp16(-0x1p+0)]; - tensor var_14253 = mul(x = var_14187, y = const_768_promoted)[name = string("op_14253")]; - bool input_407_interleave_0 = const()[name = string("input_407_interleave_0"), val = bool(false)]; - tensor input_407 = concat(axis = var_14251, interleave = input_407_interleave_0, values = (var_14187, var_14253))[name = string("input_407")]; - tensor normed_489_axes_0 = const()[name = string("normed_489_axes_0"), val = tensor([-1])]; - fp16 var_14248_to_fp16 = const()[name = string("op_14248_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_489_cast_fp16 = layer_norm(axes = normed_489_axes_0, epsilon = var_14248_to_fp16, x = input_407)[name = string("normed_489_cast_fp16")]; - tensor normed_491_begin_0 = const()[name = string("normed_491_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_491_end_0 = const()[name = string("normed_491_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_491_end_mask_0 = const()[name = string("normed_491_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_491 = slice_by_index(begin = normed_491_begin_0, end = normed_491_end_0, end_mask = normed_491_end_mask_0, x = normed_489_cast_fp16)[name = string("normed_491")]; - tensor var_14267_to_fp16 = const()[name = string("op_14267_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433330624)))]; - tensor k_41_cast_fp16 = mul(x = normed_491, y = var_14267_to_fp16)[name = string("k_41_cast_fp16")]; - tensor var_14269_cast_fp16 = mul(x = q_41_cast_fp16, y = cos_1_cast_fp16)[name = string("op_14269_cast_fp16")]; - tensor x1_81_begin_0 = const()[name = string("x1_81_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_81_end_0 = const()[name = string("x1_81_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_81_end_mask_0 = const()[name = string("x1_81_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_81_cast_fp16 = slice_by_index(begin = x1_81_begin_0, end = x1_81_end_0, end_mask = x1_81_end_mask_0, x = q_41_cast_fp16)[name = string("x1_81_cast_fp16")]; - tensor x2_81_begin_0 = const()[name = string("x2_81_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_81_end_0 = const()[name = string("x2_81_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_81_end_mask_0 = const()[name = string("x2_81_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_81_cast_fp16 = slice_by_index(begin = x2_81_begin_0, end = x2_81_end_0, end_mask = x2_81_end_mask_0, x = q_41_cast_fp16)[name = string("x2_81_cast_fp16")]; - fp16 const_774_promoted_to_fp16 = const()[name = string("const_774_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_14290_cast_fp16 = mul(x = x2_81_cast_fp16, y = const_774_promoted_to_fp16)[name = string("op_14290_cast_fp16")]; - int32 var_14292 = const()[name = string("op_14292"), val = int32(-1)]; - bool var_14293_interleave_0 = const()[name = string("op_14293_interleave_0"), val = bool(false)]; - tensor var_14293_cast_fp16 = concat(axis = var_14292, interleave = var_14293_interleave_0, values = (var_14290_cast_fp16, x1_81_cast_fp16))[name = string("op_14293_cast_fp16")]; - tensor var_14294_cast_fp16 = mul(x = var_14293_cast_fp16, y = sin_1_cast_fp16)[name = string("op_14294_cast_fp16")]; - tensor query_states_81_cast_fp16 = add(x = var_14269_cast_fp16, y = var_14294_cast_fp16)[name = string("query_states_81_cast_fp16")]; - tensor var_14297_cast_fp16 = mul(x = k_41_cast_fp16, y = cos_1_cast_fp16)[name = string("op_14297_cast_fp16")]; - tensor x1_83_begin_0 = const()[name = string("x1_83_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_83_end_0 = const()[name = string("x1_83_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_83_end_mask_0 = const()[name = string("x1_83_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_83_cast_fp16 = slice_by_index(begin = x1_83_begin_0, end = x1_83_end_0, end_mask = x1_83_end_mask_0, x = k_41_cast_fp16)[name = string("x1_83_cast_fp16")]; - tensor x2_83_begin_0 = const()[name = string("x2_83_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_83_end_0 = const()[name = string("x2_83_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_83_end_mask_0 = const()[name = string("x2_83_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_83_cast_fp16 = slice_by_index(begin = x2_83_begin_0, end = x2_83_end_0, end_mask = x2_83_end_mask_0, x = k_41_cast_fp16)[name = string("x2_83_cast_fp16")]; - fp16 const_777_promoted_to_fp16 = const()[name = string("const_777_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_14318_cast_fp16 = mul(x = x2_83_cast_fp16, y = const_777_promoted_to_fp16)[name = string("op_14318_cast_fp16")]; - int32 var_14320 = const()[name = string("op_14320"), val = int32(-1)]; - bool var_14321_interleave_0 = const()[name = string("op_14321_interleave_0"), val = bool(false)]; - tensor var_14321_cast_fp16 = concat(axis = var_14320, interleave = var_14321_interleave_0, values = (var_14318_cast_fp16, x1_83_cast_fp16))[name = string("op_14321_cast_fp16")]; - tensor var_14322_cast_fp16 = mul(x = var_14321_cast_fp16, y = sin_1_cast_fp16)[name = string("op_14322_cast_fp16")]; - tensor key_states_81_cast_fp16 = add(x = var_14297_cast_fp16, y = var_14322_cast_fp16)[name = string("key_states_81_cast_fp16")]; - tensor key_slice_35_begin_0 = const()[name = string("key_slice_35_begin_0"), val = tensor([17, 0, 0, 0])]; - tensor key_slice_35_end_0 = const()[name = string("key_slice_35_end_0"), val = tensor([18, 1, 512, 256])]; - tensor key_slice_35_end_mask_0 = const()[name = string("key_slice_35_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_35_cast_fp16 = slice_by_index(begin = key_slice_35_begin_0, end = key_slice_35_end_0, end_mask = key_slice_35_end_mask_0, x = coreml_update_state_91)[name = string("key_slice_35_cast_fp16")]; - tensor key_tail_35_begin_0 = const()[name = string("key_tail_35_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor key_tail_35_end_0 = const()[name = string("key_tail_35_end_0"), val = tensor([1, 1, 512, 256])]; - tensor key_tail_35_cast_fp16 = slice_by_index(begin = key_tail_35_begin_0, end = key_tail_35_end_0, x = key_slice_35_cast_fp16)[name = string("key_tail_35_cast_fp16")]; - int32 var_14335 = const()[name = string("op_14335"), val = int32(2)]; - bool shifted_key_35_interleave_0 = const()[name = string("shifted_key_35_interleave_0"), val = bool(false)]; - tensor shifted_key_35_cast_fp16 = concat(axis = var_14335, interleave = shifted_key_35_interleave_0, values = (key_tail_35_cast_fp16, key_states_81_cast_fp16))[name = string("shifted_key_35_cast_fp16")]; - tensor concat_92 = const()[name = string("concat_92"), val = tensor([17, 0, 0, 0])]; - tensor concat_93 = const()[name = string("concat_93"), val = tensor([18, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_35_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_35_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_35_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_35_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_35_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_35_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_35_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_35_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_35_cast_fp16 = slice_update(begin = concat_92, begin_mask = model_model_kv_cache_local_internal_tensor_assign_35_begin_mask_0, end = concat_93, end_mask = model_model_kv_cache_local_internal_tensor_assign_35_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_35_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_35_stride_0, update = shifted_key_35_cast_fp16, x = coreml_update_state_91)[name = string("model_model_kv_cache_local_internal_tensor_assign_35_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_35_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_92_write_state")]; - tensor coreml_update_state_92 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_92")]; - tensor value_slice_35_begin_0 = const()[name = string("value_slice_35_begin_0"), val = tensor([39, 0, 0, 0])]; - tensor value_slice_35_end_0 = const()[name = string("value_slice_35_end_0"), val = tensor([40, 1, 512, 256])]; - tensor value_slice_35_end_mask_0 = const()[name = string("value_slice_35_end_mask_0"), val = tensor([false, true, true, true])]; - tensor value_slice_35_cast_fp16 = slice_by_index(begin = value_slice_35_begin_0, end = value_slice_35_end_0, end_mask = value_slice_35_end_mask_0, x = coreml_update_state_92)[name = string("value_slice_35_cast_fp16")]; - tensor value_tail_35_begin_0 = const()[name = string("value_tail_35_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor value_tail_35_end_0 = const()[name = string("value_tail_35_end_0"), val = tensor([1, 1, 512, 256])]; - tensor value_tail_35_cast_fp16 = slice_by_index(begin = value_tail_35_begin_0, end = value_tail_35_end_0, x = value_slice_35_cast_fp16)[name = string("value_tail_35_cast_fp16")]; - int32 var_14369 = const()[name = string("op_14369"), val = int32(2)]; - bool shifted_value_35_interleave_0 = const()[name = string("shifted_value_35_interleave_0"), val = bool(false)]; - tensor shifted_value_35_cast_fp16 = concat(axis = var_14369, interleave = shifted_value_35_interleave_0, values = (value_tail_35_cast_fp16, var_14209))[name = string("shifted_value_35_cast_fp16")]; - tensor concat_94 = const()[name = string("concat_94"), val = tensor([39, 0, 0, 0])]; - tensor concat_95 = const()[name = string("concat_95"), val = tensor([40, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_36_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_36_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_36_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_36_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_36_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_36_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_36_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_36_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_36_cast_fp16 = slice_update(begin = concat_94, begin_mask = model_model_kv_cache_local_internal_tensor_assign_36_begin_mask_0, end = concat_95, end_mask = model_model_kv_cache_local_internal_tensor_assign_36_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_36_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_36_stride_0, update = shifted_value_35_cast_fp16, x = coreml_update_state_92)[name = string("model_model_kv_cache_local_internal_tensor_assign_36_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_36_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_93_write_state")]; - tensor coreml_update_state_93 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_93")]; - tensor var_14397_begin_0 = const()[name = string("op_14397_begin_0"), val = tensor([17, 0, 0, 0])]; - tensor var_14397_end_0 = const()[name = string("op_14397_end_0"), val = tensor([18, 1, 512, 256])]; - tensor var_14397_end_mask_0 = const()[name = string("op_14397_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_14397_cast_fp16 = slice_by_index(begin = var_14397_begin_0, end = var_14397_end_0, end_mask = var_14397_end_mask_0, x = coreml_update_state_93)[name = string("op_14397_cast_fp16")]; - tensor var_14404_begin_0 = const()[name = string("op_14404_begin_0"), val = tensor([39, 0, 0, 0])]; - tensor var_14404_end_0 = const()[name = string("op_14404_end_0"), val = tensor([40, 1, 512, 256])]; - tensor var_14404_end_mask_0 = const()[name = string("op_14404_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_14404_cast_fp16 = slice_by_index(begin = var_14404_begin_0, end = var_14404_end_0, end_mask = var_14404_end_mask_0, x = coreml_update_state_93)[name = string("op_14404_cast_fp16")]; - tensor var_14441 = const()[name = string("op_14441"), val = tensor([1, 4, 1, 1])]; - tensor x_325_cast_fp16 = tile(reps = var_14441, x = var_14397_cast_fp16)[name = string("x_325_cast_fp16")]; - tensor var_14461 = const()[name = string("op_14461"), val = tensor([1, 4, 1, 1])]; - tensor x_331_cast_fp16 = tile(reps = var_14461, x = var_14404_cast_fp16)[name = string("x_331_cast_fp16")]; - bool var_14488_transpose_x_1 = const()[name = string("op_14488_transpose_x_1"), val = bool(false)]; - bool var_14488_transpose_y_1 = const()[name = string("op_14488_transpose_y_1"), val = bool(true)]; - tensor var_14488 = matmul(transpose_x = var_14488_transpose_x_1, transpose_y = var_14488_transpose_y_1, x = query_states_81_cast_fp16, y = x_325_cast_fp16)[name = string("op_14488")]; - fp16 var_14489_to_fp16 = const()[name = string("op_14489_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_121_cast_fp16 = mul(x = var_14488, y = var_14489_to_fp16)[name = string("attn_weights_121_cast_fp16")]; - tensor attn_weights_123_cast_fp16 = add(x = attn_weights_121_cast_fp16, y = var_2059)[name = string("attn_weights_123_cast_fp16")]; - int32 var_14524 = const()[name = string("op_14524"), val = int32(-1)]; - tensor attn_weights_125_cast_fp16 = softmax(axis = var_14524, x = attn_weights_123_cast_fp16)[name = string("attn_weights_125_cast_fp16")]; - bool attn_output_201_transpose_x_0 = const()[name = string("attn_output_201_transpose_x_0"), val = bool(false)]; - bool attn_output_201_transpose_y_0 = const()[name = string("attn_output_201_transpose_y_0"), val = bool(false)]; - tensor attn_output_201_cast_fp16 = matmul(transpose_x = attn_output_201_transpose_x_0, transpose_y = attn_output_201_transpose_y_0, x = attn_weights_125_cast_fp16, y = x_331_cast_fp16)[name = string("attn_output_201_cast_fp16")]; - tensor var_14535_perm_0 = const()[name = string("op_14535_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_14539 = const()[name = string("op_14539"), val = tensor([1, 1, 1024])]; - tensor var_14535_cast_fp16 = transpose(perm = var_14535_perm_0, x = attn_output_201_cast_fp16)[name = string("transpose_34")]; - tensor attn_output_205_cast_fp16 = reshape(shape = var_14539, x = var_14535_cast_fp16)[name = string("attn_output_205_cast_fp16")]; - tensor var_14544 = const()[name = string("op_14544"), val = tensor([0, 2, 1])]; - string var_14560_pad_type_0 = const()[name = string("op_14560_pad_type_0"), val = string("valid")]; - int32 var_14560_groups_0 = const()[name = string("op_14560_groups_0"), val = int32(1)]; - tensor var_14560_strides_0 = const()[name = string("op_14560_strides_0"), val = tensor([1])]; - tensor var_14560_pad_0 = const()[name = string("op_14560_pad_0"), val = tensor([0, 0])]; - tensor var_14560_dilations_0 = const()[name = string("op_14560_dilations_0"), val = tensor([1])]; - tensor squeeze_20_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433331200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(434216000))))[name = string("squeeze_20_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_14545_cast_fp16 = transpose(perm = var_14544, x = attn_output_205_cast_fp16)[name = string("transpose_33")]; - tensor var_14560_cast_fp16 = conv(dilations = var_14560_dilations_0, groups = var_14560_groups_0, pad = var_14560_pad_0, pad_type = var_14560_pad_type_0, strides = var_14560_strides_0, weight = squeeze_20_cast_fp16_to_fp32_to_fp16_palettized, x = var_14545_cast_fp16)[name = string("op_14560_cast_fp16")]; - tensor var_14564 = const()[name = string("op_14564"), val = tensor([0, 2, 1])]; - int32 var_14575 = const()[name = string("op_14575"), val = int32(-1)]; - fp16 const_786_promoted_to_fp16 = const()[name = string("const_786_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_329_cast_fp16 = transpose(perm = var_14564, x = var_14560_cast_fp16)[name = string("transpose_32")]; - tensor var_14577_cast_fp16 = mul(x = hidden_states_329_cast_fp16, y = const_786_promoted_to_fp16)[name = string("op_14577_cast_fp16")]; - bool input_411_interleave_0 = const()[name = string("input_411_interleave_0"), val = bool(false)]; - tensor input_411_cast_fp16 = concat(axis = var_14575, interleave = input_411_interleave_0, values = (hidden_states_329_cast_fp16, var_14577_cast_fp16))[name = string("input_411_cast_fp16")]; - tensor normed_493_axes_0 = const()[name = string("normed_493_axes_0"), val = tensor([-1])]; - fp16 var_14572_to_fp16 = const()[name = string("op_14572_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_493_cast_fp16 = layer_norm(axes = normed_493_axes_0, epsilon = var_14572_to_fp16, x = input_411_cast_fp16)[name = string("normed_493_cast_fp16")]; - tensor normed_495_begin_0 = const()[name = string("normed_495_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_495_end_0 = const()[name = string("normed_495_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_495_end_mask_0 = const()[name = string("normed_495_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_495_cast_fp16 = slice_by_index(begin = normed_495_begin_0, end = normed_495_end_0, end_mask = normed_495_end_mask_0, x = normed_493_cast_fp16)[name = string("normed_495_cast_fp16")]; - tensor var_14591_to_fp16 = const()[name = string("op_14591_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(434234496)))]; - tensor attn_output_209_cast_fp16 = mul(x = normed_495_cast_fp16, y = var_14591_to_fp16)[name = string("attn_output_209_cast_fp16")]; - tensor hidden_states_331_cast_fp16 = add(x = hidden_states_321_cast_fp16, y = attn_output_209_cast_fp16)[name = string("hidden_states_331_cast_fp16")]; - int32 var_14604 = const()[name = string("op_14604"), val = int32(-1)]; - fp16 const_790_promoted_to_fp16 = const()[name = string("const_790_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_14606_cast_fp16 = mul(x = hidden_states_331_cast_fp16, y = const_790_promoted_to_fp16)[name = string("op_14606_cast_fp16")]; - bool input_413_interleave_0 = const()[name = string("input_413_interleave_0"), val = bool(false)]; - tensor input_413_cast_fp16 = concat(axis = var_14604, interleave = input_413_interleave_0, values = (hidden_states_331_cast_fp16, var_14606_cast_fp16))[name = string("input_413_cast_fp16")]; - tensor normed_497_axes_0 = const()[name = string("normed_497_axes_0"), val = tensor([-1])]; - fp16 var_14601_to_fp16 = const()[name = string("op_14601_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_497_cast_fp16 = layer_norm(axes = normed_497_axes_0, epsilon = var_14601_to_fp16, x = input_413_cast_fp16)[name = string("normed_497_cast_fp16")]; - tensor normed_499_begin_0 = const()[name = string("normed_499_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_499_end_0 = const()[name = string("normed_499_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_499_end_mask_0 = const()[name = string("normed_499_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_499_cast_fp16 = slice_by_index(begin = normed_499_begin_0, end = normed_499_end_0, end_mask = normed_499_end_mask_0, x = normed_497_cast_fp16)[name = string("normed_499_cast_fp16")]; - tensor var_14620_to_fp16 = const()[name = string("op_14620_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(434236864)))]; - tensor x_333_cast_fp16 = mul(x = normed_499_cast_fp16, y = var_14620_to_fp16)[name = string("x_333_cast_fp16")]; - tensor var_14632 = const()[name = string("op_14632"), val = tensor([0, 2, 1])]; - tensor input_415_axes_0 = const()[name = string("input_415_axes_0"), val = tensor([2])]; - tensor var_14633_cast_fp16 = transpose(perm = var_14632, x = x_333_cast_fp16)[name = string("transpose_31")]; - tensor input_415_cast_fp16 = expand_dims(axes = input_415_axes_0, x = var_14633_cast_fp16)[name = string("input_415_cast_fp16")]; - string x_335_pad_type_0 = const()[name = string("x_335_pad_type_0"), val = string("valid")]; - tensor x_335_strides_0 = const()[name = string("x_335_strides_0"), val = tensor([1, 1])]; - tensor x_335_pad_0 = const()[name = string("x_335_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_335_dilations_0 = const()[name = string("x_335_dilations_0"), val = tensor([1, 1])]; - int32 x_335_groups_0 = const()[name = string("x_335_groups_0"), val = int32(1)]; - tensor model_model_layers_20_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(434239232))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(804275840))))[name = string("model_model_layers_20_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_335_cast_fp16 = conv(dilations = x_335_dilations_0, groups = x_335_groups_0, pad = x_335_pad_0, pad_type = x_335_pad_type_0, strides = x_335_strides_0, weight = model_model_layers_20_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_415_cast_fp16)[name = string("x_335_cast_fp16")]; - string b_41_pad_type_0 = const()[name = string("b_41_pad_type_0"), val = string("valid")]; - tensor b_41_strides_0 = const()[name = string("b_41_strides_0"), val = tensor([1, 1])]; - tensor b_41_pad_0 = const()[name = string("b_41_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_41_dilations_0 = const()[name = string("b_41_dilations_0"), val = tensor([1, 1])]; - int32 b_41_groups_0 = const()[name = string("b_41_groups_0"), val = int32(1)]; - tensor model_model_layers_20_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(804386496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(810358528))))[name = string("model_model_layers_20_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_41_cast_fp16 = conv(dilations = b_41_dilations_0, groups = b_41_groups_0, pad = b_41_pad_0, pad_type = b_41_pad_type_0, strides = b_41_strides_0, weight = model_model_layers_20_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_415_cast_fp16)[name = string("b_41_cast_fp16")]; - string var_14658_mode_0 = const()[name = string("op_14658_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_14658_cast_fp16 = gelu(mode = var_14658_mode_0, x = x_335_cast_fp16)[name = string("op_14658_cast_fp16")]; - tensor input_417_cast_fp16 = mul(x = var_14658_cast_fp16, y = b_41_cast_fp16)[name = string("input_417_cast_fp16")]; - string e_41_pad_type_0 = const()[name = string("e_41_pad_type_0"), val = string("valid")]; - tensor e_41_strides_0 = const()[name = string("e_41_strides_0"), val = tensor([1, 1])]; - tensor e_41_pad_0 = const()[name = string("e_41_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_41_dilations_0 = const()[name = string("e_41_dilations_0"), val = tensor([1, 1])]; - int32 e_41_groups_0 = const()[name = string("e_41_groups_0"), val = int32(1)]; - tensor model_model_layers_20_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(446404608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(452376640))))[name = string("model_model_layers_20_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_41_cast_fp16 = conv(dilations = e_41_dilations_0, groups = e_41_groups_0, pad = e_41_pad_0, pad_type = e_41_pad_type_0, strides = e_41_strides_0, weight = model_model_layers_20_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_417_cast_fp16)[name = string("e_41_cast_fp16")]; - tensor var_14666_axes_0 = const()[name = string("op_14666_axes_0"), val = tensor([2])]; - tensor var_14666_cast_fp16 = squeeze(axes = var_14666_axes_0, x = e_41_cast_fp16)[name = string("op_14666_cast_fp16")]; - tensor var_14667 = const()[name = string("op_14667"), val = tensor([0, 2, 1])]; - int32 var_14678 = const()[name = string("op_14678"), val = int32(-1)]; - fp16 const_794_promoted_to_fp16 = const()[name = string("const_794_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_333_cast_fp16 = transpose(perm = var_14667, x = var_14666_cast_fp16)[name = string("transpose_30")]; - tensor var_14680_cast_fp16 = mul(x = hidden_states_333_cast_fp16, y = const_794_promoted_to_fp16)[name = string("op_14680_cast_fp16")]; - bool input_419_interleave_0 = const()[name = string("input_419_interleave_0"), val = bool(false)]; - tensor input_419_cast_fp16 = concat(axis = var_14678, interleave = input_419_interleave_0, values = (hidden_states_333_cast_fp16, var_14680_cast_fp16))[name = string("input_419_cast_fp16")]; - tensor normed_501_axes_0 = const()[name = string("normed_501_axes_0"), val = tensor([-1])]; - fp16 var_14675_to_fp16 = const()[name = string("op_14675_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_501_cast_fp16 = layer_norm(axes = normed_501_axes_0, epsilon = var_14675_to_fp16, x = input_419_cast_fp16)[name = string("normed_501_cast_fp16")]; - tensor normed_503_begin_0 = const()[name = string("normed_503_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_503_end_0 = const()[name = string("normed_503_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_503_end_mask_0 = const()[name = string("normed_503_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_503_cast_fp16 = slice_by_index(begin = normed_503_begin_0, end = normed_503_end_0, end_mask = normed_503_end_mask_0, x = normed_501_cast_fp16)[name = string("normed_503_cast_fp16")]; - tensor var_14694_to_fp16 = const()[name = string("op_14694_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(452395136)))]; - tensor hidden_states_335_cast_fp16 = mul(x = normed_503_cast_fp16, y = var_14694_to_fp16)[name = string("hidden_states_335_cast_fp16")]; - tensor hidden_states_337_cast_fp16 = add(x = hidden_states_331_cast_fp16, y = hidden_states_335_cast_fp16)[name = string("hidden_states_337_cast_fp16")]; - int32 var_14745 = const()[name = string("op_14745"), val = int32(-1)]; - fp16 const_798_promoted_to_fp16 = const()[name = string("const_798_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_14747_cast_fp16 = mul(x = hidden_states_337_cast_fp16, y = const_798_promoted_to_fp16)[name = string("op_14747_cast_fp16")]; - bool input_421_interleave_0 = const()[name = string("input_421_interleave_0"), val = bool(false)]; - tensor input_421_cast_fp16 = concat(axis = var_14745, interleave = input_421_interleave_0, values = (hidden_states_337_cast_fp16, var_14747_cast_fp16))[name = string("input_421_cast_fp16")]; - tensor normed_505_axes_0 = const()[name = string("normed_505_axes_0"), val = tensor([-1])]; - fp16 var_14742_to_fp16 = const()[name = string("op_14742_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_505_cast_fp16 = layer_norm(axes = normed_505_axes_0, epsilon = var_14742_to_fp16, x = input_421_cast_fp16)[name = string("normed_505_cast_fp16")]; - tensor normed_507_begin_0 = const()[name = string("normed_507_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_507_end_0 = const()[name = string("normed_507_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_507_end_mask_0 = const()[name = string("normed_507_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_507_cast_fp16 = slice_by_index(begin = normed_507_begin_0, end = normed_507_end_0, end_mask = normed_507_end_mask_0, x = normed_505_cast_fp16)[name = string("normed_507_cast_fp16")]; - tensor var_14761_to_fp16 = const()[name = string("op_14761_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(452397504)))]; - tensor hidden_states_339_cast_fp16 = mul(x = normed_507_cast_fp16, y = var_14761_to_fp16)[name = string("hidden_states_339_cast_fp16")]; - tensor var_14766 = const()[name = string("op_14766"), val = tensor([0, 2, 1])]; - tensor var_14769_axes_0 = const()[name = string("op_14769_axes_0"), val = tensor([2])]; - tensor var_14767_cast_fp16 = transpose(perm = var_14766, x = hidden_states_339_cast_fp16)[name = string("transpose_29")]; - tensor var_14769_cast_fp16 = expand_dims(axes = var_14769_axes_0, x = var_14767_cast_fp16)[name = string("op_14769_cast_fp16")]; - string var_14785_pad_type_0 = const()[name = string("op_14785_pad_type_0"), val = string("valid")]; - tensor var_14785_strides_0 = const()[name = string("op_14785_strides_0"), val = tensor([1, 1])]; - tensor var_14785_pad_0 = const()[name = string("op_14785_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_14785_dilations_0 = const()[name = string("op_14785_dilations_0"), val = tensor([1, 1])]; - int32 var_14785_groups_0 = const()[name = string("op_14785_groups_0"), val = int32(1)]; - tensor var_14785 = conv(dilations = var_14785_dilations_0, groups = var_14785_groups_0, pad = var_14785_pad_0, pad_type = var_14785_pad_type_0, strides = var_14785_strides_0, weight = model_model_layers_21_self_attn_q_proj_weight_palettized, x = var_14769_cast_fp16)[name = string("op_14785")]; - tensor var_14790 = const()[name = string("op_14790"), val = tensor([1, 4, 1, 256])]; - tensor var_14791 = reshape(shape = var_14790, x = var_14785)[name = string("op_14791")]; - string var_14807_pad_type_0 = const()[name = string("op_14807_pad_type_0"), val = string("valid")]; - tensor var_14807_strides_0 = const()[name = string("op_14807_strides_0"), val = tensor([1, 1])]; - tensor var_14807_pad_0 = const()[name = string("op_14807_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_14807_dilations_0 = const()[name = string("op_14807_dilations_0"), val = tensor([1, 1])]; - int32 var_14807_groups_0 = const()[name = string("op_14807_groups_0"), val = int32(1)]; - tensor var_14807 = conv(dilations = var_14807_dilations_0, groups = var_14807_groups_0, pad = var_14807_pad_0, pad_type = var_14807_pad_type_0, strides = var_14807_strides_0, weight = model_model_layers_21_self_attn_k_proj_weight_palettized, x = var_14769_cast_fp16)[name = string("op_14807")]; - tensor var_14812 = const()[name = string("op_14812"), val = tensor([1, 1, 1, 256])]; - tensor var_14813 = reshape(shape = var_14812, x = var_14807)[name = string("op_14813")]; - string var_14829_pad_type_0 = const()[name = string("op_14829_pad_type_0"), val = string("valid")]; - tensor var_14829_strides_0 = const()[name = string("op_14829_strides_0"), val = tensor([1, 1])]; - tensor var_14829_pad_0 = const()[name = string("op_14829_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_14829_dilations_0 = const()[name = string("op_14829_dilations_0"), val = tensor([1, 1])]; - int32 var_14829_groups_0 = const()[name = string("op_14829_groups_0"), val = int32(1)]; - tensor var_14829 = conv(dilations = var_14829_dilations_0, groups = var_14829_groups_0, pad = var_14829_pad_0, pad_type = var_14829_pad_type_0, strides = var_14829_strides_0, weight = model_model_layers_21_self_attn_v_proj_weight_palettized, x = var_14769_cast_fp16)[name = string("op_14829")]; - tensor var_14834 = const()[name = string("op_14834"), val = tensor([1, 1, 1, 256])]; - tensor var_14835 = reshape(shape = var_14834, x = var_14829)[name = string("op_14835")]; - int32 var_14850 = const()[name = string("op_14850"), val = int32(-1)]; - fp16 const_802_promoted = const()[name = string("const_802_promoted"), val = fp16(-0x1p+0)]; - tensor var_14852 = mul(x = var_14791, y = const_802_promoted)[name = string("op_14852")]; - bool input_425_interleave_0 = const()[name = string("input_425_interleave_0"), val = bool(false)]; - tensor input_425 = concat(axis = var_14850, interleave = input_425_interleave_0, values = (var_14791, var_14852))[name = string("input_425")]; - tensor normed_509_axes_0 = const()[name = string("normed_509_axes_0"), val = tensor([-1])]; - fp16 var_14847_to_fp16 = const()[name = string("op_14847_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_509_cast_fp16 = layer_norm(axes = normed_509_axes_0, epsilon = var_14847_to_fp16, x = input_425)[name = string("normed_509_cast_fp16")]; - tensor normed_511_begin_0 = const()[name = string("normed_511_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_511_end_0 = const()[name = string("normed_511_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_511_end_mask_0 = const()[name = string("normed_511_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_511 = slice_by_index(begin = normed_511_begin_0, end = normed_511_end_0, end_mask = normed_511_end_mask_0, x = normed_509_cast_fp16)[name = string("normed_511")]; - tensor var_14866_to_fp16 = const()[name = string("op_14866_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(452399872)))]; - tensor q_43_cast_fp16 = mul(x = normed_511, y = var_14866_to_fp16)[name = string("q_43_cast_fp16")]; - int32 var_14877 = const()[name = string("op_14877"), val = int32(-1)]; - fp16 const_806_promoted = const()[name = string("const_806_promoted"), val = fp16(-0x1p+0)]; - tensor var_14879 = mul(x = var_14813, y = const_806_promoted)[name = string("op_14879")]; - bool input_427_interleave_0 = const()[name = string("input_427_interleave_0"), val = bool(false)]; - tensor input_427 = concat(axis = var_14877, interleave = input_427_interleave_0, values = (var_14813, var_14879))[name = string("input_427")]; - tensor normed_513_axes_0 = const()[name = string("normed_513_axes_0"), val = tensor([-1])]; - fp16 var_14874_to_fp16 = const()[name = string("op_14874_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_513_cast_fp16 = layer_norm(axes = normed_513_axes_0, epsilon = var_14874_to_fp16, x = input_427)[name = string("normed_513_cast_fp16")]; - tensor normed_515_begin_0 = const()[name = string("normed_515_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_515_end_0 = const()[name = string("normed_515_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_515_end_mask_0 = const()[name = string("normed_515_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_515 = slice_by_index(begin = normed_515_begin_0, end = normed_515_end_0, end_mask = normed_515_end_mask_0, x = normed_513_cast_fp16)[name = string("normed_515")]; - tensor var_14893_to_fp16 = const()[name = string("op_14893_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(452400448)))]; - tensor k_43_cast_fp16 = mul(x = normed_515, y = var_14893_to_fp16)[name = string("k_43_cast_fp16")]; - tensor var_14895_cast_fp16 = mul(x = q_43_cast_fp16, y = cos_1_cast_fp16)[name = string("op_14895_cast_fp16")]; - tensor x1_85_begin_0 = const()[name = string("x1_85_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_85_end_0 = const()[name = string("x1_85_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_85_end_mask_0 = const()[name = string("x1_85_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_85_cast_fp16 = slice_by_index(begin = x1_85_begin_0, end = x1_85_end_0, end_mask = x1_85_end_mask_0, x = q_43_cast_fp16)[name = string("x1_85_cast_fp16")]; - tensor x2_85_begin_0 = const()[name = string("x2_85_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_85_end_0 = const()[name = string("x2_85_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_85_end_mask_0 = const()[name = string("x2_85_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_85_cast_fp16 = slice_by_index(begin = x2_85_begin_0, end = x2_85_end_0, end_mask = x2_85_end_mask_0, x = q_43_cast_fp16)[name = string("x2_85_cast_fp16")]; - fp16 const_812_promoted_to_fp16 = const()[name = string("const_812_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_14916_cast_fp16 = mul(x = x2_85_cast_fp16, y = const_812_promoted_to_fp16)[name = string("op_14916_cast_fp16")]; - int32 var_14918 = const()[name = string("op_14918"), val = int32(-1)]; - bool var_14919_interleave_0 = const()[name = string("op_14919_interleave_0"), val = bool(false)]; - tensor var_14919_cast_fp16 = concat(axis = var_14918, interleave = var_14919_interleave_0, values = (var_14916_cast_fp16, x1_85_cast_fp16))[name = string("op_14919_cast_fp16")]; - tensor var_14920_cast_fp16 = mul(x = var_14919_cast_fp16, y = sin_1_cast_fp16)[name = string("op_14920_cast_fp16")]; - tensor query_states_85_cast_fp16 = add(x = var_14895_cast_fp16, y = var_14920_cast_fp16)[name = string("query_states_85_cast_fp16")]; - tensor var_14923_cast_fp16 = mul(x = k_43_cast_fp16, y = cos_1_cast_fp16)[name = string("op_14923_cast_fp16")]; - tensor x1_87_begin_0 = const()[name = string("x1_87_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_87_end_0 = const()[name = string("x1_87_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_87_end_mask_0 = const()[name = string("x1_87_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_87_cast_fp16 = slice_by_index(begin = x1_87_begin_0, end = x1_87_end_0, end_mask = x1_87_end_mask_0, x = k_43_cast_fp16)[name = string("x1_87_cast_fp16")]; - tensor x2_87_begin_0 = const()[name = string("x2_87_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_87_end_0 = const()[name = string("x2_87_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_87_end_mask_0 = const()[name = string("x2_87_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_87_cast_fp16 = slice_by_index(begin = x2_87_begin_0, end = x2_87_end_0, end_mask = x2_87_end_mask_0, x = k_43_cast_fp16)[name = string("x2_87_cast_fp16")]; - fp16 const_815_promoted_to_fp16 = const()[name = string("const_815_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_14944_cast_fp16 = mul(x = x2_87_cast_fp16, y = const_815_promoted_to_fp16)[name = string("op_14944_cast_fp16")]; - int32 var_14946 = const()[name = string("op_14946"), val = int32(-1)]; - bool var_14947_interleave_0 = const()[name = string("op_14947_interleave_0"), val = bool(false)]; - tensor var_14947_cast_fp16 = concat(axis = var_14946, interleave = var_14947_interleave_0, values = (var_14944_cast_fp16, x1_87_cast_fp16))[name = string("op_14947_cast_fp16")]; - tensor var_14948_cast_fp16 = mul(x = var_14947_cast_fp16, y = sin_1_cast_fp16)[name = string("op_14948_cast_fp16")]; - tensor key_states_85_cast_fp16 = add(x = var_14923_cast_fp16, y = var_14948_cast_fp16)[name = string("key_states_85_cast_fp16")]; - tensor key_slice_37_begin_0 = const()[name = string("key_slice_37_begin_0"), val = tensor([18, 0, 0, 0])]; - tensor key_slice_37_end_0 = const()[name = string("key_slice_37_end_0"), val = tensor([19, 1, 512, 256])]; - tensor key_slice_37_end_mask_0 = const()[name = string("key_slice_37_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_37_cast_fp16 = slice_by_index(begin = key_slice_37_begin_0, end = key_slice_37_end_0, end_mask = key_slice_37_end_mask_0, x = coreml_update_state_93)[name = string("key_slice_37_cast_fp16")]; - tensor key_tail_37_begin_0 = const()[name = string("key_tail_37_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor key_tail_37_end_0 = const()[name = string("key_tail_37_end_0"), val = tensor([1, 1, 512, 256])]; - tensor key_tail_37_cast_fp16 = slice_by_index(begin = key_tail_37_begin_0, end = key_tail_37_end_0, x = key_slice_37_cast_fp16)[name = string("key_tail_37_cast_fp16")]; - int32 var_14961 = const()[name = string("op_14961"), val = int32(2)]; - bool shifted_key_37_interleave_0 = const()[name = string("shifted_key_37_interleave_0"), val = bool(false)]; - tensor shifted_key_37_cast_fp16 = concat(axis = var_14961, interleave = shifted_key_37_interleave_0, values = (key_tail_37_cast_fp16, key_states_85_cast_fp16))[name = string("shifted_key_37_cast_fp16")]; - tensor concat_96 = const()[name = string("concat_96"), val = tensor([18, 0, 0, 0])]; - tensor concat_97 = const()[name = string("concat_97"), val = tensor([19, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_37_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_37_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_37_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_37_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_37_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_37_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_37_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_37_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_37_cast_fp16 = slice_update(begin = concat_96, begin_mask = model_model_kv_cache_local_internal_tensor_assign_37_begin_mask_0, end = concat_97, end_mask = model_model_kv_cache_local_internal_tensor_assign_37_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_37_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_37_stride_0, update = shifted_key_37_cast_fp16, x = coreml_update_state_93)[name = string("model_model_kv_cache_local_internal_tensor_assign_37_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_37_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_94_write_state")]; - tensor coreml_update_state_94 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_94")]; - tensor value_slice_37_begin_0 = const()[name = string("value_slice_37_begin_0"), val = tensor([40, 0, 0, 0])]; - tensor value_slice_37_end_0 = const()[name = string("value_slice_37_end_0"), val = tensor([41, 1, 512, 256])]; - tensor value_slice_37_end_mask_0 = const()[name = string("value_slice_37_end_mask_0"), val = tensor([false, true, true, true])]; - tensor value_slice_37_cast_fp16 = slice_by_index(begin = value_slice_37_begin_0, end = value_slice_37_end_0, end_mask = value_slice_37_end_mask_0, x = coreml_update_state_94)[name = string("value_slice_37_cast_fp16")]; - tensor value_tail_37_begin_0 = const()[name = string("value_tail_37_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor value_tail_37_end_0 = const()[name = string("value_tail_37_end_0"), val = tensor([1, 1, 512, 256])]; - tensor value_tail_37_cast_fp16 = slice_by_index(begin = value_tail_37_begin_0, end = value_tail_37_end_0, x = value_slice_37_cast_fp16)[name = string("value_tail_37_cast_fp16")]; - int32 var_14995 = const()[name = string("op_14995"), val = int32(2)]; - bool shifted_value_37_interleave_0 = const()[name = string("shifted_value_37_interleave_0"), val = bool(false)]; - tensor shifted_value_37_cast_fp16 = concat(axis = var_14995, interleave = shifted_value_37_interleave_0, values = (value_tail_37_cast_fp16, var_14835))[name = string("shifted_value_37_cast_fp16")]; - tensor concat_98 = const()[name = string("concat_98"), val = tensor([40, 0, 0, 0])]; - tensor concat_99 = const()[name = string("concat_99"), val = tensor([41, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_38_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_38_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_38_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_38_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_38_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_38_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_38_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_38_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_38_cast_fp16 = slice_update(begin = concat_98, begin_mask = model_model_kv_cache_local_internal_tensor_assign_38_begin_mask_0, end = concat_99, end_mask = model_model_kv_cache_local_internal_tensor_assign_38_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_38_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_38_stride_0, update = shifted_value_37_cast_fp16, x = coreml_update_state_94)[name = string("model_model_kv_cache_local_internal_tensor_assign_38_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_38_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_95_write_state")]; - tensor coreml_update_state_95 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_95")]; - tensor var_15023_begin_0 = const()[name = string("op_15023_begin_0"), val = tensor([18, 0, 0, 0])]; - tensor var_15023_end_0 = const()[name = string("op_15023_end_0"), val = tensor([19, 1, 512, 256])]; - tensor var_15023_end_mask_0 = const()[name = string("op_15023_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_15023_cast_fp16 = slice_by_index(begin = var_15023_begin_0, end = var_15023_end_0, end_mask = var_15023_end_mask_0, x = coreml_update_state_95)[name = string("op_15023_cast_fp16")]; - tensor var_15030_begin_0 = const()[name = string("op_15030_begin_0"), val = tensor([40, 0, 0, 0])]; - tensor var_15030_end_0 = const()[name = string("op_15030_end_0"), val = tensor([41, 1, 512, 256])]; - tensor var_15030_end_mask_0 = const()[name = string("op_15030_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_15030_cast_fp16 = slice_by_index(begin = var_15030_begin_0, end = var_15030_end_0, end_mask = var_15030_end_mask_0, x = coreml_update_state_95)[name = string("op_15030_cast_fp16")]; - tensor var_15067 = const()[name = string("op_15067"), val = tensor([1, 4, 1, 1])]; - tensor x_341_cast_fp16 = tile(reps = var_15067, x = var_15023_cast_fp16)[name = string("x_341_cast_fp16")]; - tensor var_15087 = const()[name = string("op_15087"), val = tensor([1, 4, 1, 1])]; - tensor x_347_cast_fp16 = tile(reps = var_15087, x = var_15030_cast_fp16)[name = string("x_347_cast_fp16")]; - bool var_15114_transpose_x_1 = const()[name = string("op_15114_transpose_x_1"), val = bool(false)]; - bool var_15114_transpose_y_1 = const()[name = string("op_15114_transpose_y_1"), val = bool(true)]; - tensor var_15114 = matmul(transpose_x = var_15114_transpose_x_1, transpose_y = var_15114_transpose_y_1, x = query_states_85_cast_fp16, y = x_341_cast_fp16)[name = string("op_15114")]; - fp16 var_15115_to_fp16 = const()[name = string("op_15115_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_127_cast_fp16 = mul(x = var_15114, y = var_15115_to_fp16)[name = string("attn_weights_127_cast_fp16")]; - tensor attn_weights_129_cast_fp16 = add(x = attn_weights_127_cast_fp16, y = var_2059)[name = string("attn_weights_129_cast_fp16")]; - int32 var_15150 = const()[name = string("op_15150"), val = int32(-1)]; - tensor attn_weights_131_cast_fp16 = softmax(axis = var_15150, x = attn_weights_129_cast_fp16)[name = string("attn_weights_131_cast_fp16")]; - bool attn_output_211_transpose_x_0 = const()[name = string("attn_output_211_transpose_x_0"), val = bool(false)]; - bool attn_output_211_transpose_y_0 = const()[name = string("attn_output_211_transpose_y_0"), val = bool(false)]; - tensor attn_output_211_cast_fp16 = matmul(transpose_x = attn_output_211_transpose_x_0, transpose_y = attn_output_211_transpose_y_0, x = attn_weights_131_cast_fp16, y = x_347_cast_fp16)[name = string("attn_output_211_cast_fp16")]; - tensor var_15161_perm_0 = const()[name = string("op_15161_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_15165 = const()[name = string("op_15165"), val = tensor([1, 1, 1024])]; - tensor var_15161_cast_fp16 = transpose(perm = var_15161_perm_0, x = attn_output_211_cast_fp16)[name = string("transpose_28")]; - tensor attn_output_215_cast_fp16 = reshape(shape = var_15165, x = var_15161_cast_fp16)[name = string("attn_output_215_cast_fp16")]; - tensor var_15170 = const()[name = string("op_15170"), val = tensor([0, 2, 1])]; - string var_15186_pad_type_0 = const()[name = string("op_15186_pad_type_0"), val = string("valid")]; - int32 var_15186_groups_0 = const()[name = string("op_15186_groups_0"), val = int32(1)]; - tensor var_15186_strides_0 = const()[name = string("op_15186_strides_0"), val = tensor([1])]; - tensor var_15186_pad_0 = const()[name = string("op_15186_pad_0"), val = tensor([0, 0])]; - tensor var_15186_dilations_0 = const()[name = string("op_15186_dilations_0"), val = tensor([1])]; - tensor squeeze_21_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(452401024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453285824))))[name = string("squeeze_21_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_15171_cast_fp16 = transpose(perm = var_15170, x = attn_output_215_cast_fp16)[name = string("transpose_27")]; - tensor var_15186_cast_fp16 = conv(dilations = var_15186_dilations_0, groups = var_15186_groups_0, pad = var_15186_pad_0, pad_type = var_15186_pad_type_0, strides = var_15186_strides_0, weight = squeeze_21_cast_fp16_to_fp32_to_fp16_palettized, x = var_15171_cast_fp16)[name = string("op_15186_cast_fp16")]; - tensor var_15190 = const()[name = string("op_15190"), val = tensor([0, 2, 1])]; - int32 var_15201 = const()[name = string("op_15201"), val = int32(-1)]; - fp16 const_824_promoted_to_fp16 = const()[name = string("const_824_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_345_cast_fp16 = transpose(perm = var_15190, x = var_15186_cast_fp16)[name = string("transpose_26")]; - tensor var_15203_cast_fp16 = mul(x = hidden_states_345_cast_fp16, y = const_824_promoted_to_fp16)[name = string("op_15203_cast_fp16")]; - bool input_431_interleave_0 = const()[name = string("input_431_interleave_0"), val = bool(false)]; - tensor input_431_cast_fp16 = concat(axis = var_15201, interleave = input_431_interleave_0, values = (hidden_states_345_cast_fp16, var_15203_cast_fp16))[name = string("input_431_cast_fp16")]; - tensor normed_517_axes_0 = const()[name = string("normed_517_axes_0"), val = tensor([-1])]; - fp16 var_15198_to_fp16 = const()[name = string("op_15198_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_517_cast_fp16 = layer_norm(axes = normed_517_axes_0, epsilon = var_15198_to_fp16, x = input_431_cast_fp16)[name = string("normed_517_cast_fp16")]; - tensor normed_519_begin_0 = const()[name = string("normed_519_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_519_end_0 = const()[name = string("normed_519_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_519_end_mask_0 = const()[name = string("normed_519_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_519_cast_fp16 = slice_by_index(begin = normed_519_begin_0, end = normed_519_end_0, end_mask = normed_519_end_mask_0, x = normed_517_cast_fp16)[name = string("normed_519_cast_fp16")]; - tensor var_15217_to_fp16 = const()[name = string("op_15217_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453304320)))]; - tensor attn_output_219_cast_fp16 = mul(x = normed_519_cast_fp16, y = var_15217_to_fp16)[name = string("attn_output_219_cast_fp16")]; - tensor hidden_states_347_cast_fp16 = add(x = hidden_states_337_cast_fp16, y = attn_output_219_cast_fp16)[name = string("hidden_states_347_cast_fp16")]; - int32 var_15230 = const()[name = string("op_15230"), val = int32(-1)]; - fp16 const_828_promoted_to_fp16 = const()[name = string("const_828_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_15232_cast_fp16 = mul(x = hidden_states_347_cast_fp16, y = const_828_promoted_to_fp16)[name = string("op_15232_cast_fp16")]; - bool input_433_interleave_0 = const()[name = string("input_433_interleave_0"), val = bool(false)]; - tensor input_433_cast_fp16 = concat(axis = var_15230, interleave = input_433_interleave_0, values = (hidden_states_347_cast_fp16, var_15232_cast_fp16))[name = string("input_433_cast_fp16")]; - tensor normed_521_axes_0 = const()[name = string("normed_521_axes_0"), val = tensor([-1])]; - fp16 var_15227_to_fp16 = const()[name = string("op_15227_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_521_cast_fp16 = layer_norm(axes = normed_521_axes_0, epsilon = var_15227_to_fp16, x = input_433_cast_fp16)[name = string("normed_521_cast_fp16")]; - tensor normed_523_begin_0 = const()[name = string("normed_523_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_523_end_0 = const()[name = string("normed_523_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_523_end_mask_0 = const()[name = string("normed_523_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_523_cast_fp16 = slice_by_index(begin = normed_523_begin_0, end = normed_523_end_0, end_mask = normed_523_end_mask_0, x = normed_521_cast_fp16)[name = string("normed_523_cast_fp16")]; - tensor var_15246_to_fp16 = const()[name = string("op_15246_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453306688)))]; - tensor x_349_cast_fp16 = mul(x = normed_523_cast_fp16, y = var_15246_to_fp16)[name = string("x_349_cast_fp16")]; - tensor var_15258 = const()[name = string("op_15258"), val = tensor([0, 2, 1])]; - tensor input_435_axes_0 = const()[name = string("input_435_axes_0"), val = tensor([2])]; - tensor var_15259_cast_fp16 = transpose(perm = var_15258, x = x_349_cast_fp16)[name = string("transpose_25")]; - tensor input_435_cast_fp16 = expand_dims(axes = input_435_axes_0, x = var_15259_cast_fp16)[name = string("input_435_cast_fp16")]; - string x_351_pad_type_0 = const()[name = string("x_351_pad_type_0"), val = string("valid")]; - tensor x_351_strides_0 = const()[name = string("x_351_strides_0"), val = tensor([1, 1])]; - tensor x_351_pad_0 = const()[name = string("x_351_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_351_dilations_0 = const()[name = string("x_351_dilations_0"), val = tensor([1, 1])]; - int32 x_351_groups_0 = const()[name = string("x_351_groups_0"), val = int32(1)]; - tensor model_model_layers_21_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(810469184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(816441216))))[name = string("model_model_layers_21_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_351_cast_fp16 = conv(dilations = x_351_dilations_0, groups = x_351_groups_0, pad = x_351_pad_0, pad_type = x_351_pad_type_0, strides = x_351_strides_0, weight = model_model_layers_21_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_435_cast_fp16)[name = string("x_351_cast_fp16")]; - string b_43_pad_type_0 = const()[name = string("b_43_pad_type_0"), val = string("valid")]; - tensor b_43_strides_0 = const()[name = string("b_43_strides_0"), val = tensor([1, 1])]; - tensor b_43_pad_0 = const()[name = string("b_43_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_43_dilations_0 = const()[name = string("b_43_dilations_0"), val = tensor([1, 1])]; - int32 b_43_groups_0 = const()[name = string("b_43_groups_0"), val = int32(1)]; - tensor model_model_layers_21_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(816551872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(822523904))))[name = string("model_model_layers_21_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_43_cast_fp16 = conv(dilations = b_43_dilations_0, groups = b_43_groups_0, pad = b_43_pad_0, pad_type = b_43_pad_type_0, strides = b_43_strides_0, weight = model_model_layers_21_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_435_cast_fp16)[name = string("b_43_cast_fp16")]; - string var_15284_mode_0 = const()[name = string("op_15284_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_15284_cast_fp16 = gelu(mode = var_15284_mode_0, x = x_351_cast_fp16)[name = string("op_15284_cast_fp16")]; - tensor input_437_cast_fp16 = mul(x = var_15284_cast_fp16, y = b_43_cast_fp16)[name = string("input_437_cast_fp16")]; - string e_43_pad_type_0 = const()[name = string("e_43_pad_type_0"), val = string("valid")]; - tensor e_43_strides_0 = const()[name = string("e_43_strides_0"), val = tensor([1, 1])]; - tensor e_43_pad_0 = const()[name = string("e_43_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_43_dilations_0 = const()[name = string("e_43_dilations_0"), val = tensor([1, 1])]; - int32 e_43_groups_0 = const()[name = string("e_43_groups_0"), val = int32(1)]; - tensor model_model_layers_21_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(465474432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471446464))))[name = string("model_model_layers_21_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_43_cast_fp16 = conv(dilations = e_43_dilations_0, groups = e_43_groups_0, pad = e_43_pad_0, pad_type = e_43_pad_type_0, strides = e_43_strides_0, weight = model_model_layers_21_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_437_cast_fp16)[name = string("e_43_cast_fp16")]; - tensor var_15292_axes_0 = const()[name = string("op_15292_axes_0"), val = tensor([2])]; - tensor var_15292_cast_fp16 = squeeze(axes = var_15292_axes_0, x = e_43_cast_fp16)[name = string("op_15292_cast_fp16")]; - tensor var_15293 = const()[name = string("op_15293"), val = tensor([0, 2, 1])]; - int32 var_15304 = const()[name = string("op_15304"), val = int32(-1)]; - fp16 const_832_promoted_to_fp16 = const()[name = string("const_832_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_349_cast_fp16 = transpose(perm = var_15293, x = var_15292_cast_fp16)[name = string("transpose_24")]; - tensor var_15306_cast_fp16 = mul(x = hidden_states_349_cast_fp16, y = const_832_promoted_to_fp16)[name = string("op_15306_cast_fp16")]; - bool input_439_interleave_0 = const()[name = string("input_439_interleave_0"), val = bool(false)]; - tensor input_439_cast_fp16 = concat(axis = var_15304, interleave = input_439_interleave_0, values = (hidden_states_349_cast_fp16, var_15306_cast_fp16))[name = string("input_439_cast_fp16")]; - tensor normed_525_axes_0 = const()[name = string("normed_525_axes_0"), val = tensor([-1])]; - fp16 var_15301_to_fp16 = const()[name = string("op_15301_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_525_cast_fp16 = layer_norm(axes = normed_525_axes_0, epsilon = var_15301_to_fp16, x = input_439_cast_fp16)[name = string("normed_525_cast_fp16")]; - tensor normed_527_begin_0 = const()[name = string("normed_527_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_527_end_0 = const()[name = string("normed_527_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_527_end_mask_0 = const()[name = string("normed_527_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_527_cast_fp16 = slice_by_index(begin = normed_527_begin_0, end = normed_527_end_0, end_mask = normed_527_end_mask_0, x = normed_525_cast_fp16)[name = string("normed_527_cast_fp16")]; - tensor var_15320_to_fp16 = const()[name = string("op_15320_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471464960)))]; - tensor hidden_states_351_cast_fp16 = mul(x = normed_527_cast_fp16, y = var_15320_to_fp16)[name = string("hidden_states_351_cast_fp16")]; - tensor hidden_states_353_cast_fp16 = add(x = hidden_states_347_cast_fp16, y = hidden_states_351_cast_fp16)[name = string("hidden_states_353_cast_fp16")]; - int32 var_15371 = const()[name = string("op_15371"), val = int32(-1)]; - fp16 const_836_promoted_to_fp16 = const()[name = string("const_836_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_15373_cast_fp16 = mul(x = hidden_states_353_cast_fp16, y = const_836_promoted_to_fp16)[name = string("op_15373_cast_fp16")]; - bool input_441_interleave_0 = const()[name = string("input_441_interleave_0"), val = bool(false)]; - tensor input_441_cast_fp16 = concat(axis = var_15371, interleave = input_441_interleave_0, values = (hidden_states_353_cast_fp16, var_15373_cast_fp16))[name = string("input_441_cast_fp16")]; - tensor normed_529_axes_0 = const()[name = string("normed_529_axes_0"), val = tensor([-1])]; - fp16 var_15368_to_fp16 = const()[name = string("op_15368_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_529_cast_fp16 = layer_norm(axes = normed_529_axes_0, epsilon = var_15368_to_fp16, x = input_441_cast_fp16)[name = string("normed_529_cast_fp16")]; - tensor normed_531_begin_0 = const()[name = string("normed_531_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_531_end_0 = const()[name = string("normed_531_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_531_end_mask_0 = const()[name = string("normed_531_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_531_cast_fp16 = slice_by_index(begin = normed_531_begin_0, end = normed_531_end_0, end_mask = normed_531_end_mask_0, x = normed_529_cast_fp16)[name = string("normed_531_cast_fp16")]; - tensor var_15387_to_fp16 = const()[name = string("op_15387_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471467328)))]; - tensor hidden_states_355_cast_fp16 = mul(x = normed_531_cast_fp16, y = var_15387_to_fp16)[name = string("hidden_states_355_cast_fp16")]; - tensor var_15392 = const()[name = string("op_15392"), val = tensor([0, 2, 1])]; - tensor var_15395_axes_0 = const()[name = string("op_15395_axes_0"), val = tensor([2])]; - tensor var_15393_cast_fp16 = transpose(perm = var_15392, x = hidden_states_355_cast_fp16)[name = string("transpose_23")]; - tensor var_15395_cast_fp16 = expand_dims(axes = var_15395_axes_0, x = var_15393_cast_fp16)[name = string("op_15395_cast_fp16")]; - string var_15411_pad_type_0 = const()[name = string("op_15411_pad_type_0"), val = string("valid")]; - tensor var_15411_strides_0 = const()[name = string("op_15411_strides_0"), val = tensor([1, 1])]; - tensor var_15411_pad_0 = const()[name = string("op_15411_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_15411_dilations_0 = const()[name = string("op_15411_dilations_0"), val = tensor([1, 1])]; - int32 var_15411_groups_0 = const()[name = string("op_15411_groups_0"), val = int32(1)]; - tensor var_15411 = conv(dilations = var_15411_dilations_0, groups = var_15411_groups_0, pad = var_15411_pad_0, pad_type = var_15411_pad_type_0, strides = var_15411_strides_0, weight = model_model_layers_22_self_attn_q_proj_weight_palettized, x = var_15395_cast_fp16)[name = string("op_15411")]; - tensor var_15416 = const()[name = string("op_15416"), val = tensor([1, 4, 1, 256])]; - tensor var_15417 = reshape(shape = var_15416, x = var_15411)[name = string("op_15417")]; - string var_15433_pad_type_0 = const()[name = string("op_15433_pad_type_0"), val = string("valid")]; - tensor var_15433_strides_0 = const()[name = string("op_15433_strides_0"), val = tensor([1, 1])]; - tensor var_15433_pad_0 = const()[name = string("op_15433_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_15433_dilations_0 = const()[name = string("op_15433_dilations_0"), val = tensor([1, 1])]; - int32 var_15433_groups_0 = const()[name = string("op_15433_groups_0"), val = int32(1)]; - tensor var_15433 = conv(dilations = var_15433_dilations_0, groups = var_15433_groups_0, pad = var_15433_pad_0, pad_type = var_15433_pad_type_0, strides = var_15433_strides_0, weight = model_model_layers_22_self_attn_k_proj_weight_palettized, x = var_15395_cast_fp16)[name = string("op_15433")]; - tensor var_15438 = const()[name = string("op_15438"), val = tensor([1, 1, 1, 256])]; - tensor var_15439 = reshape(shape = var_15438, x = var_15433)[name = string("op_15439")]; - string var_15455_pad_type_0 = const()[name = string("op_15455_pad_type_0"), val = string("valid")]; - tensor var_15455_strides_0 = const()[name = string("op_15455_strides_0"), val = tensor([1, 1])]; - tensor var_15455_pad_0 = const()[name = string("op_15455_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_15455_dilations_0 = const()[name = string("op_15455_dilations_0"), val = tensor([1, 1])]; - int32 var_15455_groups_0 = const()[name = string("op_15455_groups_0"), val = int32(1)]; - tensor var_15455 = conv(dilations = var_15455_dilations_0, groups = var_15455_groups_0, pad = var_15455_pad_0, pad_type = var_15455_pad_type_0, strides = var_15455_strides_0, weight = model_model_layers_22_self_attn_v_proj_weight_palettized, x = var_15395_cast_fp16)[name = string("op_15455")]; - tensor var_15460 = const()[name = string("op_15460"), val = tensor([1, 1, 1, 256])]; - tensor var_15461 = reshape(shape = var_15460, x = var_15455)[name = string("op_15461")]; - int32 var_15476 = const()[name = string("op_15476"), val = int32(-1)]; - fp16 const_840_promoted = const()[name = string("const_840_promoted"), val = fp16(-0x1p+0)]; - tensor var_15478 = mul(x = var_15417, y = const_840_promoted)[name = string("op_15478")]; - bool input_445_interleave_0 = const()[name = string("input_445_interleave_0"), val = bool(false)]; - tensor input_445 = concat(axis = var_15476, interleave = input_445_interleave_0, values = (var_15417, var_15478))[name = string("input_445")]; - tensor normed_533_axes_0 = const()[name = string("normed_533_axes_0"), val = tensor([-1])]; - fp16 var_15473_to_fp16 = const()[name = string("op_15473_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_533_cast_fp16 = layer_norm(axes = normed_533_axes_0, epsilon = var_15473_to_fp16, x = input_445)[name = string("normed_533_cast_fp16")]; - tensor normed_535_begin_0 = const()[name = string("normed_535_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_535_end_0 = const()[name = string("normed_535_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_535_end_mask_0 = const()[name = string("normed_535_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_535 = slice_by_index(begin = normed_535_begin_0, end = normed_535_end_0, end_mask = normed_535_end_mask_0, x = normed_533_cast_fp16)[name = string("normed_535")]; - tensor var_15492_to_fp16 = const()[name = string("op_15492_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471469696)))]; - tensor q_45_cast_fp16 = mul(x = normed_535, y = var_15492_to_fp16)[name = string("q_45_cast_fp16")]; - int32 var_15503 = const()[name = string("op_15503"), val = int32(-1)]; - fp16 const_844_promoted = const()[name = string("const_844_promoted"), val = fp16(-0x1p+0)]; - tensor var_15505 = mul(x = var_15439, y = const_844_promoted)[name = string("op_15505")]; - bool input_447_interleave_0 = const()[name = string("input_447_interleave_0"), val = bool(false)]; - tensor input_447 = concat(axis = var_15503, interleave = input_447_interleave_0, values = (var_15439, var_15505))[name = string("input_447")]; - tensor normed_537_axes_0 = const()[name = string("normed_537_axes_0"), val = tensor([-1])]; - fp16 var_15500_to_fp16 = const()[name = string("op_15500_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_537_cast_fp16 = layer_norm(axes = normed_537_axes_0, epsilon = var_15500_to_fp16, x = input_447)[name = string("normed_537_cast_fp16")]; - tensor normed_539_begin_0 = const()[name = string("normed_539_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_539_end_0 = const()[name = string("normed_539_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_539_end_mask_0 = const()[name = string("normed_539_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_539 = slice_by_index(begin = normed_539_begin_0, end = normed_539_end_0, end_mask = normed_539_end_mask_0, x = normed_537_cast_fp16)[name = string("normed_539")]; - tensor var_15519_to_fp16 = const()[name = string("op_15519_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471470272)))]; - tensor k_45_cast_fp16 = mul(x = normed_539, y = var_15519_to_fp16)[name = string("k_45_cast_fp16")]; - tensor var_15521_cast_fp16 = mul(x = q_45_cast_fp16, y = cos_1_cast_fp16)[name = string("op_15521_cast_fp16")]; - tensor x1_89_begin_0 = const()[name = string("x1_89_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_89_end_0 = const()[name = string("x1_89_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_89_end_mask_0 = const()[name = string("x1_89_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_89_cast_fp16 = slice_by_index(begin = x1_89_begin_0, end = x1_89_end_0, end_mask = x1_89_end_mask_0, x = q_45_cast_fp16)[name = string("x1_89_cast_fp16")]; - tensor x2_89_begin_0 = const()[name = string("x2_89_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_89_end_0 = const()[name = string("x2_89_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_89_end_mask_0 = const()[name = string("x2_89_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_89_cast_fp16 = slice_by_index(begin = x2_89_begin_0, end = x2_89_end_0, end_mask = x2_89_end_mask_0, x = q_45_cast_fp16)[name = string("x2_89_cast_fp16")]; - fp16 const_850_promoted_to_fp16 = const()[name = string("const_850_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_15542_cast_fp16 = mul(x = x2_89_cast_fp16, y = const_850_promoted_to_fp16)[name = string("op_15542_cast_fp16")]; - int32 var_15544 = const()[name = string("op_15544"), val = int32(-1)]; - bool var_15545_interleave_0 = const()[name = string("op_15545_interleave_0"), val = bool(false)]; - tensor var_15545_cast_fp16 = concat(axis = var_15544, interleave = var_15545_interleave_0, values = (var_15542_cast_fp16, x1_89_cast_fp16))[name = string("op_15545_cast_fp16")]; - tensor var_15546_cast_fp16 = mul(x = var_15545_cast_fp16, y = sin_1_cast_fp16)[name = string("op_15546_cast_fp16")]; - tensor query_states_89_cast_fp16 = add(x = var_15521_cast_fp16, y = var_15546_cast_fp16)[name = string("query_states_89_cast_fp16")]; - tensor var_15549_cast_fp16 = mul(x = k_45_cast_fp16, y = cos_1_cast_fp16)[name = string("op_15549_cast_fp16")]; - tensor x1_91_begin_0 = const()[name = string("x1_91_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_91_end_0 = const()[name = string("x1_91_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_91_end_mask_0 = const()[name = string("x1_91_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_91_cast_fp16 = slice_by_index(begin = x1_91_begin_0, end = x1_91_end_0, end_mask = x1_91_end_mask_0, x = k_45_cast_fp16)[name = string("x1_91_cast_fp16")]; - tensor x2_91_begin_0 = const()[name = string("x2_91_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_91_end_0 = const()[name = string("x2_91_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_91_end_mask_0 = const()[name = string("x2_91_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_91_cast_fp16 = slice_by_index(begin = x2_91_begin_0, end = x2_91_end_0, end_mask = x2_91_end_mask_0, x = k_45_cast_fp16)[name = string("x2_91_cast_fp16")]; - fp16 const_853_promoted_to_fp16 = const()[name = string("const_853_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_15570_cast_fp16 = mul(x = x2_91_cast_fp16, y = const_853_promoted_to_fp16)[name = string("op_15570_cast_fp16")]; - int32 var_15572 = const()[name = string("op_15572"), val = int32(-1)]; - bool var_15573_interleave_0 = const()[name = string("op_15573_interleave_0"), val = bool(false)]; - tensor var_15573_cast_fp16 = concat(axis = var_15572, interleave = var_15573_interleave_0, values = (var_15570_cast_fp16, x1_91_cast_fp16))[name = string("op_15573_cast_fp16")]; - tensor var_15574_cast_fp16 = mul(x = var_15573_cast_fp16, y = sin_1_cast_fp16)[name = string("op_15574_cast_fp16")]; - tensor key_states_89_cast_fp16 = add(x = var_15549_cast_fp16, y = var_15574_cast_fp16)[name = string("key_states_89_cast_fp16")]; - tensor key_slice_39_begin_0 = const()[name = string("key_slice_39_begin_0"), val = tensor([19, 0, 0, 0])]; - tensor key_slice_39_end_0 = const()[name = string("key_slice_39_end_0"), val = tensor([20, 1, 512, 256])]; - tensor key_slice_39_end_mask_0 = const()[name = string("key_slice_39_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_39_cast_fp16 = slice_by_index(begin = key_slice_39_begin_0, end = key_slice_39_end_0, end_mask = key_slice_39_end_mask_0, x = coreml_update_state_95)[name = string("key_slice_39_cast_fp16")]; - tensor key_tail_39_begin_0 = const()[name = string("key_tail_39_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor key_tail_39_end_0 = const()[name = string("key_tail_39_end_0"), val = tensor([1, 1, 512, 256])]; - tensor key_tail_39_cast_fp16 = slice_by_index(begin = key_tail_39_begin_0, end = key_tail_39_end_0, x = key_slice_39_cast_fp16)[name = string("key_tail_39_cast_fp16")]; - int32 var_15587 = const()[name = string("op_15587"), val = int32(2)]; - bool shifted_key_39_interleave_0 = const()[name = string("shifted_key_39_interleave_0"), val = bool(false)]; - tensor shifted_key_39_cast_fp16 = concat(axis = var_15587, interleave = shifted_key_39_interleave_0, values = (key_tail_39_cast_fp16, key_states_89_cast_fp16))[name = string("shifted_key_39_cast_fp16")]; - tensor concat_100 = const()[name = string("concat_100"), val = tensor([19, 0, 0, 0])]; - tensor concat_101 = const()[name = string("concat_101"), val = tensor([20, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_39_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_39_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_39_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_39_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_39_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_39_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_39_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_39_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_39_cast_fp16 = slice_update(begin = concat_100, begin_mask = model_model_kv_cache_local_internal_tensor_assign_39_begin_mask_0, end = concat_101, end_mask = model_model_kv_cache_local_internal_tensor_assign_39_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_39_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_39_stride_0, update = shifted_key_39_cast_fp16, x = coreml_update_state_95)[name = string("model_model_kv_cache_local_internal_tensor_assign_39_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_39_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_96_write_state")]; - tensor coreml_update_state_96 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_96")]; - tensor value_slice_39_begin_0 = const()[name = string("value_slice_39_begin_0"), val = tensor([41, 0, 0, 0])]; - tensor value_slice_39_end_0 = const()[name = string("value_slice_39_end_0"), val = tensor([42, 1, 512, 256])]; - tensor value_slice_39_end_mask_0 = const()[name = string("value_slice_39_end_mask_0"), val = tensor([false, true, true, true])]; - tensor value_slice_39_cast_fp16 = slice_by_index(begin = value_slice_39_begin_0, end = value_slice_39_end_0, end_mask = value_slice_39_end_mask_0, x = coreml_update_state_96)[name = string("value_slice_39_cast_fp16")]; - tensor value_tail_39_begin_0 = const()[name = string("value_tail_39_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor value_tail_39_end_0 = const()[name = string("value_tail_39_end_0"), val = tensor([1, 1, 512, 256])]; - tensor value_tail_39_cast_fp16 = slice_by_index(begin = value_tail_39_begin_0, end = value_tail_39_end_0, x = value_slice_39_cast_fp16)[name = string("value_tail_39_cast_fp16")]; - int32 var_15621 = const()[name = string("op_15621"), val = int32(2)]; - bool shifted_value_39_interleave_0 = const()[name = string("shifted_value_39_interleave_0"), val = bool(false)]; - tensor shifted_value_39_cast_fp16 = concat(axis = var_15621, interleave = shifted_value_39_interleave_0, values = (value_tail_39_cast_fp16, var_15461))[name = string("shifted_value_39_cast_fp16")]; - tensor concat_102 = const()[name = string("concat_102"), val = tensor([41, 0, 0, 0])]; - tensor concat_103 = const()[name = string("concat_103"), val = tensor([42, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_40_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_40_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_40_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_40_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_40_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_40_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_40_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_40_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_40_cast_fp16 = slice_update(begin = concat_102, begin_mask = model_model_kv_cache_local_internal_tensor_assign_40_begin_mask_0, end = concat_103, end_mask = model_model_kv_cache_local_internal_tensor_assign_40_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_40_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_40_stride_0, update = shifted_value_39_cast_fp16, x = coreml_update_state_96)[name = string("model_model_kv_cache_local_internal_tensor_assign_40_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_40_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_97_write_state")]; - tensor coreml_update_state_97 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_97")]; - tensor var_15649_begin_0 = const()[name = string("op_15649_begin_0"), val = tensor([19, 0, 0, 0])]; - tensor var_15649_end_0 = const()[name = string("op_15649_end_0"), val = tensor([20, 1, 512, 256])]; - tensor var_15649_end_mask_0 = const()[name = string("op_15649_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_15649_cast_fp16 = slice_by_index(begin = var_15649_begin_0, end = var_15649_end_0, end_mask = var_15649_end_mask_0, x = coreml_update_state_97)[name = string("op_15649_cast_fp16")]; - tensor var_15656_begin_0 = const()[name = string("op_15656_begin_0"), val = tensor([41, 0, 0, 0])]; - tensor var_15656_end_0 = const()[name = string("op_15656_end_0"), val = tensor([42, 1, 512, 256])]; - tensor var_15656_end_mask_0 = const()[name = string("op_15656_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_15656_cast_fp16 = slice_by_index(begin = var_15656_begin_0, end = var_15656_end_0, end_mask = var_15656_end_mask_0, x = coreml_update_state_97)[name = string("op_15656_cast_fp16")]; - tensor var_15693 = const()[name = string("op_15693"), val = tensor([1, 4, 1, 1])]; - tensor x_357_cast_fp16 = tile(reps = var_15693, x = var_15649_cast_fp16)[name = string("x_357_cast_fp16")]; - tensor var_15713 = const()[name = string("op_15713"), val = tensor([1, 4, 1, 1])]; - tensor x_363_cast_fp16 = tile(reps = var_15713, x = var_15656_cast_fp16)[name = string("x_363_cast_fp16")]; - bool var_15740_transpose_x_1 = const()[name = string("op_15740_transpose_x_1"), val = bool(false)]; - bool var_15740_transpose_y_1 = const()[name = string("op_15740_transpose_y_1"), val = bool(true)]; - tensor var_15740 = matmul(transpose_x = var_15740_transpose_x_1, transpose_y = var_15740_transpose_y_1, x = query_states_89_cast_fp16, y = x_357_cast_fp16)[name = string("op_15740")]; - fp16 var_15741_to_fp16 = const()[name = string("op_15741_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_133_cast_fp16 = mul(x = var_15740, y = var_15741_to_fp16)[name = string("attn_weights_133_cast_fp16")]; - tensor attn_weights_135_cast_fp16 = add(x = attn_weights_133_cast_fp16, y = var_2059)[name = string("attn_weights_135_cast_fp16")]; - int32 var_15776 = const()[name = string("op_15776"), val = int32(-1)]; - tensor attn_weights_137_cast_fp16 = softmax(axis = var_15776, x = attn_weights_135_cast_fp16)[name = string("attn_weights_137_cast_fp16")]; - bool attn_output_221_transpose_x_0 = const()[name = string("attn_output_221_transpose_x_0"), val = bool(false)]; - bool attn_output_221_transpose_y_0 = const()[name = string("attn_output_221_transpose_y_0"), val = bool(false)]; - tensor attn_output_221_cast_fp16 = matmul(transpose_x = attn_output_221_transpose_x_0, transpose_y = attn_output_221_transpose_y_0, x = attn_weights_137_cast_fp16, y = x_363_cast_fp16)[name = string("attn_output_221_cast_fp16")]; - tensor var_15787_perm_0 = const()[name = string("op_15787_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_15791 = const()[name = string("op_15791"), val = tensor([1, 1, 1024])]; - tensor var_15787_cast_fp16 = transpose(perm = var_15787_perm_0, x = attn_output_221_cast_fp16)[name = string("transpose_22")]; - tensor attn_output_225_cast_fp16 = reshape(shape = var_15791, x = var_15787_cast_fp16)[name = string("attn_output_225_cast_fp16")]; - tensor var_15796 = const()[name = string("op_15796"), val = tensor([0, 2, 1])]; - string var_15812_pad_type_0 = const()[name = string("op_15812_pad_type_0"), val = string("valid")]; - int32 var_15812_groups_0 = const()[name = string("op_15812_groups_0"), val = int32(1)]; - tensor var_15812_strides_0 = const()[name = string("op_15812_strides_0"), val = tensor([1])]; - tensor var_15812_pad_0 = const()[name = string("op_15812_pad_0"), val = tensor([0, 0])]; - tensor var_15812_dilations_0 = const()[name = string("op_15812_dilations_0"), val = tensor([1])]; - tensor squeeze_22_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471470848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(472355648))))[name = string("squeeze_22_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_15797_cast_fp16 = transpose(perm = var_15796, x = attn_output_225_cast_fp16)[name = string("transpose_21")]; - tensor var_15812_cast_fp16 = conv(dilations = var_15812_dilations_0, groups = var_15812_groups_0, pad = var_15812_pad_0, pad_type = var_15812_pad_type_0, strides = var_15812_strides_0, weight = squeeze_22_cast_fp16_to_fp32_to_fp16_palettized, x = var_15797_cast_fp16)[name = string("op_15812_cast_fp16")]; - tensor var_15816 = const()[name = string("op_15816"), val = tensor([0, 2, 1])]; - int32 var_15827 = const()[name = string("op_15827"), val = int32(-1)]; - fp16 const_862_promoted_to_fp16 = const()[name = string("const_862_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_361_cast_fp16 = transpose(perm = var_15816, x = var_15812_cast_fp16)[name = string("transpose_20")]; - tensor var_15829_cast_fp16 = mul(x = hidden_states_361_cast_fp16, y = const_862_promoted_to_fp16)[name = string("op_15829_cast_fp16")]; - bool input_451_interleave_0 = const()[name = string("input_451_interleave_0"), val = bool(false)]; - tensor input_451_cast_fp16 = concat(axis = var_15827, interleave = input_451_interleave_0, values = (hidden_states_361_cast_fp16, var_15829_cast_fp16))[name = string("input_451_cast_fp16")]; - tensor normed_541_axes_0 = const()[name = string("normed_541_axes_0"), val = tensor([-1])]; - fp16 var_15824_to_fp16 = const()[name = string("op_15824_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_541_cast_fp16 = layer_norm(axes = normed_541_axes_0, epsilon = var_15824_to_fp16, x = input_451_cast_fp16)[name = string("normed_541_cast_fp16")]; - tensor normed_543_begin_0 = const()[name = string("normed_543_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_543_end_0 = const()[name = string("normed_543_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_543_end_mask_0 = const()[name = string("normed_543_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_543_cast_fp16 = slice_by_index(begin = normed_543_begin_0, end = normed_543_end_0, end_mask = normed_543_end_mask_0, x = normed_541_cast_fp16)[name = string("normed_543_cast_fp16")]; - tensor var_15843_to_fp16 = const()[name = string("op_15843_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(472374144)))]; - tensor attn_output_229_cast_fp16 = mul(x = normed_543_cast_fp16, y = var_15843_to_fp16)[name = string("attn_output_229_cast_fp16")]; - tensor hidden_states_363_cast_fp16 = add(x = hidden_states_353_cast_fp16, y = attn_output_229_cast_fp16)[name = string("hidden_states_363_cast_fp16")]; - int32 var_15856 = const()[name = string("op_15856"), val = int32(-1)]; - fp16 const_866_promoted_to_fp16 = const()[name = string("const_866_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_15858_cast_fp16 = mul(x = hidden_states_363_cast_fp16, y = const_866_promoted_to_fp16)[name = string("op_15858_cast_fp16")]; - bool input_453_interleave_0 = const()[name = string("input_453_interleave_0"), val = bool(false)]; - tensor input_453_cast_fp16 = concat(axis = var_15856, interleave = input_453_interleave_0, values = (hidden_states_363_cast_fp16, var_15858_cast_fp16))[name = string("input_453_cast_fp16")]; - tensor normed_545_axes_0 = const()[name = string("normed_545_axes_0"), val = tensor([-1])]; - fp16 var_15853_to_fp16 = const()[name = string("op_15853_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_545_cast_fp16 = layer_norm(axes = normed_545_axes_0, epsilon = var_15853_to_fp16, x = input_453_cast_fp16)[name = string("normed_545_cast_fp16")]; - tensor normed_547_begin_0 = const()[name = string("normed_547_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_547_end_0 = const()[name = string("normed_547_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_547_end_mask_0 = const()[name = string("normed_547_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_547_cast_fp16 = slice_by_index(begin = normed_547_begin_0, end = normed_547_end_0, end_mask = normed_547_end_mask_0, x = normed_545_cast_fp16)[name = string("normed_547_cast_fp16")]; - tensor var_15872_to_fp16 = const()[name = string("op_15872_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(472376512)))]; - tensor x_365_cast_fp16 = mul(x = normed_547_cast_fp16, y = var_15872_to_fp16)[name = string("x_365_cast_fp16")]; - tensor var_15884 = const()[name = string("op_15884"), val = tensor([0, 2, 1])]; - tensor input_455_axes_0 = const()[name = string("input_455_axes_0"), val = tensor([2])]; - tensor var_15885_cast_fp16 = transpose(perm = var_15884, x = x_365_cast_fp16)[name = string("transpose_19")]; - tensor input_455_cast_fp16 = expand_dims(axes = input_455_axes_0, x = var_15885_cast_fp16)[name = string("input_455_cast_fp16")]; - string x_367_pad_type_0 = const()[name = string("x_367_pad_type_0"), val = string("valid")]; - tensor x_367_strides_0 = const()[name = string("x_367_strides_0"), val = tensor([1, 1])]; - tensor x_367_pad_0 = const()[name = string("x_367_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_367_dilations_0 = const()[name = string("x_367_dilations_0"), val = tensor([1, 1])]; - int32 x_367_groups_0 = const()[name = string("x_367_groups_0"), val = int32(1)]; - tensor model_model_layers_22_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(822634560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(828606592))))[name = string("model_model_layers_22_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_367_cast_fp16 = conv(dilations = x_367_dilations_0, groups = x_367_groups_0, pad = x_367_pad_0, pad_type = x_367_pad_type_0, strides = x_367_strides_0, weight = model_model_layers_22_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_455_cast_fp16)[name = string("x_367_cast_fp16")]; - string b_45_pad_type_0 = const()[name = string("b_45_pad_type_0"), val = string("valid")]; - tensor b_45_strides_0 = const()[name = string("b_45_strides_0"), val = tensor([1, 1])]; - tensor b_45_pad_0 = const()[name = string("b_45_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_45_dilations_0 = const()[name = string("b_45_dilations_0"), val = tensor([1, 1])]; - int32 b_45_groups_0 = const()[name = string("b_45_groups_0"), val = int32(1)]; - tensor model_model_layers_22_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(828717248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(834689280))))[name = string("model_model_layers_22_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_45_cast_fp16 = conv(dilations = b_45_dilations_0, groups = b_45_groups_0, pad = b_45_pad_0, pad_type = b_45_pad_type_0, strides = b_45_strides_0, weight = model_model_layers_22_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_455_cast_fp16)[name = string("b_45_cast_fp16")]; - string var_15910_mode_0 = const()[name = string("op_15910_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_15910_cast_fp16 = gelu(mode = var_15910_mode_0, x = x_367_cast_fp16)[name = string("op_15910_cast_fp16")]; - tensor input_457_cast_fp16 = mul(x = var_15910_cast_fp16, y = b_45_cast_fp16)[name = string("input_457_cast_fp16")]; - string e_45_pad_type_0 = const()[name = string("e_45_pad_type_0"), val = string("valid")]; - tensor e_45_strides_0 = const()[name = string("e_45_strides_0"), val = tensor([1, 1])]; - tensor e_45_pad_0 = const()[name = string("e_45_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_45_dilations_0 = const()[name = string("e_45_dilations_0"), val = tensor([1, 1])]; - int32 e_45_groups_0 = const()[name = string("e_45_groups_0"), val = int32(1)]; - tensor model_model_layers_22_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(484544256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490516288))))[name = string("model_model_layers_22_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_45_cast_fp16 = conv(dilations = e_45_dilations_0, groups = e_45_groups_0, pad = e_45_pad_0, pad_type = e_45_pad_type_0, strides = e_45_strides_0, weight = model_model_layers_22_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_457_cast_fp16)[name = string("e_45_cast_fp16")]; - tensor var_15918_axes_0 = const()[name = string("op_15918_axes_0"), val = tensor([2])]; - tensor var_15918_cast_fp16 = squeeze(axes = var_15918_axes_0, x = e_45_cast_fp16)[name = string("op_15918_cast_fp16")]; - tensor var_15919 = const()[name = string("op_15919"), val = tensor([0, 2, 1])]; - int32 var_15930 = const()[name = string("op_15930"), val = int32(-1)]; - fp16 const_870_promoted_to_fp16 = const()[name = string("const_870_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_365_cast_fp16 = transpose(perm = var_15919, x = var_15918_cast_fp16)[name = string("transpose_18")]; - tensor var_15932_cast_fp16 = mul(x = hidden_states_365_cast_fp16, y = const_870_promoted_to_fp16)[name = string("op_15932_cast_fp16")]; - bool input_459_interleave_0 = const()[name = string("input_459_interleave_0"), val = bool(false)]; - tensor input_459_cast_fp16 = concat(axis = var_15930, interleave = input_459_interleave_0, values = (hidden_states_365_cast_fp16, var_15932_cast_fp16))[name = string("input_459_cast_fp16")]; - tensor normed_549_axes_0 = const()[name = string("normed_549_axes_0"), val = tensor([-1])]; - fp16 var_15927_to_fp16 = const()[name = string("op_15927_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_549_cast_fp16 = layer_norm(axes = normed_549_axes_0, epsilon = var_15927_to_fp16, x = input_459_cast_fp16)[name = string("normed_549_cast_fp16")]; - tensor normed_551_begin_0 = const()[name = string("normed_551_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_551_end_0 = const()[name = string("normed_551_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_551_end_mask_0 = const()[name = string("normed_551_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_551_cast_fp16 = slice_by_index(begin = normed_551_begin_0, end = normed_551_end_0, end_mask = normed_551_end_mask_0, x = normed_549_cast_fp16)[name = string("normed_551_cast_fp16")]; - tensor var_15946_to_fp16 = const()[name = string("op_15946_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490534784)))]; - tensor hidden_states_367_cast_fp16 = mul(x = normed_551_cast_fp16, y = var_15946_to_fp16)[name = string("hidden_states_367_cast_fp16")]; - tensor hidden_states_369_cast_fp16 = add(x = hidden_states_363_cast_fp16, y = hidden_states_367_cast_fp16)[name = string("hidden_states_369_cast_fp16")]; - int32 var_15997 = const()[name = string("op_15997"), val = int32(-1)]; - fp16 const_874_promoted_to_fp16 = const()[name = string("const_874_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_15999_cast_fp16 = mul(x = hidden_states_369_cast_fp16, y = const_874_promoted_to_fp16)[name = string("op_15999_cast_fp16")]; - bool input_461_interleave_0 = const()[name = string("input_461_interleave_0"), val = bool(false)]; - tensor input_461_cast_fp16 = concat(axis = var_15997, interleave = input_461_interleave_0, values = (hidden_states_369_cast_fp16, var_15999_cast_fp16))[name = string("input_461_cast_fp16")]; - tensor normed_553_axes_0 = const()[name = string("normed_553_axes_0"), val = tensor([-1])]; - fp16 var_15994_to_fp16 = const()[name = string("op_15994_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_553_cast_fp16 = layer_norm(axes = normed_553_axes_0, epsilon = var_15994_to_fp16, x = input_461_cast_fp16)[name = string("normed_553_cast_fp16")]; - tensor normed_555_begin_0 = const()[name = string("normed_555_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_555_end_0 = const()[name = string("normed_555_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_555_end_mask_0 = const()[name = string("normed_555_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_555_cast_fp16 = slice_by_index(begin = normed_555_begin_0, end = normed_555_end_0, end_mask = normed_555_end_mask_0, x = normed_553_cast_fp16)[name = string("normed_555_cast_fp16")]; - tensor var_16013_to_fp16 = const()[name = string("op_16013_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490537152)))]; - tensor hidden_states_371_cast_fp16 = mul(x = normed_555_cast_fp16, y = var_16013_to_fp16)[name = string("hidden_states_371_cast_fp16")]; - tensor var_16018 = const()[name = string("op_16018"), val = tensor([0, 2, 1])]; - tensor var_16021_axes_0 = const()[name = string("op_16021_axes_0"), val = tensor([2])]; - tensor var_16019_cast_fp16 = transpose(perm = var_16018, x = hidden_states_371_cast_fp16)[name = string("transpose_17")]; - tensor var_16021_cast_fp16 = expand_dims(axes = var_16021_axes_0, x = var_16019_cast_fp16)[name = string("op_16021_cast_fp16")]; - string var_16037_pad_type_0 = const()[name = string("op_16037_pad_type_0"), val = string("valid")]; - tensor var_16037_strides_0 = const()[name = string("op_16037_strides_0"), val = tensor([1, 1])]; - tensor var_16037_pad_0 = const()[name = string("op_16037_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_16037_dilations_0 = const()[name = string("op_16037_dilations_0"), val = tensor([1, 1])]; - int32 var_16037_groups_0 = const()[name = string("op_16037_groups_0"), val = int32(1)]; - tensor var_16037 = conv(dilations = var_16037_dilations_0, groups = var_16037_groups_0, pad = var_16037_pad_0, pad_type = var_16037_pad_type_0, strides = var_16037_strides_0, weight = model_model_layers_23_self_attn_q_proj_weight_palettized, x = var_16021_cast_fp16)[name = string("op_16037")]; - tensor var_16042 = const()[name = string("op_16042"), val = tensor([1, 4, 1, 256])]; - tensor var_16043 = reshape(shape = var_16042, x = var_16037)[name = string("op_16043")]; - string var_16059_pad_type_0 = const()[name = string("op_16059_pad_type_0"), val = string("valid")]; - tensor var_16059_strides_0 = const()[name = string("op_16059_strides_0"), val = tensor([1, 1])]; - tensor var_16059_pad_0 = const()[name = string("op_16059_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_16059_dilations_0 = const()[name = string("op_16059_dilations_0"), val = tensor([1, 1])]; - int32 var_16059_groups_0 = const()[name = string("op_16059_groups_0"), val = int32(1)]; - tensor var_16059 = conv(dilations = var_16059_dilations_0, groups = var_16059_groups_0, pad = var_16059_pad_0, pad_type = var_16059_pad_type_0, strides = var_16059_strides_0, weight = model_model_layers_23_self_attn_k_proj_weight_palettized, x = var_16021_cast_fp16)[name = string("op_16059")]; - tensor var_16064 = const()[name = string("op_16064"), val = tensor([1, 1, 1, 256])]; - tensor var_16065 = reshape(shape = var_16064, x = var_16059)[name = string("op_16065")]; - string var_16081_pad_type_0 = const()[name = string("op_16081_pad_type_0"), val = string("valid")]; - tensor var_16081_strides_0 = const()[name = string("op_16081_strides_0"), val = tensor([1, 1])]; - tensor var_16081_pad_0 = const()[name = string("op_16081_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_16081_dilations_0 = const()[name = string("op_16081_dilations_0"), val = tensor([1, 1])]; - int32 var_16081_groups_0 = const()[name = string("op_16081_groups_0"), val = int32(1)]; - tensor var_16081 = conv(dilations = var_16081_dilations_0, groups = var_16081_groups_0, pad = var_16081_pad_0, pad_type = var_16081_pad_type_0, strides = var_16081_strides_0, weight = model_model_layers_23_self_attn_v_proj_weight_palettized, x = var_16021_cast_fp16)[name = string("op_16081")]; - tensor var_16086 = const()[name = string("op_16086"), val = tensor([1, 1, 1, 256])]; - tensor var_16087 = reshape(shape = var_16086, x = var_16081)[name = string("op_16087")]; - int32 var_16102 = const()[name = string("op_16102"), val = int32(-1)]; - fp16 const_878_promoted = const()[name = string("const_878_promoted"), val = fp16(-0x1p+0)]; - tensor var_16104 = mul(x = var_16043, y = const_878_promoted)[name = string("op_16104")]; - bool input_465_interleave_0 = const()[name = string("input_465_interleave_0"), val = bool(false)]; - tensor input_465 = concat(axis = var_16102, interleave = input_465_interleave_0, values = (var_16043, var_16104))[name = string("input_465")]; - tensor normed_557_axes_0 = const()[name = string("normed_557_axes_0"), val = tensor([-1])]; - fp16 var_16099_to_fp16 = const()[name = string("op_16099_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_557_cast_fp16 = layer_norm(axes = normed_557_axes_0, epsilon = var_16099_to_fp16, x = input_465)[name = string("normed_557_cast_fp16")]; - tensor normed_559_begin_0 = const()[name = string("normed_559_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_559_end_0 = const()[name = string("normed_559_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_559_end_mask_0 = const()[name = string("normed_559_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_559 = slice_by_index(begin = normed_559_begin_0, end = normed_559_end_0, end_mask = normed_559_end_mask_0, x = normed_557_cast_fp16)[name = string("normed_559")]; - tensor var_16118_to_fp16 = const()[name = string("op_16118_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490539520)))]; - tensor q_47_cast_fp16 = mul(x = normed_559, y = var_16118_to_fp16)[name = string("q_47_cast_fp16")]; - int32 var_16129 = const()[name = string("op_16129"), val = int32(-1)]; - fp16 const_882_promoted = const()[name = string("const_882_promoted"), val = fp16(-0x1p+0)]; - tensor var_16131 = mul(x = var_16065, y = const_882_promoted)[name = string("op_16131")]; - bool input_467_interleave_0 = const()[name = string("input_467_interleave_0"), val = bool(false)]; - tensor input_467 = concat(axis = var_16129, interleave = input_467_interleave_0, values = (var_16065, var_16131))[name = string("input_467")]; - tensor normed_561_axes_0 = const()[name = string("normed_561_axes_0"), val = tensor([-1])]; - fp16 var_16126_to_fp16 = const()[name = string("op_16126_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_561_cast_fp16 = layer_norm(axes = normed_561_axes_0, epsilon = var_16126_to_fp16, x = input_467)[name = string("normed_561_cast_fp16")]; - tensor normed_563_begin_0 = const()[name = string("normed_563_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_563_end_0 = const()[name = string("normed_563_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_563_end_mask_0 = const()[name = string("normed_563_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_563 = slice_by_index(begin = normed_563_begin_0, end = normed_563_end_0, end_mask = normed_563_end_mask_0, x = normed_561_cast_fp16)[name = string("normed_563")]; - tensor var_16145_to_fp16 = const()[name = string("op_16145_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490540096)))]; - tensor k_47_cast_fp16 = mul(x = normed_563, y = var_16145_to_fp16)[name = string("k_47_cast_fp16")]; - tensor var_16147_cast_fp16 = mul(x = q_47_cast_fp16, y = cos_21_cast_fp16)[name = string("op_16147_cast_fp16")]; - tensor x1_93_begin_0 = const()[name = string("x1_93_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_93_end_0 = const()[name = string("x1_93_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_93_end_mask_0 = const()[name = string("x1_93_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_93_cast_fp16 = slice_by_index(begin = x1_93_begin_0, end = x1_93_end_0, end_mask = x1_93_end_mask_0, x = q_47_cast_fp16)[name = string("x1_93_cast_fp16")]; - tensor x2_93_begin_0 = const()[name = string("x2_93_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_93_end_0 = const()[name = string("x2_93_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_93_end_mask_0 = const()[name = string("x2_93_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_93_cast_fp16 = slice_by_index(begin = x2_93_begin_0, end = x2_93_end_0, end_mask = x2_93_end_mask_0, x = q_47_cast_fp16)[name = string("x2_93_cast_fp16")]; - fp16 const_888_promoted_to_fp16 = const()[name = string("const_888_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_16168_cast_fp16 = mul(x = x2_93_cast_fp16, y = const_888_promoted_to_fp16)[name = string("op_16168_cast_fp16")]; - int32 var_16170 = const()[name = string("op_16170"), val = int32(-1)]; - bool var_16171_interleave_0 = const()[name = string("op_16171_interleave_0"), val = bool(false)]; - tensor var_16171_cast_fp16 = concat(axis = var_16170, interleave = var_16171_interleave_0, values = (var_16168_cast_fp16, x1_93_cast_fp16))[name = string("op_16171_cast_fp16")]; - tensor var_16172_cast_fp16 = mul(x = var_16171_cast_fp16, y = sin_21_cast_fp16)[name = string("op_16172_cast_fp16")]; - tensor query_states_93_cast_fp16 = add(x = var_16147_cast_fp16, y = var_16172_cast_fp16)[name = string("query_states_93_cast_fp16")]; - tensor var_16175_cast_fp16 = mul(x = k_47_cast_fp16, y = cos_21_cast_fp16)[name = string("op_16175_cast_fp16")]; - tensor x1_95_begin_0 = const()[name = string("x1_95_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_95_end_0 = const()[name = string("x1_95_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_95_end_mask_0 = const()[name = string("x1_95_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_95_cast_fp16 = slice_by_index(begin = x1_95_begin_0, end = x1_95_end_0, end_mask = x1_95_end_mask_0, x = k_47_cast_fp16)[name = string("x1_95_cast_fp16")]; - tensor x2_95_begin_0 = const()[name = string("x2_95_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_95_end_0 = const()[name = string("x2_95_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_95_end_mask_0 = const()[name = string("x2_95_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_95_cast_fp16 = slice_by_index(begin = x2_95_begin_0, end = x2_95_end_0, end_mask = x2_95_end_mask_0, x = k_47_cast_fp16)[name = string("x2_95_cast_fp16")]; - fp16 const_891_promoted_to_fp16 = const()[name = string("const_891_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_16196_cast_fp16 = mul(x = x2_95_cast_fp16, y = const_891_promoted_to_fp16)[name = string("op_16196_cast_fp16")]; - int32 var_16198 = const()[name = string("op_16198"), val = int32(-1)]; - bool var_16199_interleave_0 = const()[name = string("op_16199_interleave_0"), val = bool(false)]; - tensor var_16199_cast_fp16 = concat(axis = var_16198, interleave = var_16199_interleave_0, values = (var_16196_cast_fp16, x1_95_cast_fp16))[name = string("op_16199_cast_fp16")]; - tensor var_16200_cast_fp16 = mul(x = var_16199_cast_fp16, y = sin_21_cast_fp16)[name = string("op_16200_cast_fp16")]; - tensor key_states_93_cast_fp16 = add(x = var_16175_cast_fp16, y = var_16200_cast_fp16)[name = string("key_states_93_cast_fp16")]; - tensor expand_dims_236 = const()[name = string("expand_dims_236"), val = tensor([3])]; - tensor expand_dims_237 = const()[name = string("expand_dims_237"), val = tensor([0])]; - tensor expand_dims_239 = const()[name = string("expand_dims_239"), val = tensor([0])]; - tensor expand_dims_240 = const()[name = string("expand_dims_240"), val = tensor([4])]; - int32 concat_106_axis_0 = const()[name = string("concat_106_axis_0"), val = int32(0)]; - bool concat_106_interleave_0 = const()[name = string("concat_106_interleave_0"), val = bool(false)]; - tensor concat_106 = concat(axis = concat_106_axis_0, interleave = concat_106_interleave_0, values = (expand_dims_236, expand_dims_237, current_pos, expand_dims_239))[name = string("concat_106")]; - tensor concat_107_values1_0 = const()[name = string("concat_107_values1_0"), val = tensor([0])]; - tensor concat_107_values3_0 = const()[name = string("concat_107_values3_0"), val = tensor([0])]; - int32 concat_107_axis_0 = const()[name = string("concat_107_axis_0"), val = int32(0)]; - bool concat_107_interleave_0 = const()[name = string("concat_107_interleave_0"), val = bool(false)]; - tensor concat_107 = concat(axis = concat_107_axis_0, interleave = concat_107_interleave_0, values = (expand_dims_240, concat_107_values1_0, var_4997, concat_107_values3_0))[name = string("concat_107")]; - tensor model_model_kv_cache_global_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_global_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_global_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_106, begin_mask = model_model_kv_cache_global_internal_tensor_assign_7_begin_mask_0, end = concat_107, end_mask = model_model_kv_cache_global_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_7_stride_0, update = key_states_93_cast_fp16, x = coreml_update_state_87)[name = string("model_model_kv_cache_global_internal_tensor_assign_7_cast_fp16")]; - write_state(data = model_model_kv_cache_global_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_98_write_state")]; - tensor coreml_update_state_98 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_98")]; - tensor expand_dims_242 = const()[name = string("expand_dims_242"), val = tensor([7])]; - tensor expand_dims_243 = const()[name = string("expand_dims_243"), val = tensor([0])]; - tensor expand_dims_245 = const()[name = string("expand_dims_245"), val = tensor([0])]; - tensor expand_dims_246 = const()[name = string("expand_dims_246"), val = tensor([8])]; - int32 concat_110_axis_0 = const()[name = string("concat_110_axis_0"), val = int32(0)]; - bool concat_110_interleave_0 = const()[name = string("concat_110_interleave_0"), val = bool(false)]; - tensor concat_110 = concat(axis = concat_110_axis_0, interleave = concat_110_interleave_0, values = (expand_dims_242, expand_dims_243, current_pos, expand_dims_245))[name = string("concat_110")]; - tensor concat_111_values1_0 = const()[name = string("concat_111_values1_0"), val = tensor([0])]; - tensor concat_111_values3_0 = const()[name = string("concat_111_values3_0"), val = tensor([0])]; - int32 concat_111_axis_0 = const()[name = string("concat_111_axis_0"), val = int32(0)]; - bool concat_111_interleave_0 = const()[name = string("concat_111_interleave_0"), val = bool(false)]; - tensor concat_111 = concat(axis = concat_111_axis_0, interleave = concat_111_interleave_0, values = (expand_dims_246, concat_111_values1_0, var_4997, concat_111_values3_0))[name = string("concat_111")]; - tensor model_model_kv_cache_global_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_global_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_global_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_110, begin_mask = model_model_kv_cache_global_internal_tensor_assign_8_begin_mask_0, end = concat_111, end_mask = model_model_kv_cache_global_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_8_stride_0, update = var_16087, x = coreml_update_state_98)[name = string("model_model_kv_cache_global_internal_tensor_assign_8_cast_fp16")]; - write_state(data = model_model_kv_cache_global_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_99_write_state")]; - tensor coreml_update_state_99 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_99")]; - tensor var_16255_begin_0 = const()[name = string("op_16255_begin_0"), val = tensor([3, 0, 0, 0])]; - tensor var_16255_end_0 = const()[name = string("op_16255_end_0"), val = tensor([4, 1, 4096, 256])]; - tensor var_16255_end_mask_0 = const()[name = string("op_16255_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_16255_cast_fp16 = slice_by_index(begin = var_16255_begin_0, end = var_16255_end_0, end_mask = var_16255_end_mask_0, x = coreml_update_state_99)[name = string("op_16255_cast_fp16")]; - tensor var_16262_begin_0 = const()[name = string("op_16262_begin_0"), val = tensor([7, 0, 0, 0])]; - tensor var_16262_end_0 = const()[name = string("op_16262_end_0"), val = tensor([1, 1, 4096, 256])]; - tensor var_16262_end_mask_0 = const()[name = string("op_16262_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_16262_cast_fp16 = slice_by_index(begin = var_16262_begin_0, end = var_16262_end_0, end_mask = var_16262_end_mask_0, x = coreml_update_state_99)[name = string("op_16262_cast_fp16")]; - tensor var_16299 = const()[name = string("op_16299"), val = tensor([1, 4, 1, 1])]; - tensor x_373_cast_fp16 = tile(reps = var_16299, x = var_16255_cast_fp16)[name = string("x_373_cast_fp16")]; - tensor var_16319 = const()[name = string("op_16319"), val = tensor([1, 4, 1, 1])]; - tensor x_379_cast_fp16 = tile(reps = var_16319, x = var_16262_cast_fp16)[name = string("x_379_cast_fp16")]; - bool var_16346_transpose_x_1 = const()[name = string("op_16346_transpose_x_1"), val = bool(false)]; - bool var_16346_transpose_y_1 = const()[name = string("op_16346_transpose_y_1"), val = bool(true)]; - tensor var_16346 = matmul(transpose_x = var_16346_transpose_x_1, transpose_y = var_16346_transpose_y_1, x = query_states_93_cast_fp16, y = x_373_cast_fp16)[name = string("op_16346")]; - fp16 var_16347_to_fp16 = const()[name = string("op_16347_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_139_cast_fp16 = mul(x = var_16346, y = var_16347_to_fp16)[name = string("attn_weights_139_cast_fp16")]; - tensor attn_weights_141_cast_fp16 = add(x = attn_weights_139_cast_fp16, y = causal_mask)[name = string("attn_weights_141_cast_fp16")]; - int32 var_16382 = const()[name = string("op_16382"), val = int32(-1)]; - tensor attn_weights_143_cast_fp16 = softmax(axis = var_16382, x = attn_weights_141_cast_fp16)[name = string("attn_weights_143_cast_fp16")]; - bool attn_output_231_transpose_x_0 = const()[name = string("attn_output_231_transpose_x_0"), val = bool(false)]; - bool attn_output_231_transpose_y_0 = const()[name = string("attn_output_231_transpose_y_0"), val = bool(false)]; - tensor attn_output_231_cast_fp16 = matmul(transpose_x = attn_output_231_transpose_x_0, transpose_y = attn_output_231_transpose_y_0, x = attn_weights_143_cast_fp16, y = x_379_cast_fp16)[name = string("attn_output_231_cast_fp16")]; - tensor var_16393_perm_0 = const()[name = string("op_16393_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_16397 = const()[name = string("op_16397"), val = tensor([1, 1, 1024])]; - tensor var_16393_cast_fp16 = transpose(perm = var_16393_perm_0, x = attn_output_231_cast_fp16)[name = string("transpose_16")]; - tensor attn_output_235_cast_fp16 = reshape(shape = var_16397, x = var_16393_cast_fp16)[name = string("attn_output_235_cast_fp16")]; - tensor var_16402 = const()[name = string("op_16402"), val = tensor([0, 2, 1])]; - string var_16418_pad_type_0 = const()[name = string("op_16418_pad_type_0"), val = string("valid")]; - int32 var_16418_groups_0 = const()[name = string("op_16418_groups_0"), val = int32(1)]; - tensor var_16418_strides_0 = const()[name = string("op_16418_strides_0"), val = tensor([1])]; - tensor var_16418_pad_0 = const()[name = string("op_16418_pad_0"), val = tensor([0, 0])]; - tensor var_16418_dilations_0 = const()[name = string("op_16418_dilations_0"), val = tensor([1])]; - tensor squeeze_23_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490540672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491425472))))[name = string("squeeze_23_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_16403_cast_fp16 = transpose(perm = var_16402, x = attn_output_235_cast_fp16)[name = string("transpose_15")]; - tensor var_16418_cast_fp16 = conv(dilations = var_16418_dilations_0, groups = var_16418_groups_0, pad = var_16418_pad_0, pad_type = var_16418_pad_type_0, strides = var_16418_strides_0, weight = squeeze_23_cast_fp16_to_fp32_to_fp16_palettized, x = var_16403_cast_fp16)[name = string("op_16418_cast_fp16")]; - tensor var_16422 = const()[name = string("op_16422"), val = tensor([0, 2, 1])]; - int32 var_16433 = const()[name = string("op_16433"), val = int32(-1)]; - fp16 const_900_promoted_to_fp16 = const()[name = string("const_900_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_377_cast_fp16 = transpose(perm = var_16422, x = var_16418_cast_fp16)[name = string("transpose_14")]; - tensor var_16435_cast_fp16 = mul(x = hidden_states_377_cast_fp16, y = const_900_promoted_to_fp16)[name = string("op_16435_cast_fp16")]; - bool input_471_interleave_0 = const()[name = string("input_471_interleave_0"), val = bool(false)]; - tensor input_471_cast_fp16 = concat(axis = var_16433, interleave = input_471_interleave_0, values = (hidden_states_377_cast_fp16, var_16435_cast_fp16))[name = string("input_471_cast_fp16")]; - tensor normed_565_axes_0 = const()[name = string("normed_565_axes_0"), val = tensor([-1])]; - fp16 var_16430_to_fp16 = const()[name = string("op_16430_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_565_cast_fp16 = layer_norm(axes = normed_565_axes_0, epsilon = var_16430_to_fp16, x = input_471_cast_fp16)[name = string("normed_565_cast_fp16")]; - tensor normed_567_begin_0 = const()[name = string("normed_567_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_567_end_0 = const()[name = string("normed_567_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_567_end_mask_0 = const()[name = string("normed_567_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_567_cast_fp16 = slice_by_index(begin = normed_567_begin_0, end = normed_567_end_0, end_mask = normed_567_end_mask_0, x = normed_565_cast_fp16)[name = string("normed_567_cast_fp16")]; - tensor var_16449_to_fp16 = const()[name = string("op_16449_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491443968)))]; - tensor attn_output_239_cast_fp16 = mul(x = normed_567_cast_fp16, y = var_16449_to_fp16)[name = string("attn_output_239_cast_fp16")]; - tensor hidden_states_379_cast_fp16 = add(x = hidden_states_369_cast_fp16, y = attn_output_239_cast_fp16)[name = string("hidden_states_379_cast_fp16")]; - int32 var_16462 = const()[name = string("op_16462"), val = int32(-1)]; - fp16 const_904_promoted_to_fp16 = const()[name = string("const_904_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_16464_cast_fp16 = mul(x = hidden_states_379_cast_fp16, y = const_904_promoted_to_fp16)[name = string("op_16464_cast_fp16")]; - bool input_473_interleave_0 = const()[name = string("input_473_interleave_0"), val = bool(false)]; - tensor input_473_cast_fp16 = concat(axis = var_16462, interleave = input_473_interleave_0, values = (hidden_states_379_cast_fp16, var_16464_cast_fp16))[name = string("input_473_cast_fp16")]; - tensor normed_569_axes_0 = const()[name = string("normed_569_axes_0"), val = tensor([-1])]; - fp16 var_16459_to_fp16 = const()[name = string("op_16459_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_569_cast_fp16 = layer_norm(axes = normed_569_axes_0, epsilon = var_16459_to_fp16, x = input_473_cast_fp16)[name = string("normed_569_cast_fp16")]; - tensor normed_571_begin_0 = const()[name = string("normed_571_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_571_end_0 = const()[name = string("normed_571_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_571_end_mask_0 = const()[name = string("normed_571_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_571_cast_fp16 = slice_by_index(begin = normed_571_begin_0, end = normed_571_end_0, end_mask = normed_571_end_mask_0, x = normed_569_cast_fp16)[name = string("normed_571_cast_fp16")]; - tensor var_16478_to_fp16 = const()[name = string("op_16478_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491446336)))]; - tensor x_381_cast_fp16 = mul(x = normed_571_cast_fp16, y = var_16478_to_fp16)[name = string("x_381_cast_fp16")]; - tensor var_16490 = const()[name = string("op_16490"), val = tensor([0, 2, 1])]; - tensor input_475_axes_0 = const()[name = string("input_475_axes_0"), val = tensor([2])]; - tensor var_16491_cast_fp16 = transpose(perm = var_16490, x = x_381_cast_fp16)[name = string("transpose_13")]; - tensor input_475_cast_fp16 = expand_dims(axes = input_475_axes_0, x = var_16491_cast_fp16)[name = string("input_475_cast_fp16")]; - string x_383_pad_type_0 = const()[name = string("x_383_pad_type_0"), val = string("valid")]; - tensor x_383_strides_0 = const()[name = string("x_383_strides_0"), val = tensor([1, 1])]; - tensor x_383_pad_0 = const()[name = string("x_383_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_383_dilations_0 = const()[name = string("x_383_dilations_0"), val = tensor([1, 1])]; - int32 x_383_groups_0 = const()[name = string("x_383_groups_0"), val = int32(1)]; - tensor model_model_layers_23_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(834799936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(840771968))))[name = string("model_model_layers_23_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_383_cast_fp16 = conv(dilations = x_383_dilations_0, groups = x_383_groups_0, pad = x_383_pad_0, pad_type = x_383_pad_type_0, strides = x_383_strides_0, weight = model_model_layers_23_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_475_cast_fp16)[name = string("x_383_cast_fp16")]; - string b_47_pad_type_0 = const()[name = string("b_47_pad_type_0"), val = string("valid")]; - tensor b_47_strides_0 = const()[name = string("b_47_strides_0"), val = tensor([1, 1])]; - tensor b_47_pad_0 = const()[name = string("b_47_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_47_dilations_0 = const()[name = string("b_47_dilations_0"), val = tensor([1, 1])]; - int32 b_47_groups_0 = const()[name = string("b_47_groups_0"), val = int32(1)]; - tensor model_model_layers_23_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(840882624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(846854656))))[name = string("model_model_layers_23_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_47_cast_fp16 = conv(dilations = b_47_dilations_0, groups = b_47_groups_0, pad = b_47_pad_0, pad_type = b_47_pad_type_0, strides = b_47_strides_0, weight = model_model_layers_23_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_475_cast_fp16)[name = string("b_47_cast_fp16")]; - string var_16516_mode_0 = const()[name = string("op_16516_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_16516_cast_fp16 = gelu(mode = var_16516_mode_0, x = x_383_cast_fp16)[name = string("op_16516_cast_fp16")]; - tensor input_477_cast_fp16 = mul(x = var_16516_cast_fp16, y = b_47_cast_fp16)[name = string("input_477_cast_fp16")]; - string e_47_pad_type_0 = const()[name = string("e_47_pad_type_0"), val = string("valid")]; - tensor e_47_strides_0 = const()[name = string("e_47_strides_0"), val = tensor([1, 1])]; - tensor e_47_pad_0 = const()[name = string("e_47_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_47_dilations_0 = const()[name = string("e_47_dilations_0"), val = tensor([1, 1])]; - int32 e_47_groups_0 = const()[name = string("e_47_groups_0"), val = int32(1)]; - tensor model_model_layers_23_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(503614080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(509586112))))[name = string("model_model_layers_23_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_47_cast_fp16 = conv(dilations = e_47_dilations_0, groups = e_47_groups_0, pad = e_47_pad_0, pad_type = e_47_pad_type_0, strides = e_47_strides_0, weight = model_model_layers_23_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_477_cast_fp16)[name = string("e_47_cast_fp16")]; - tensor var_16524_axes_0 = const()[name = string("op_16524_axes_0"), val = tensor([2])]; - tensor var_16524_cast_fp16 = squeeze(axes = var_16524_axes_0, x = e_47_cast_fp16)[name = string("op_16524_cast_fp16")]; - tensor var_16525 = const()[name = string("op_16525"), val = tensor([0, 2, 1])]; - int32 var_16536 = const()[name = string("op_16536"), val = int32(-1)]; - fp16 const_908_promoted_to_fp16 = const()[name = string("const_908_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_381_cast_fp16 = transpose(perm = var_16525, x = var_16524_cast_fp16)[name = string("transpose_12")]; - tensor var_16538_cast_fp16 = mul(x = hidden_states_381_cast_fp16, y = const_908_promoted_to_fp16)[name = string("op_16538_cast_fp16")]; - bool input_479_interleave_0 = const()[name = string("input_479_interleave_0"), val = bool(false)]; - tensor input_479_cast_fp16 = concat(axis = var_16536, interleave = input_479_interleave_0, values = (hidden_states_381_cast_fp16, var_16538_cast_fp16))[name = string("input_479_cast_fp16")]; - tensor normed_573_axes_0 = const()[name = string("normed_573_axes_0"), val = tensor([-1])]; - fp16 var_16533_to_fp16 = const()[name = string("op_16533_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_573_cast_fp16 = layer_norm(axes = normed_573_axes_0, epsilon = var_16533_to_fp16, x = input_479_cast_fp16)[name = string("normed_573_cast_fp16")]; - tensor normed_575_begin_0 = const()[name = string("normed_575_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_575_end_0 = const()[name = string("normed_575_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_575_end_mask_0 = const()[name = string("normed_575_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_575_cast_fp16 = slice_by_index(begin = normed_575_begin_0, end = normed_575_end_0, end_mask = normed_575_end_mask_0, x = normed_573_cast_fp16)[name = string("normed_575_cast_fp16")]; - tensor var_16552_to_fp16 = const()[name = string("op_16552_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(509604608)))]; - tensor hidden_states_383_cast_fp16 = mul(x = normed_575_cast_fp16, y = var_16552_to_fp16)[name = string("hidden_states_383_cast_fp16")]; - tensor hidden_states_385_cast_fp16 = add(x = hidden_states_379_cast_fp16, y = hidden_states_383_cast_fp16)[name = string("hidden_states_385_cast_fp16")]; - int32 var_16603 = const()[name = string("op_16603"), val = int32(-1)]; - fp16 const_912_promoted_to_fp16 = const()[name = string("const_912_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_16605_cast_fp16 = mul(x = hidden_states_385_cast_fp16, y = const_912_promoted_to_fp16)[name = string("op_16605_cast_fp16")]; - bool input_481_interleave_0 = const()[name = string("input_481_interleave_0"), val = bool(false)]; - tensor input_481_cast_fp16 = concat(axis = var_16603, interleave = input_481_interleave_0, values = (hidden_states_385_cast_fp16, var_16605_cast_fp16))[name = string("input_481_cast_fp16")]; - tensor normed_577_axes_0 = const()[name = string("normed_577_axes_0"), val = tensor([-1])]; - fp16 var_16600_to_fp16 = const()[name = string("op_16600_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_577_cast_fp16 = layer_norm(axes = normed_577_axes_0, epsilon = var_16600_to_fp16, x = input_481_cast_fp16)[name = string("normed_577_cast_fp16")]; - tensor normed_579_begin_0 = const()[name = string("normed_579_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_579_end_0 = const()[name = string("normed_579_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_579_end_mask_0 = const()[name = string("normed_579_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_579_cast_fp16 = slice_by_index(begin = normed_579_begin_0, end = normed_579_end_0, end_mask = normed_579_end_mask_0, x = normed_577_cast_fp16)[name = string("normed_579_cast_fp16")]; - tensor var_16619_to_fp16 = const()[name = string("op_16619_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(509606976)))]; - tensor hidden_states_387_cast_fp16 = mul(x = normed_579_cast_fp16, y = var_16619_to_fp16)[name = string("hidden_states_387_cast_fp16")]; - tensor var_16624 = const()[name = string("op_16624"), val = tensor([0, 2, 1])]; - tensor var_16627_axes_0 = const()[name = string("op_16627_axes_0"), val = tensor([2])]; - tensor var_16625_cast_fp16 = transpose(perm = var_16624, x = hidden_states_387_cast_fp16)[name = string("transpose_11")]; - tensor var_16627_cast_fp16 = expand_dims(axes = var_16627_axes_0, x = var_16625_cast_fp16)[name = string("op_16627_cast_fp16")]; - string var_16643_pad_type_0 = const()[name = string("op_16643_pad_type_0"), val = string("valid")]; - tensor var_16643_strides_0 = const()[name = string("op_16643_strides_0"), val = tensor([1, 1])]; - tensor var_16643_pad_0 = const()[name = string("op_16643_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_16643_dilations_0 = const()[name = string("op_16643_dilations_0"), val = tensor([1, 1])]; - int32 var_16643_groups_0 = const()[name = string("op_16643_groups_0"), val = int32(1)]; - tensor var_16643 = conv(dilations = var_16643_dilations_0, groups = var_16643_groups_0, pad = var_16643_pad_0, pad_type = var_16643_pad_type_0, strides = var_16643_strides_0, weight = model_model_layers_24_self_attn_q_proj_weight_palettized, x = var_16627_cast_fp16)[name = string("op_16643")]; - tensor var_16648 = const()[name = string("op_16648"), val = tensor([1, 4, 1, 256])]; - tensor var_16649 = reshape(shape = var_16648, x = var_16643)[name = string("op_16649")]; - string var_16665_pad_type_0 = const()[name = string("op_16665_pad_type_0"), val = string("valid")]; - tensor var_16665_strides_0 = const()[name = string("op_16665_strides_0"), val = tensor([1, 1])]; - tensor var_16665_pad_0 = const()[name = string("op_16665_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_16665_dilations_0 = const()[name = string("op_16665_dilations_0"), val = tensor([1, 1])]; - int32 var_16665_groups_0 = const()[name = string("op_16665_groups_0"), val = int32(1)]; - tensor var_16665 = conv(dilations = var_16665_dilations_0, groups = var_16665_groups_0, pad = var_16665_pad_0, pad_type = var_16665_pad_type_0, strides = var_16665_strides_0, weight = model_model_layers_24_self_attn_k_proj_weight_palettized, x = var_16627_cast_fp16)[name = string("op_16665")]; - tensor var_16670 = const()[name = string("op_16670"), val = tensor([1, 1, 1, 256])]; - tensor var_16671 = reshape(shape = var_16670, x = var_16665)[name = string("op_16671")]; - string var_16687_pad_type_0 = const()[name = string("op_16687_pad_type_0"), val = string("valid")]; - tensor var_16687_strides_0 = const()[name = string("op_16687_strides_0"), val = tensor([1, 1])]; - tensor var_16687_pad_0 = const()[name = string("op_16687_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_16687_dilations_0 = const()[name = string("op_16687_dilations_0"), val = tensor([1, 1])]; - int32 var_16687_groups_0 = const()[name = string("op_16687_groups_0"), val = int32(1)]; - tensor var_16687 = conv(dilations = var_16687_dilations_0, groups = var_16687_groups_0, pad = var_16687_pad_0, pad_type = var_16687_pad_type_0, strides = var_16687_strides_0, weight = model_model_layers_24_self_attn_v_proj_weight_palettized, x = var_16627_cast_fp16)[name = string("op_16687")]; - tensor var_16692 = const()[name = string("op_16692"), val = tensor([1, 1, 1, 256])]; - tensor var_16693 = reshape(shape = var_16692, x = var_16687)[name = string("op_16693")]; - int32 var_16708 = const()[name = string("op_16708"), val = int32(-1)]; - fp16 const_916_promoted = const()[name = string("const_916_promoted"), val = fp16(-0x1p+0)]; - tensor var_16710 = mul(x = var_16649, y = const_916_promoted)[name = string("op_16710")]; - bool input_485_interleave_0 = const()[name = string("input_485_interleave_0"), val = bool(false)]; - tensor input_485 = concat(axis = var_16708, interleave = input_485_interleave_0, values = (var_16649, var_16710))[name = string("input_485")]; - tensor normed_581_axes_0 = const()[name = string("normed_581_axes_0"), val = tensor([-1])]; - fp16 var_16705_to_fp16 = const()[name = string("op_16705_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_581_cast_fp16 = layer_norm(axes = normed_581_axes_0, epsilon = var_16705_to_fp16, x = input_485)[name = string("normed_581_cast_fp16")]; - tensor normed_583_begin_0 = const()[name = string("normed_583_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_583_end_0 = const()[name = string("normed_583_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_583_end_mask_0 = const()[name = string("normed_583_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_583 = slice_by_index(begin = normed_583_begin_0, end = normed_583_end_0, end_mask = normed_583_end_mask_0, x = normed_581_cast_fp16)[name = string("normed_583")]; - tensor var_16724_to_fp16 = const()[name = string("op_16724_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(509609344)))]; - tensor q_49_cast_fp16 = mul(x = normed_583, y = var_16724_to_fp16)[name = string("q_49_cast_fp16")]; - int32 var_16735 = const()[name = string("op_16735"), val = int32(-1)]; - fp16 const_920_promoted = const()[name = string("const_920_promoted"), val = fp16(-0x1p+0)]; - tensor var_16737 = mul(x = var_16671, y = const_920_promoted)[name = string("op_16737")]; - bool input_487_interleave_0 = const()[name = string("input_487_interleave_0"), val = bool(false)]; - tensor input_487 = concat(axis = var_16735, interleave = input_487_interleave_0, values = (var_16671, var_16737))[name = string("input_487")]; - tensor normed_585_axes_0 = const()[name = string("normed_585_axes_0"), val = tensor([-1])]; - fp16 var_16732_to_fp16 = const()[name = string("op_16732_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_585_cast_fp16 = layer_norm(axes = normed_585_axes_0, epsilon = var_16732_to_fp16, x = input_487)[name = string("normed_585_cast_fp16")]; - tensor normed_587_begin_0 = const()[name = string("normed_587_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_587_end_0 = const()[name = string("normed_587_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_587_end_mask_0 = const()[name = string("normed_587_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_587 = slice_by_index(begin = normed_587_begin_0, end = normed_587_end_0, end_mask = normed_587_end_mask_0, x = normed_585_cast_fp16)[name = string("normed_587")]; - tensor var_16751_to_fp16 = const()[name = string("op_16751_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(509609920)))]; - tensor k_49_cast_fp16 = mul(x = normed_587, y = var_16751_to_fp16)[name = string("k_49_cast_fp16")]; - tensor var_16753_cast_fp16 = mul(x = q_49_cast_fp16, y = cos_1_cast_fp16)[name = string("op_16753_cast_fp16")]; - tensor x1_97_begin_0 = const()[name = string("x1_97_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_97_end_0 = const()[name = string("x1_97_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_97_end_mask_0 = const()[name = string("x1_97_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_97_cast_fp16 = slice_by_index(begin = x1_97_begin_0, end = x1_97_end_0, end_mask = x1_97_end_mask_0, x = q_49_cast_fp16)[name = string("x1_97_cast_fp16")]; - tensor x2_97_begin_0 = const()[name = string("x2_97_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_97_end_0 = const()[name = string("x2_97_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_97_end_mask_0 = const()[name = string("x2_97_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_97_cast_fp16 = slice_by_index(begin = x2_97_begin_0, end = x2_97_end_0, end_mask = x2_97_end_mask_0, x = q_49_cast_fp16)[name = string("x2_97_cast_fp16")]; - fp16 const_926_promoted_to_fp16 = const()[name = string("const_926_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_16774_cast_fp16 = mul(x = x2_97_cast_fp16, y = const_926_promoted_to_fp16)[name = string("op_16774_cast_fp16")]; - int32 var_16776 = const()[name = string("op_16776"), val = int32(-1)]; - bool var_16777_interleave_0 = const()[name = string("op_16777_interleave_0"), val = bool(false)]; - tensor var_16777_cast_fp16 = concat(axis = var_16776, interleave = var_16777_interleave_0, values = (var_16774_cast_fp16, x1_97_cast_fp16))[name = string("op_16777_cast_fp16")]; - tensor var_16778_cast_fp16 = mul(x = var_16777_cast_fp16, y = sin_1_cast_fp16)[name = string("op_16778_cast_fp16")]; - tensor query_states_97_cast_fp16 = add(x = var_16753_cast_fp16, y = var_16778_cast_fp16)[name = string("query_states_97_cast_fp16")]; - tensor var_16781_cast_fp16 = mul(x = k_49_cast_fp16, y = cos_1_cast_fp16)[name = string("op_16781_cast_fp16")]; - tensor x1_99_begin_0 = const()[name = string("x1_99_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_99_end_0 = const()[name = string("x1_99_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_99_end_mask_0 = const()[name = string("x1_99_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_99_cast_fp16 = slice_by_index(begin = x1_99_begin_0, end = x1_99_end_0, end_mask = x1_99_end_mask_0, x = k_49_cast_fp16)[name = string("x1_99_cast_fp16")]; - tensor x2_99_begin_0 = const()[name = string("x2_99_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_99_end_0 = const()[name = string("x2_99_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_99_end_mask_0 = const()[name = string("x2_99_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_99_cast_fp16 = slice_by_index(begin = x2_99_begin_0, end = x2_99_end_0, end_mask = x2_99_end_mask_0, x = k_49_cast_fp16)[name = string("x2_99_cast_fp16")]; - fp16 const_929_promoted_to_fp16 = const()[name = string("const_929_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_16802_cast_fp16 = mul(x = x2_99_cast_fp16, y = const_929_promoted_to_fp16)[name = string("op_16802_cast_fp16")]; - int32 var_16804 = const()[name = string("op_16804"), val = int32(-1)]; - bool var_16805_interleave_0 = const()[name = string("op_16805_interleave_0"), val = bool(false)]; - tensor var_16805_cast_fp16 = concat(axis = var_16804, interleave = var_16805_interleave_0, values = (var_16802_cast_fp16, x1_99_cast_fp16))[name = string("op_16805_cast_fp16")]; - tensor var_16806_cast_fp16 = mul(x = var_16805_cast_fp16, y = sin_1_cast_fp16)[name = string("op_16806_cast_fp16")]; - tensor key_states_97_cast_fp16 = add(x = var_16781_cast_fp16, y = var_16806_cast_fp16)[name = string("key_states_97_cast_fp16")]; - tensor key_slice_41_begin_0 = const()[name = string("key_slice_41_begin_0"), val = tensor([20, 0, 0, 0])]; - tensor key_slice_41_end_0 = const()[name = string("key_slice_41_end_0"), val = tensor([21, 1, 512, 256])]; - tensor key_slice_41_end_mask_0 = const()[name = string("key_slice_41_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_41_cast_fp16 = slice_by_index(begin = key_slice_41_begin_0, end = key_slice_41_end_0, end_mask = key_slice_41_end_mask_0, x = coreml_update_state_97)[name = string("key_slice_41_cast_fp16")]; - tensor key_tail_41_begin_0 = const()[name = string("key_tail_41_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor key_tail_41_end_0 = const()[name = string("key_tail_41_end_0"), val = tensor([1, 1, 512, 256])]; - tensor key_tail_41_cast_fp16 = slice_by_index(begin = key_tail_41_begin_0, end = key_tail_41_end_0, x = key_slice_41_cast_fp16)[name = string("key_tail_41_cast_fp16")]; - int32 var_16819 = const()[name = string("op_16819"), val = int32(2)]; - bool shifted_key_41_interleave_0 = const()[name = string("shifted_key_41_interleave_0"), val = bool(false)]; - tensor shifted_key_41_cast_fp16 = concat(axis = var_16819, interleave = shifted_key_41_interleave_0, values = (key_tail_41_cast_fp16, key_states_97_cast_fp16))[name = string("shifted_key_41_cast_fp16")]; - tensor concat_112 = const()[name = string("concat_112"), val = tensor([20, 0, 0, 0])]; - tensor concat_113 = const()[name = string("concat_113"), val = tensor([21, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_41_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_41_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_41_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_41_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_41_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_41_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_41_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_41_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_41_cast_fp16 = slice_update(begin = concat_112, begin_mask = model_model_kv_cache_local_internal_tensor_assign_41_begin_mask_0, end = concat_113, end_mask = model_model_kv_cache_local_internal_tensor_assign_41_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_41_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_41_stride_0, update = shifted_key_41_cast_fp16, x = coreml_update_state_97)[name = string("model_model_kv_cache_local_internal_tensor_assign_41_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_41_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_100_write_state")]; - tensor coreml_update_state_100 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_100")]; - tensor value_slice_41_begin_0 = const()[name = string("value_slice_41_begin_0"), val = tensor([42, 0, 0, 0])]; - tensor value_slice_41_end_0 = const()[name = string("value_slice_41_end_0"), val = tensor([43, 1, 512, 256])]; - tensor value_slice_41_end_mask_0 = const()[name = string("value_slice_41_end_mask_0"), val = tensor([false, true, true, true])]; - tensor value_slice_41_cast_fp16 = slice_by_index(begin = value_slice_41_begin_0, end = value_slice_41_end_0, end_mask = value_slice_41_end_mask_0, x = coreml_update_state_100)[name = string("value_slice_41_cast_fp16")]; - tensor value_tail_41_begin_0 = const()[name = string("value_tail_41_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor value_tail_41_end_0 = const()[name = string("value_tail_41_end_0"), val = tensor([1, 1, 512, 256])]; - tensor value_tail_41_cast_fp16 = slice_by_index(begin = value_tail_41_begin_0, end = value_tail_41_end_0, x = value_slice_41_cast_fp16)[name = string("value_tail_41_cast_fp16")]; - int32 var_16853 = const()[name = string("op_16853"), val = int32(2)]; - bool shifted_value_41_interleave_0 = const()[name = string("shifted_value_41_interleave_0"), val = bool(false)]; - tensor shifted_value_41_cast_fp16 = concat(axis = var_16853, interleave = shifted_value_41_interleave_0, values = (value_tail_41_cast_fp16, var_16693))[name = string("shifted_value_41_cast_fp16")]; - tensor concat_114 = const()[name = string("concat_114"), val = tensor([42, 0, 0, 0])]; - tensor concat_115 = const()[name = string("concat_115"), val = tensor([43, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_42_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_42_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_42_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_42_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_42_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_42_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_42_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_42_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_42_cast_fp16 = slice_update(begin = concat_114, begin_mask = model_model_kv_cache_local_internal_tensor_assign_42_begin_mask_0, end = concat_115, end_mask = model_model_kv_cache_local_internal_tensor_assign_42_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_42_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_42_stride_0, update = shifted_value_41_cast_fp16, x = coreml_update_state_100)[name = string("model_model_kv_cache_local_internal_tensor_assign_42_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_42_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_101_write_state")]; - tensor coreml_update_state_101 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_101")]; - tensor var_16881_begin_0 = const()[name = string("op_16881_begin_0"), val = tensor([20, 0, 0, 0])]; - tensor var_16881_end_0 = const()[name = string("op_16881_end_0"), val = tensor([21, 1, 512, 256])]; - tensor var_16881_end_mask_0 = const()[name = string("op_16881_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_16881_cast_fp16 = slice_by_index(begin = var_16881_begin_0, end = var_16881_end_0, end_mask = var_16881_end_mask_0, x = coreml_update_state_101)[name = string("op_16881_cast_fp16")]; - tensor var_16888_begin_0 = const()[name = string("op_16888_begin_0"), val = tensor([42, 0, 0, 0])]; - tensor var_16888_end_0 = const()[name = string("op_16888_end_0"), val = tensor([43, 1, 512, 256])]; - tensor var_16888_end_mask_0 = const()[name = string("op_16888_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_16888_cast_fp16 = slice_by_index(begin = var_16888_begin_0, end = var_16888_end_0, end_mask = var_16888_end_mask_0, x = coreml_update_state_101)[name = string("op_16888_cast_fp16")]; - tensor var_16925 = const()[name = string("op_16925"), val = tensor([1, 4, 1, 1])]; - tensor x_389_cast_fp16 = tile(reps = var_16925, x = var_16881_cast_fp16)[name = string("x_389_cast_fp16")]; - tensor var_16945 = const()[name = string("op_16945"), val = tensor([1, 4, 1, 1])]; - tensor x_395_cast_fp16 = tile(reps = var_16945, x = var_16888_cast_fp16)[name = string("x_395_cast_fp16")]; - bool var_16972_transpose_x_1 = const()[name = string("op_16972_transpose_x_1"), val = bool(false)]; - bool var_16972_transpose_y_1 = const()[name = string("op_16972_transpose_y_1"), val = bool(true)]; - tensor var_16972 = matmul(transpose_x = var_16972_transpose_x_1, transpose_y = var_16972_transpose_y_1, x = query_states_97_cast_fp16, y = x_389_cast_fp16)[name = string("op_16972")]; - fp16 var_16973_to_fp16 = const()[name = string("op_16973_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_145_cast_fp16 = mul(x = var_16972, y = var_16973_to_fp16)[name = string("attn_weights_145_cast_fp16")]; - tensor attn_weights_147_cast_fp16 = add(x = attn_weights_145_cast_fp16, y = var_2059)[name = string("attn_weights_147_cast_fp16")]; - int32 var_17008 = const()[name = string("op_17008"), val = int32(-1)]; - tensor attn_weights_149_cast_fp16 = softmax(axis = var_17008, x = attn_weights_147_cast_fp16)[name = string("attn_weights_149_cast_fp16")]; - bool attn_output_241_transpose_x_0 = const()[name = string("attn_output_241_transpose_x_0"), val = bool(false)]; - bool attn_output_241_transpose_y_0 = const()[name = string("attn_output_241_transpose_y_0"), val = bool(false)]; - tensor attn_output_241_cast_fp16 = matmul(transpose_x = attn_output_241_transpose_x_0, transpose_y = attn_output_241_transpose_y_0, x = attn_weights_149_cast_fp16, y = x_395_cast_fp16)[name = string("attn_output_241_cast_fp16")]; - tensor var_17019_perm_0 = const()[name = string("op_17019_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_17023 = const()[name = string("op_17023"), val = tensor([1, 1, 1024])]; - tensor var_17019_cast_fp16 = transpose(perm = var_17019_perm_0, x = attn_output_241_cast_fp16)[name = string("transpose_10")]; - tensor attn_output_245_cast_fp16 = reshape(shape = var_17023, x = var_17019_cast_fp16)[name = string("attn_output_245_cast_fp16")]; - tensor var_17028 = const()[name = string("op_17028"), val = tensor([0, 2, 1])]; - string var_17044_pad_type_0 = const()[name = string("op_17044_pad_type_0"), val = string("valid")]; - int32 var_17044_groups_0 = const()[name = string("op_17044_groups_0"), val = int32(1)]; - tensor var_17044_strides_0 = const()[name = string("op_17044_strides_0"), val = tensor([1])]; - tensor var_17044_pad_0 = const()[name = string("op_17044_pad_0"), val = tensor([0, 0])]; - tensor var_17044_dilations_0 = const()[name = string("op_17044_dilations_0"), val = tensor([1])]; - tensor squeeze_24_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(509610496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(510495296))))[name = string("squeeze_24_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_17029_cast_fp16 = transpose(perm = var_17028, x = attn_output_245_cast_fp16)[name = string("transpose_9")]; - tensor var_17044_cast_fp16 = conv(dilations = var_17044_dilations_0, groups = var_17044_groups_0, pad = var_17044_pad_0, pad_type = var_17044_pad_type_0, strides = var_17044_strides_0, weight = squeeze_24_cast_fp16_to_fp32_to_fp16_palettized, x = var_17029_cast_fp16)[name = string("op_17044_cast_fp16")]; - tensor var_17048 = const()[name = string("op_17048"), val = tensor([0, 2, 1])]; - int32 var_17059 = const()[name = string("op_17059"), val = int32(-1)]; - fp16 const_938_promoted_to_fp16 = const()[name = string("const_938_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_393_cast_fp16 = transpose(perm = var_17048, x = var_17044_cast_fp16)[name = string("transpose_8")]; - tensor var_17061_cast_fp16 = mul(x = hidden_states_393_cast_fp16, y = const_938_promoted_to_fp16)[name = string("op_17061_cast_fp16")]; - bool input_491_interleave_0 = const()[name = string("input_491_interleave_0"), val = bool(false)]; - tensor input_491_cast_fp16 = concat(axis = var_17059, interleave = input_491_interleave_0, values = (hidden_states_393_cast_fp16, var_17061_cast_fp16))[name = string("input_491_cast_fp16")]; - tensor normed_589_axes_0 = const()[name = string("normed_589_axes_0"), val = tensor([-1])]; - fp16 var_17056_to_fp16 = const()[name = string("op_17056_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_589_cast_fp16 = layer_norm(axes = normed_589_axes_0, epsilon = var_17056_to_fp16, x = input_491_cast_fp16)[name = string("normed_589_cast_fp16")]; - tensor normed_591_begin_0 = const()[name = string("normed_591_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_591_end_0 = const()[name = string("normed_591_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_591_end_mask_0 = const()[name = string("normed_591_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_591_cast_fp16 = slice_by_index(begin = normed_591_begin_0, end = normed_591_end_0, end_mask = normed_591_end_mask_0, x = normed_589_cast_fp16)[name = string("normed_591_cast_fp16")]; - tensor var_17075_to_fp16 = const()[name = string("op_17075_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(510513792)))]; - tensor attn_output_249_cast_fp16 = mul(x = normed_591_cast_fp16, y = var_17075_to_fp16)[name = string("attn_output_249_cast_fp16")]; - tensor hidden_states_395_cast_fp16 = add(x = hidden_states_385_cast_fp16, y = attn_output_249_cast_fp16)[name = string("hidden_states_395_cast_fp16")]; - int32 var_17088 = const()[name = string("op_17088"), val = int32(-1)]; - fp16 const_942_promoted_to_fp16 = const()[name = string("const_942_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_17090_cast_fp16 = mul(x = hidden_states_395_cast_fp16, y = const_942_promoted_to_fp16)[name = string("op_17090_cast_fp16")]; - bool input_493_interleave_0 = const()[name = string("input_493_interleave_0"), val = bool(false)]; - tensor input_493_cast_fp16 = concat(axis = var_17088, interleave = input_493_interleave_0, values = (hidden_states_395_cast_fp16, var_17090_cast_fp16))[name = string("input_493_cast_fp16")]; - tensor normed_593_axes_0 = const()[name = string("normed_593_axes_0"), val = tensor([-1])]; - fp16 var_17085_to_fp16 = const()[name = string("op_17085_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_593_cast_fp16 = layer_norm(axes = normed_593_axes_0, epsilon = var_17085_to_fp16, x = input_493_cast_fp16)[name = string("normed_593_cast_fp16")]; - tensor normed_595_begin_0 = const()[name = string("normed_595_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_595_end_0 = const()[name = string("normed_595_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_595_end_mask_0 = const()[name = string("normed_595_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_595_cast_fp16 = slice_by_index(begin = normed_595_begin_0, end = normed_595_end_0, end_mask = normed_595_end_mask_0, x = normed_593_cast_fp16)[name = string("normed_595_cast_fp16")]; - tensor var_17104_to_fp16 = const()[name = string("op_17104_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(510516160)))]; - tensor x_397_cast_fp16 = mul(x = normed_595_cast_fp16, y = var_17104_to_fp16)[name = string("x_397_cast_fp16")]; - tensor var_17116 = const()[name = string("op_17116"), val = tensor([0, 2, 1])]; - tensor input_495_axes_0 = const()[name = string("input_495_axes_0"), val = tensor([2])]; - tensor var_17117_cast_fp16 = transpose(perm = var_17116, x = x_397_cast_fp16)[name = string("transpose_7")]; - tensor input_495_cast_fp16 = expand_dims(axes = input_495_axes_0, x = var_17117_cast_fp16)[name = string("input_495_cast_fp16")]; - string x_399_pad_type_0 = const()[name = string("x_399_pad_type_0"), val = string("valid")]; - tensor x_399_strides_0 = const()[name = string("x_399_strides_0"), val = tensor([1, 1])]; - tensor x_399_pad_0 = const()[name = string("x_399_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_399_dilations_0 = const()[name = string("x_399_dilations_0"), val = tensor([1, 1])]; - int32 x_399_groups_0 = const()[name = string("x_399_groups_0"), val = int32(1)]; - tensor model_model_layers_24_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(846965312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(852937344))))[name = string("model_model_layers_24_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_399_cast_fp16 = conv(dilations = x_399_dilations_0, groups = x_399_groups_0, pad = x_399_pad_0, pad_type = x_399_pad_type_0, strides = x_399_strides_0, weight = model_model_layers_24_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_495_cast_fp16)[name = string("x_399_cast_fp16")]; - string b_49_pad_type_0 = const()[name = string("b_49_pad_type_0"), val = string("valid")]; - tensor b_49_strides_0 = const()[name = string("b_49_strides_0"), val = tensor([1, 1])]; - tensor b_49_pad_0 = const()[name = string("b_49_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_49_dilations_0 = const()[name = string("b_49_dilations_0"), val = tensor([1, 1])]; - int32 b_49_groups_0 = const()[name = string("b_49_groups_0"), val = int32(1)]; - tensor model_model_layers_24_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(853048000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(859020032))))[name = string("model_model_layers_24_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_49_cast_fp16 = conv(dilations = b_49_dilations_0, groups = b_49_groups_0, pad = b_49_pad_0, pad_type = b_49_pad_type_0, strides = b_49_strides_0, weight = model_model_layers_24_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_495_cast_fp16)[name = string("b_49_cast_fp16")]; - string var_17142_mode_0 = const()[name = string("op_17142_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_17142_cast_fp16 = gelu(mode = var_17142_mode_0, x = x_399_cast_fp16)[name = string("op_17142_cast_fp16")]; - tensor input_497_cast_fp16 = mul(x = var_17142_cast_fp16, y = b_49_cast_fp16)[name = string("input_497_cast_fp16")]; - string e_49_pad_type_0 = const()[name = string("e_49_pad_type_0"), val = string("valid")]; - tensor e_49_strides_0 = const()[name = string("e_49_strides_0"), val = tensor([1, 1])]; - tensor e_49_pad_0 = const()[name = string("e_49_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_49_dilations_0 = const()[name = string("e_49_dilations_0"), val = tensor([1, 1])]; - int32 e_49_groups_0 = const()[name = string("e_49_groups_0"), val = int32(1)]; - tensor model_model_layers_24_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(522683904))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528655936))))[name = string("model_model_layers_24_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_49_cast_fp16 = conv(dilations = e_49_dilations_0, groups = e_49_groups_0, pad = e_49_pad_0, pad_type = e_49_pad_type_0, strides = e_49_strides_0, weight = model_model_layers_24_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_497_cast_fp16)[name = string("e_49_cast_fp16")]; - tensor var_17150_axes_0 = const()[name = string("op_17150_axes_0"), val = tensor([2])]; - tensor var_17150_cast_fp16 = squeeze(axes = var_17150_axes_0, x = e_49_cast_fp16)[name = string("op_17150_cast_fp16")]; - tensor var_17151 = const()[name = string("op_17151"), val = tensor([0, 2, 1])]; - int32 var_17162 = const()[name = string("op_17162"), val = int32(-1)]; - fp16 const_946_promoted_to_fp16 = const()[name = string("const_946_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_397_cast_fp16 = transpose(perm = var_17151, x = var_17150_cast_fp16)[name = string("transpose_6")]; - tensor var_17164_cast_fp16 = mul(x = hidden_states_397_cast_fp16, y = const_946_promoted_to_fp16)[name = string("op_17164_cast_fp16")]; - bool input_499_interleave_0 = const()[name = string("input_499_interleave_0"), val = bool(false)]; - tensor input_499_cast_fp16 = concat(axis = var_17162, interleave = input_499_interleave_0, values = (hidden_states_397_cast_fp16, var_17164_cast_fp16))[name = string("input_499_cast_fp16")]; - tensor normed_597_axes_0 = const()[name = string("normed_597_axes_0"), val = tensor([-1])]; - fp16 var_17159_to_fp16 = const()[name = string("op_17159_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_597_cast_fp16 = layer_norm(axes = normed_597_axes_0, epsilon = var_17159_to_fp16, x = input_499_cast_fp16)[name = string("normed_597_cast_fp16")]; - tensor normed_599_begin_0 = const()[name = string("normed_599_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_599_end_0 = const()[name = string("normed_599_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_599_end_mask_0 = const()[name = string("normed_599_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_599_cast_fp16 = slice_by_index(begin = normed_599_begin_0, end = normed_599_end_0, end_mask = normed_599_end_mask_0, x = normed_597_cast_fp16)[name = string("normed_599_cast_fp16")]; - tensor var_17178_to_fp16 = const()[name = string("op_17178_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528674432)))]; - tensor hidden_states_399_cast_fp16 = mul(x = normed_599_cast_fp16, y = var_17178_to_fp16)[name = string("hidden_states_399_cast_fp16")]; - tensor hidden_states_401_cast_fp16 = add(x = hidden_states_395_cast_fp16, y = hidden_states_399_cast_fp16)[name = string("hidden_states_401_cast_fp16")]; - int32 var_17229 = const()[name = string("op_17229"), val = int32(-1)]; - fp16 const_950_promoted_to_fp16 = const()[name = string("const_950_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_17231_cast_fp16 = mul(x = hidden_states_401_cast_fp16, y = const_950_promoted_to_fp16)[name = string("op_17231_cast_fp16")]; - bool input_501_interleave_0 = const()[name = string("input_501_interleave_0"), val = bool(false)]; - tensor input_501_cast_fp16 = concat(axis = var_17229, interleave = input_501_interleave_0, values = (hidden_states_401_cast_fp16, var_17231_cast_fp16))[name = string("input_501_cast_fp16")]; - tensor normed_601_axes_0 = const()[name = string("normed_601_axes_0"), val = tensor([-1])]; - fp16 var_17226_to_fp16 = const()[name = string("op_17226_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_601_cast_fp16 = layer_norm(axes = normed_601_axes_0, epsilon = var_17226_to_fp16, x = input_501_cast_fp16)[name = string("normed_601_cast_fp16")]; - tensor normed_603_begin_0 = const()[name = string("normed_603_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_603_end_0 = const()[name = string("normed_603_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_603_end_mask_0 = const()[name = string("normed_603_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_603_cast_fp16 = slice_by_index(begin = normed_603_begin_0, end = normed_603_end_0, end_mask = normed_603_end_mask_0, x = normed_601_cast_fp16)[name = string("normed_603_cast_fp16")]; - tensor var_17245_to_fp16 = const()[name = string("op_17245_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528676800)))]; - tensor hidden_states_403_cast_fp16 = mul(x = normed_603_cast_fp16, y = var_17245_to_fp16)[name = string("hidden_states_403_cast_fp16")]; - tensor var_17250 = const()[name = string("op_17250"), val = tensor([0, 2, 1])]; - tensor var_17253_axes_0 = const()[name = string("op_17253_axes_0"), val = tensor([2])]; - tensor var_17251_cast_fp16 = transpose(perm = var_17250, x = hidden_states_403_cast_fp16)[name = string("transpose_5")]; - tensor var_17253_cast_fp16 = expand_dims(axes = var_17253_axes_0, x = var_17251_cast_fp16)[name = string("op_17253_cast_fp16")]; - string var_17269_pad_type_0 = const()[name = string("op_17269_pad_type_0"), val = string("valid")]; - tensor var_17269_strides_0 = const()[name = string("op_17269_strides_0"), val = tensor([1, 1])]; - tensor var_17269_pad_0 = const()[name = string("op_17269_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_17269_dilations_0 = const()[name = string("op_17269_dilations_0"), val = tensor([1, 1])]; - int32 var_17269_groups_0 = const()[name = string("op_17269_groups_0"), val = int32(1)]; - tensor var_17269 = conv(dilations = var_17269_dilations_0, groups = var_17269_groups_0, pad = var_17269_pad_0, pad_type = var_17269_pad_type_0, strides = var_17269_strides_0, weight = model_model_layers_25_self_attn_q_proj_weight_palettized, x = var_17253_cast_fp16)[name = string("op_17269")]; - tensor var_17274 = const()[name = string("op_17274"), val = tensor([1, 4, 1, 256])]; - tensor var_17275 = reshape(shape = var_17274, x = var_17269)[name = string("op_17275")]; - string var_17291_pad_type_0 = const()[name = string("op_17291_pad_type_0"), val = string("valid")]; - tensor var_17291_strides_0 = const()[name = string("op_17291_strides_0"), val = tensor([1, 1])]; - tensor var_17291_pad_0 = const()[name = string("op_17291_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_17291_dilations_0 = const()[name = string("op_17291_dilations_0"), val = tensor([1, 1])]; - int32 var_17291_groups_0 = const()[name = string("op_17291_groups_0"), val = int32(1)]; - tensor var_17291 = conv(dilations = var_17291_dilations_0, groups = var_17291_groups_0, pad = var_17291_pad_0, pad_type = var_17291_pad_type_0, strides = var_17291_strides_0, weight = model_model_layers_25_self_attn_k_proj_weight_palettized, x = var_17253_cast_fp16)[name = string("op_17291")]; - tensor var_17296 = const()[name = string("op_17296"), val = tensor([1, 1, 1, 256])]; - tensor var_17297 = reshape(shape = var_17296, x = var_17291)[name = string("op_17297")]; - string var_17313_pad_type_0 = const()[name = string("op_17313_pad_type_0"), val = string("valid")]; - tensor var_17313_strides_0 = const()[name = string("op_17313_strides_0"), val = tensor([1, 1])]; - tensor var_17313_pad_0 = const()[name = string("op_17313_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_17313_dilations_0 = const()[name = string("op_17313_dilations_0"), val = tensor([1, 1])]; - int32 var_17313_groups_0 = const()[name = string("op_17313_groups_0"), val = int32(1)]; - tensor var_17313 = conv(dilations = var_17313_dilations_0, groups = var_17313_groups_0, pad = var_17313_pad_0, pad_type = var_17313_pad_type_0, strides = var_17313_strides_0, weight = model_model_layers_25_self_attn_v_proj_weight_palettized, x = var_17253_cast_fp16)[name = string("op_17313")]; - tensor var_17318 = const()[name = string("op_17318"), val = tensor([1, 1, 1, 256])]; - tensor var_17319 = reshape(shape = var_17318, x = var_17313)[name = string("op_17319")]; - int32 var_17334 = const()[name = string("op_17334"), val = int32(-1)]; - fp16 const_954_promoted = const()[name = string("const_954_promoted"), val = fp16(-0x1p+0)]; - tensor var_17336 = mul(x = var_17275, y = const_954_promoted)[name = string("op_17336")]; - bool input_505_interleave_0 = const()[name = string("input_505_interleave_0"), val = bool(false)]; - tensor input_505 = concat(axis = var_17334, interleave = input_505_interleave_0, values = (var_17275, var_17336))[name = string("input_505")]; - tensor normed_605_axes_0 = const()[name = string("normed_605_axes_0"), val = tensor([-1])]; - fp16 var_17331_to_fp16 = const()[name = string("op_17331_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_605_cast_fp16 = layer_norm(axes = normed_605_axes_0, epsilon = var_17331_to_fp16, x = input_505)[name = string("normed_605_cast_fp16")]; - tensor normed_607_begin_0 = const()[name = string("normed_607_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_607_end_0 = const()[name = string("normed_607_end_0"), val = tensor([1, 4, 1, 256])]; - tensor normed_607_end_mask_0 = const()[name = string("normed_607_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_607 = slice_by_index(begin = normed_607_begin_0, end = normed_607_end_0, end_mask = normed_607_end_mask_0, x = normed_605_cast_fp16)[name = string("normed_607")]; - tensor var_17350_to_fp16 = const()[name = string("op_17350_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528679168)))]; - tensor q_cast_fp16 = mul(x = normed_607, y = var_17350_to_fp16)[name = string("q_cast_fp16")]; - int32 var_17361 = const()[name = string("op_17361"), val = int32(-1)]; - fp16 const_958_promoted = const()[name = string("const_958_promoted"), val = fp16(-0x1p+0)]; - tensor var_17363 = mul(x = var_17297, y = const_958_promoted)[name = string("op_17363")]; - bool input_507_interleave_0 = const()[name = string("input_507_interleave_0"), val = bool(false)]; - tensor input_507 = concat(axis = var_17361, interleave = input_507_interleave_0, values = (var_17297, var_17363))[name = string("input_507")]; - tensor normed_609_axes_0 = const()[name = string("normed_609_axes_0"), val = tensor([-1])]; - fp16 var_17358_to_fp16 = const()[name = string("op_17358_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_609_cast_fp16 = layer_norm(axes = normed_609_axes_0, epsilon = var_17358_to_fp16, x = input_507)[name = string("normed_609_cast_fp16")]; - tensor normed_611_begin_0 = const()[name = string("normed_611_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_611_end_0 = const()[name = string("normed_611_end_0"), val = tensor([1, 1, 1, 256])]; - tensor normed_611_end_mask_0 = const()[name = string("normed_611_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_611 = slice_by_index(begin = normed_611_begin_0, end = normed_611_end_0, end_mask = normed_611_end_mask_0, x = normed_609_cast_fp16)[name = string("normed_611")]; - tensor var_17377_to_fp16 = const()[name = string("op_17377_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528679744)))]; - tensor k_cast_fp16 = mul(x = normed_611, y = var_17377_to_fp16)[name = string("k_cast_fp16")]; - tensor var_17379_cast_fp16 = mul(x = q_cast_fp16, y = cos_1_cast_fp16)[name = string("op_17379_cast_fp16")]; - tensor x1_101_begin_0 = const()[name = string("x1_101_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_101_end_0 = const()[name = string("x1_101_end_0"), val = tensor([1, 4, 1, 128])]; - tensor x1_101_end_mask_0 = const()[name = string("x1_101_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_101_cast_fp16 = slice_by_index(begin = x1_101_begin_0, end = x1_101_end_0, end_mask = x1_101_end_mask_0, x = q_cast_fp16)[name = string("x1_101_cast_fp16")]; - tensor x2_101_begin_0 = const()[name = string("x2_101_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_101_end_0 = const()[name = string("x2_101_end_0"), val = tensor([1, 4, 1, 256])]; - tensor x2_101_end_mask_0 = const()[name = string("x2_101_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_101_cast_fp16 = slice_by_index(begin = x2_101_begin_0, end = x2_101_end_0, end_mask = x2_101_end_mask_0, x = q_cast_fp16)[name = string("x2_101_cast_fp16")]; - fp16 const_964_promoted_to_fp16 = const()[name = string("const_964_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_17400_cast_fp16 = mul(x = x2_101_cast_fp16, y = const_964_promoted_to_fp16)[name = string("op_17400_cast_fp16")]; - int32 var_17402 = const()[name = string("op_17402"), val = int32(-1)]; - bool var_17403_interleave_0 = const()[name = string("op_17403_interleave_0"), val = bool(false)]; - tensor var_17403_cast_fp16 = concat(axis = var_17402, interleave = var_17403_interleave_0, values = (var_17400_cast_fp16, x1_101_cast_fp16))[name = string("op_17403_cast_fp16")]; - tensor var_17404_cast_fp16 = mul(x = var_17403_cast_fp16, y = sin_1_cast_fp16)[name = string("op_17404_cast_fp16")]; - tensor query_states_101_cast_fp16 = add(x = var_17379_cast_fp16, y = var_17404_cast_fp16)[name = string("query_states_101_cast_fp16")]; - tensor var_17407_cast_fp16 = mul(x = k_cast_fp16, y = cos_1_cast_fp16)[name = string("op_17407_cast_fp16")]; - tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 1, 1, 128])]; - tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_cast_fp16 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = k_cast_fp16)[name = string("x1_cast_fp16")]; - tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 1, 1, 256])]; - tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_cast_fp16 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = k_cast_fp16)[name = string("x2_cast_fp16")]; - fp16 const_967_promoted_to_fp16 = const()[name = string("const_967_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_17428_cast_fp16 = mul(x = x2_cast_fp16, y = const_967_promoted_to_fp16)[name = string("op_17428_cast_fp16")]; - int32 var_17430 = const()[name = string("op_17430"), val = int32(-1)]; - bool var_17431_interleave_0 = const()[name = string("op_17431_interleave_0"), val = bool(false)]; - tensor var_17431_cast_fp16 = concat(axis = var_17430, interleave = var_17431_interleave_0, values = (var_17428_cast_fp16, x1_cast_fp16))[name = string("op_17431_cast_fp16")]; - tensor var_17432_cast_fp16 = mul(x = var_17431_cast_fp16, y = sin_1_cast_fp16)[name = string("op_17432_cast_fp16")]; - tensor key_states_101_cast_fp16 = add(x = var_17407_cast_fp16, y = var_17432_cast_fp16)[name = string("key_states_101_cast_fp16")]; - tensor key_slice_begin_0 = const()[name = string("key_slice_begin_0"), val = tensor([21, 0, 0, 0])]; - tensor key_slice_end_0 = const()[name = string("key_slice_end_0"), val = tensor([22, 1, 512, 256])]; - tensor key_slice_end_mask_0 = const()[name = string("key_slice_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_cast_fp16 = slice_by_index(begin = key_slice_begin_0, end = key_slice_end_0, end_mask = key_slice_end_mask_0, x = coreml_update_state_101)[name = string("key_slice_cast_fp16")]; - tensor key_tail_begin_0 = const()[name = string("key_tail_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor key_tail_end_0 = const()[name = string("key_tail_end_0"), val = tensor([1, 1, 512, 256])]; - tensor key_tail_cast_fp16 = slice_by_index(begin = key_tail_begin_0, end = key_tail_end_0, x = key_slice_cast_fp16)[name = string("key_tail_cast_fp16")]; - int32 var_17445 = const()[name = string("op_17445"), val = int32(2)]; - bool shifted_key_interleave_0 = const()[name = string("shifted_key_interleave_0"), val = bool(false)]; - tensor shifted_key_cast_fp16 = concat(axis = var_17445, interleave = shifted_key_interleave_0, values = (key_tail_cast_fp16, key_states_101_cast_fp16))[name = string("shifted_key_cast_fp16")]; - tensor concat_116 = const()[name = string("concat_116"), val = tensor([21, 0, 0, 0])]; - tensor concat_117 = const()[name = string("concat_117"), val = tensor([22, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_43_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_43_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_43_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_43_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_43_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_43_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_43_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_43_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_43_cast_fp16 = slice_update(begin = concat_116, begin_mask = model_model_kv_cache_local_internal_tensor_assign_43_begin_mask_0, end = concat_117, end_mask = model_model_kv_cache_local_internal_tensor_assign_43_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_43_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_43_stride_0, update = shifted_key_cast_fp16, x = coreml_update_state_101)[name = string("model_model_kv_cache_local_internal_tensor_assign_43_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_43_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_102_write_state")]; - tensor coreml_update_state_102 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_102")]; - tensor value_slice_begin_0 = const()[name = string("value_slice_begin_0"), val = tensor([43, 0, 0, 0])]; - tensor value_slice_end_0 = const()[name = string("value_slice_end_0"), val = tensor([1, 1, 512, 256])]; - tensor value_slice_end_mask_0 = const()[name = string("value_slice_end_mask_0"), val = tensor([true, true, true, true])]; - tensor value_slice_cast_fp16 = slice_by_index(begin = value_slice_begin_0, end = value_slice_end_0, end_mask = value_slice_end_mask_0, x = coreml_update_state_102)[name = string("value_slice_cast_fp16")]; - tensor value_tail_begin_0 = const()[name = string("value_tail_begin_0"), val = tensor([0, 0, 1, 0])]; - tensor value_tail_end_0 = const()[name = string("value_tail_end_0"), val = tensor([1, 1, 512, 256])]; - tensor value_tail_cast_fp16 = slice_by_index(begin = value_tail_begin_0, end = value_tail_end_0, x = value_slice_cast_fp16)[name = string("value_tail_cast_fp16")]; - int32 var_17479 = const()[name = string("op_17479"), val = int32(2)]; - bool shifted_value_interleave_0 = const()[name = string("shifted_value_interleave_0"), val = bool(false)]; - tensor shifted_value_cast_fp16 = concat(axis = var_17479, interleave = shifted_value_interleave_0, values = (value_tail_cast_fp16, var_17319))[name = string("shifted_value_cast_fp16")]; - tensor concat_118 = const()[name = string("concat_118"), val = tensor([43, 0, 0, 0])]; - tensor concat_119 = const()[name = string("concat_119"), val = tensor([44, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_44_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_44_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_44_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_44_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_44_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_44_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_44_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_44_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_44_cast_fp16 = slice_update(begin = concat_118, begin_mask = model_model_kv_cache_local_internal_tensor_assign_44_begin_mask_0, end = concat_119, end_mask = model_model_kv_cache_local_internal_tensor_assign_44_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_44_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_44_stride_0, update = shifted_value_cast_fp16, x = coreml_update_state_102)[name = string("model_model_kv_cache_local_internal_tensor_assign_44_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_44_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_103_write_state")]; - tensor coreml_update_state_103 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_103")]; - tensor var_17507_begin_0 = const()[name = string("op_17507_begin_0"), val = tensor([21, 0, 0, 0])]; - tensor var_17507_end_0 = const()[name = string("op_17507_end_0"), val = tensor([22, 1, 512, 256])]; - tensor var_17507_end_mask_0 = const()[name = string("op_17507_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_17507_cast_fp16 = slice_by_index(begin = var_17507_begin_0, end = var_17507_end_0, end_mask = var_17507_end_mask_0, x = coreml_update_state_103)[name = string("op_17507_cast_fp16")]; - tensor var_17514_begin_0 = const()[name = string("op_17514_begin_0"), val = tensor([43, 0, 0, 0])]; - tensor var_17514_end_0 = const()[name = string("op_17514_end_0"), val = tensor([1, 1, 512, 256])]; - tensor var_17514_end_mask_0 = const()[name = string("op_17514_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_17514_cast_fp16 = slice_by_index(begin = var_17514_begin_0, end = var_17514_end_0, end_mask = var_17514_end_mask_0, x = coreml_update_state_103)[name = string("op_17514_cast_fp16")]; - tensor var_17551 = const()[name = string("op_17551"), val = tensor([1, 4, 1, 1])]; - tensor x_405_cast_fp16 = tile(reps = var_17551, x = var_17507_cast_fp16)[name = string("x_405_cast_fp16")]; - tensor var_17571 = const()[name = string("op_17571"), val = tensor([1, 4, 1, 1])]; - tensor x_411_cast_fp16 = tile(reps = var_17571, x = var_17514_cast_fp16)[name = string("x_411_cast_fp16")]; - bool var_17598_transpose_x_1 = const()[name = string("op_17598_transpose_x_1"), val = bool(false)]; - bool var_17598_transpose_y_1 = const()[name = string("op_17598_transpose_y_1"), val = bool(true)]; - tensor var_17598 = matmul(transpose_x = var_17598_transpose_x_1, transpose_y = var_17598_transpose_y_1, x = query_states_101_cast_fp16, y = x_405_cast_fp16)[name = string("op_17598")]; - fp16 var_17599_to_fp16 = const()[name = string("op_17599_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_151_cast_fp16 = mul(x = var_17598, y = var_17599_to_fp16)[name = string("attn_weights_151_cast_fp16")]; - tensor attn_weights_153_cast_fp16 = add(x = attn_weights_151_cast_fp16, y = var_2059)[name = string("attn_weights_153_cast_fp16")]; - int32 var_17634 = const()[name = string("op_17634"), val = int32(-1)]; - tensor attn_weights_cast_fp16 = softmax(axis = var_17634, x = attn_weights_153_cast_fp16)[name = string("attn_weights_cast_fp16")]; - bool attn_output_251_transpose_x_0 = const()[name = string("attn_output_251_transpose_x_0"), val = bool(false)]; - bool attn_output_251_transpose_y_0 = const()[name = string("attn_output_251_transpose_y_0"), val = bool(false)]; - tensor attn_output_251_cast_fp16 = matmul(transpose_x = attn_output_251_transpose_x_0, transpose_y = attn_output_251_transpose_y_0, x = attn_weights_cast_fp16, y = x_411_cast_fp16)[name = string("attn_output_251_cast_fp16")]; - tensor var_17645_perm_0 = const()[name = string("op_17645_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_17649 = const()[name = string("op_17649"), val = tensor([1, 1, 1024])]; - tensor var_17645_cast_fp16 = transpose(perm = var_17645_perm_0, x = attn_output_251_cast_fp16)[name = string("transpose_4")]; - tensor attn_output_255_cast_fp16 = reshape(shape = var_17649, x = var_17645_cast_fp16)[name = string("attn_output_255_cast_fp16")]; - tensor var_17654 = const()[name = string("op_17654"), val = tensor([0, 2, 1])]; - string var_17670_pad_type_0 = const()[name = string("op_17670_pad_type_0"), val = string("valid")]; - int32 var_17670_groups_0 = const()[name = string("op_17670_groups_0"), val = int32(1)]; - tensor var_17670_strides_0 = const()[name = string("op_17670_strides_0"), val = tensor([1])]; - tensor var_17670_pad_0 = const()[name = string("op_17670_pad_0"), val = tensor([0, 0])]; - tensor var_17670_dilations_0 = const()[name = string("op_17670_dilations_0"), val = tensor([1])]; - tensor squeeze_25_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528680320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(529565120))))[name = string("squeeze_25_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_17655_cast_fp16 = transpose(perm = var_17654, x = attn_output_255_cast_fp16)[name = string("transpose_3")]; - tensor var_17670_cast_fp16 = conv(dilations = var_17670_dilations_0, groups = var_17670_groups_0, pad = var_17670_pad_0, pad_type = var_17670_pad_type_0, strides = var_17670_strides_0, weight = squeeze_25_cast_fp16_to_fp32_to_fp16_palettized, x = var_17655_cast_fp16)[name = string("op_17670_cast_fp16")]; - tensor var_17674 = const()[name = string("op_17674"), val = tensor([0, 2, 1])]; - int32 var_17685 = const()[name = string("op_17685"), val = int32(-1)]; - fp16 const_976_promoted_to_fp16 = const()[name = string("const_976_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_409_cast_fp16 = transpose(perm = var_17674, x = var_17670_cast_fp16)[name = string("transpose_2")]; - tensor var_17687_cast_fp16 = mul(x = hidden_states_409_cast_fp16, y = const_976_promoted_to_fp16)[name = string("op_17687_cast_fp16")]; - bool input_511_interleave_0 = const()[name = string("input_511_interleave_0"), val = bool(false)]; - tensor input_511_cast_fp16 = concat(axis = var_17685, interleave = input_511_interleave_0, values = (hidden_states_409_cast_fp16, var_17687_cast_fp16))[name = string("input_511_cast_fp16")]; - tensor normed_613_axes_0 = const()[name = string("normed_613_axes_0"), val = tensor([-1])]; - fp16 var_17682_to_fp16 = const()[name = string("op_17682_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_613_cast_fp16 = layer_norm(axes = normed_613_axes_0, epsilon = var_17682_to_fp16, x = input_511_cast_fp16)[name = string("normed_613_cast_fp16")]; - tensor normed_615_begin_0 = const()[name = string("normed_615_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_615_end_0 = const()[name = string("normed_615_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_615_end_mask_0 = const()[name = string("normed_615_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_615_cast_fp16 = slice_by_index(begin = normed_615_begin_0, end = normed_615_end_0, end_mask = normed_615_end_mask_0, x = normed_613_cast_fp16)[name = string("normed_615_cast_fp16")]; - tensor var_17701_to_fp16 = const()[name = string("op_17701_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(529583616)))]; - tensor attn_output_cast_fp16 = mul(x = normed_615_cast_fp16, y = var_17701_to_fp16)[name = string("attn_output_cast_fp16")]; - tensor hidden_states_411_cast_fp16 = add(x = hidden_states_401_cast_fp16, y = attn_output_cast_fp16)[name = string("hidden_states_411_cast_fp16")]; - int32 var_17714 = const()[name = string("op_17714"), val = int32(-1)]; - fp16 const_980_promoted_to_fp16 = const()[name = string("const_980_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_17716_cast_fp16 = mul(x = hidden_states_411_cast_fp16, y = const_980_promoted_to_fp16)[name = string("op_17716_cast_fp16")]; - bool input_513_interleave_0 = const()[name = string("input_513_interleave_0"), val = bool(false)]; - tensor input_513_cast_fp16 = concat(axis = var_17714, interleave = input_513_interleave_0, values = (hidden_states_411_cast_fp16, var_17716_cast_fp16))[name = string("input_513_cast_fp16")]; - tensor normed_617_axes_0 = const()[name = string("normed_617_axes_0"), val = tensor([-1])]; - fp16 var_17711_to_fp16 = const()[name = string("op_17711_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_617_cast_fp16 = layer_norm(axes = normed_617_axes_0, epsilon = var_17711_to_fp16, x = input_513_cast_fp16)[name = string("normed_617_cast_fp16")]; - tensor normed_619_begin_0 = const()[name = string("normed_619_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_619_end_0 = const()[name = string("normed_619_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_619_end_mask_0 = const()[name = string("normed_619_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_619_cast_fp16 = slice_by_index(begin = normed_619_begin_0, end = normed_619_end_0, end_mask = normed_619_end_mask_0, x = normed_617_cast_fp16)[name = string("normed_619_cast_fp16")]; - tensor var_17730_to_fp16 = const()[name = string("op_17730_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(529585984)))]; - tensor x_413_cast_fp16 = mul(x = normed_619_cast_fp16, y = var_17730_to_fp16)[name = string("x_413_cast_fp16")]; - tensor var_17742 = const()[name = string("op_17742"), val = tensor([0, 2, 1])]; - tensor input_515_axes_0 = const()[name = string("input_515_axes_0"), val = tensor([2])]; - tensor var_17743_cast_fp16 = transpose(perm = var_17742, x = x_413_cast_fp16)[name = string("transpose_1")]; - tensor input_515_cast_fp16 = expand_dims(axes = input_515_axes_0, x = var_17743_cast_fp16)[name = string("input_515_cast_fp16")]; - string x_pad_type_0 = const()[name = string("x_pad_type_0"), val = string("valid")]; - tensor x_strides_0 = const()[name = string("x_strides_0"), val = tensor([1, 1])]; - tensor x_pad_0 = const()[name = string("x_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_dilations_0 = const()[name = string("x_dilations_0"), val = tensor([1, 1])]; - int32 x_groups_0 = const()[name = string("x_groups_0"), val = int32(1)]; - tensor model_model_layers_25_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(859130688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(865102720))))[name = string("model_model_layers_25_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_cast_fp16 = conv(dilations = x_dilations_0, groups = x_groups_0, pad = x_pad_0, pad_type = x_pad_type_0, strides = x_strides_0, weight = model_model_layers_25_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_515_cast_fp16)[name = string("x_cast_fp16")]; - string b_pad_type_0 = const()[name = string("b_pad_type_0"), val = string("valid")]; - tensor b_strides_0 = const()[name = string("b_strides_0"), val = tensor([1, 1])]; - tensor b_pad_0 = const()[name = string("b_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_dilations_0 = const()[name = string("b_dilations_0"), val = tensor([1, 1])]; - int32 b_groups_0 = const()[name = string("b_groups_0"), val = int32(1)]; - tensor model_model_layers_25_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(865213376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871185408))))[name = string("model_model_layers_25_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_cast_fp16 = conv(dilations = b_dilations_0, groups = b_groups_0, pad = b_pad_0, pad_type = b_pad_type_0, strides = b_strides_0, weight = model_model_layers_25_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_515_cast_fp16)[name = string("b_cast_fp16")]; - string var_17768_mode_0 = const()[name = string("op_17768_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_17768_cast_fp16 = gelu(mode = var_17768_mode_0, x = x_cast_fp16)[name = string("op_17768_cast_fp16")]; - tensor input_517_cast_fp16 = mul(x = var_17768_cast_fp16, y = b_cast_fp16)[name = string("input_517_cast_fp16")]; - string e_pad_type_0 = const()[name = string("e_pad_type_0"), val = string("valid")]; - tensor e_strides_0 = const()[name = string("e_strides_0"), val = tensor([1, 1])]; - tensor e_pad_0 = const()[name = string("e_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_dilations_0 = const()[name = string("e_dilations_0"), val = tensor([1, 1])]; - int32 e_groups_0 = const()[name = string("e_groups_0"), val = int32(1)]; - tensor model_model_layers_25_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(541753728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547725760))))[name = string("model_model_layers_25_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_cast_fp16 = conv(dilations = e_dilations_0, groups = e_groups_0, pad = e_pad_0, pad_type = e_pad_type_0, strides = e_strides_0, weight = model_model_layers_25_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_517_cast_fp16)[name = string("e_cast_fp16")]; - tensor var_17776_axes_0 = const()[name = string("op_17776_axes_0"), val = tensor([2])]; - tensor var_17776_cast_fp16 = squeeze(axes = var_17776_axes_0, x = e_cast_fp16)[name = string("op_17776_cast_fp16")]; - tensor var_17777 = const()[name = string("op_17777"), val = tensor([0, 2, 1])]; - int32 var_17788 = const()[name = string("op_17788"), val = int32(-1)]; - fp16 const_984_promoted_to_fp16 = const()[name = string("const_984_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_413_cast_fp16 = transpose(perm = var_17777, x = var_17776_cast_fp16)[name = string("transpose_0")]; - tensor var_17790_cast_fp16 = mul(x = hidden_states_413_cast_fp16, y = const_984_promoted_to_fp16)[name = string("op_17790_cast_fp16")]; - bool input_519_interleave_0 = const()[name = string("input_519_interleave_0"), val = bool(false)]; - tensor input_519_cast_fp16 = concat(axis = var_17788, interleave = input_519_interleave_0, values = (hidden_states_413_cast_fp16, var_17790_cast_fp16))[name = string("input_519_cast_fp16")]; - tensor normed_621_axes_0 = const()[name = string("normed_621_axes_0"), val = tensor([-1])]; - fp16 var_17785_to_fp16 = const()[name = string("op_17785_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_621_cast_fp16 = layer_norm(axes = normed_621_axes_0, epsilon = var_17785_to_fp16, x = input_519_cast_fp16)[name = string("normed_621_cast_fp16")]; - tensor normed_623_begin_0 = const()[name = string("normed_623_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_623_end_0 = const()[name = string("normed_623_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_623_end_mask_0 = const()[name = string("normed_623_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_623_cast_fp16 = slice_by_index(begin = normed_623_begin_0, end = normed_623_end_0, end_mask = normed_623_end_mask_0, x = normed_621_cast_fp16)[name = string("normed_623_cast_fp16")]; - tensor var_17804_to_fp16 = const()[name = string("op_17804_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547744256)))]; - tensor hidden_states_415_cast_fp16 = mul(x = normed_623_cast_fp16, y = var_17804_to_fp16)[name = string("hidden_states_415_cast_fp16")]; - tensor hidden_states_cast_fp16 = add(x = hidden_states_411_cast_fp16, y = hidden_states_415_cast_fp16)[name = string("hidden_states_cast_fp16")]; - int32 var_17817 = const()[name = string("op_17817"), val = int32(-1)]; - fp16 const_988_promoted_to_fp16 = const()[name = string("const_988_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_17819_cast_fp16 = mul(x = hidden_states_cast_fp16, y = const_988_promoted_to_fp16)[name = string("op_17819_cast_fp16")]; - bool input_interleave_0 = const()[name = string("input_interleave_0"), val = bool(false)]; - tensor input_cast_fp16 = concat(axis = var_17817, interleave = input_interleave_0, values = (hidden_states_cast_fp16, var_17819_cast_fp16))[name = string("input_cast_fp16")]; - tensor normed_625_axes_0 = const()[name = string("normed_625_axes_0"), val = tensor([-1])]; - fp16 var_17814_to_fp16 = const()[name = string("op_17814_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_625_cast_fp16 = layer_norm(axes = normed_625_axes_0, epsilon = var_17814_to_fp16, x = input_cast_fp16)[name = string("normed_625_cast_fp16")]; - tensor normed_begin_0 = const()[name = string("normed_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_end_0 = const()[name = string("normed_end_0"), val = tensor([1, 1, 1152])]; - tensor normed_end_mask_0 = const()[name = string("normed_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_cast_fp16 = slice_by_index(begin = normed_begin_0, end = normed_end_0, end_mask = normed_end_mask_0, x = normed_625_cast_fp16)[name = string("normed_cast_fp16")]; - tensor var_17833_to_fp16 = const()[name = string("op_17833_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547746624)))]; - tensor out_cast_fp16 = mul(x = normed_cast_fp16, y = var_17833_to_fp16)[name = string("out_cast_fp16")]; - tensor var_17837_begin_0 = const()[name = string("op_17837_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_17837_end_0 = const()[name = string("op_17837_end_0"), val = tensor([1, 1, 512, 256])]; - tensor var_17837_end_mask_0 = const()[name = string("op_17837_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_17837_squeeze_mask_0 = const()[name = string("op_17837_squeeze_mask_0"), val = tensor([true, false, false, false])]; - tensor var_17837_cast_fp16 = slice_by_index(begin = var_17837_begin_0, end = var_17837_end_0, end_mask = var_17837_end_mask_0, squeeze_mask = var_17837_squeeze_mask_0, x = coreml_update_state_103)[name = string("op_17837_cast_fp16")]; - tensor var_17840_begin_0 = const()[name = string("op_17840_begin_0"), val = tensor([0, 0, 0])]; - tensor var_17840_end_0 = const()[name = string("op_17840_end_0"), val = tensor([1, 512, 256])]; - tensor var_17840_end_mask_0 = const()[name = string("op_17840_end_mask_0"), val = tensor([false, true, true])]; - tensor var_17840_squeeze_mask_0 = const()[name = string("op_17840_squeeze_mask_0"), val = tensor([true, false, false])]; - tensor var_17840_cast_fp16 = slice_by_index(begin = var_17840_begin_0, end = var_17840_end_0, end_mask = var_17840_end_mask_0, squeeze_mask = var_17840_squeeze_mask_0, x = var_17837_cast_fp16)[name = string("op_17840_cast_fp16")]; - tensor var_17843_begin_0 = const()[name = string("op_17843_begin_0"), val = tensor([0, 0])]; - tensor var_17843_end_0 = const()[name = string("op_17843_end_0"), val = tensor([1, 256])]; - tensor var_17843_end_mask_0 = const()[name = string("op_17843_end_mask_0"), val = tensor([false, true])]; - tensor var_17843_squeeze_mask_0 = const()[name = string("op_17843_squeeze_mask_0"), val = tensor([true, false])]; - tensor var_17843_cast_fp16 = slice_by_index(begin = var_17843_begin_0, end = var_17843_end_0, end_mask = var_17843_end_mask_0, squeeze_mask = var_17843_squeeze_mask_0, x = var_17840_cast_fp16)[name = string("op_17843_cast_fp16")]; - tensor var_17846_begin_0 = const()[name = string("op_17846_begin_0"), val = tensor([0])]; - tensor var_17846_end_0 = const()[name = string("op_17846_end_0"), val = tensor([1])]; - tensor var_17846_end_mask_0 = const()[name = string("op_17846_end_mask_0"), val = tensor([false])]; - tensor var_17846_squeeze_mask_0 = const()[name = string("op_17846_squeeze_mask_0"), val = tensor([true])]; - fp16 var_17846_cast_fp16 = slice_by_index(begin = var_17846_begin_0, end = var_17846_end_0, end_mask = var_17846_end_mask_0, squeeze_mask = var_17846_squeeze_mask_0, x = var_17843_cast_fp16)[name = string("op_17846_cast_fp16")]; - fp16 var_17847_to_fp16 = const()[name = string("op_17847_to_fp16"), val = fp16(0x0p+0)]; - fp16 dummy_local_cast_fp16 = mul(x = var_17846_cast_fp16, y = var_17847_to_fp16)[name = string("dummy_local_cast_fp16")]; - tensor var_17851_begin_0 = const()[name = string("op_17851_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_17851_end_0 = const()[name = string("op_17851_end_0"), val = tensor([1, 1, 4096, 256])]; - tensor var_17851_end_mask_0 = const()[name = string("op_17851_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_17851_squeeze_mask_0 = const()[name = string("op_17851_squeeze_mask_0"), val = tensor([true, false, false, false])]; - tensor var_17851_cast_fp16 = slice_by_index(begin = var_17851_begin_0, end = var_17851_end_0, end_mask = var_17851_end_mask_0, squeeze_mask = var_17851_squeeze_mask_0, x = coreml_update_state_99)[name = string("op_17851_cast_fp16")]; - tensor var_17854_begin_0 = const()[name = string("op_17854_begin_0"), val = tensor([0, 0, 0])]; - tensor var_17854_end_0 = const()[name = string("op_17854_end_0"), val = tensor([1, 4096, 256])]; - tensor var_17854_end_mask_0 = const()[name = string("op_17854_end_mask_0"), val = tensor([false, true, true])]; - tensor var_17854_squeeze_mask_0 = const()[name = string("op_17854_squeeze_mask_0"), val = tensor([true, false, false])]; - tensor var_17854_cast_fp16 = slice_by_index(begin = var_17854_begin_0, end = var_17854_end_0, end_mask = var_17854_end_mask_0, squeeze_mask = var_17854_squeeze_mask_0, x = var_17851_cast_fp16)[name = string("op_17854_cast_fp16")]; - tensor var_17857_begin_0 = const()[name = string("op_17857_begin_0"), val = tensor([0, 0])]; - tensor var_17857_end_0 = const()[name = string("op_17857_end_0"), val = tensor([1, 256])]; - tensor var_17857_end_mask_0 = const()[name = string("op_17857_end_mask_0"), val = tensor([false, true])]; - tensor var_17857_squeeze_mask_0 = const()[name = string("op_17857_squeeze_mask_0"), val = tensor([true, false])]; - tensor var_17857_cast_fp16 = slice_by_index(begin = var_17857_begin_0, end = var_17857_end_0, end_mask = var_17857_end_mask_0, squeeze_mask = var_17857_squeeze_mask_0, x = var_17854_cast_fp16)[name = string("op_17857_cast_fp16")]; - tensor var_17860_begin_0 = const()[name = string("op_17860_begin_0"), val = tensor([0])]; - tensor var_17860_end_0 = const()[name = string("op_17860_end_0"), val = tensor([1])]; - tensor var_17860_end_mask_0 = const()[name = string("op_17860_end_mask_0"), val = tensor([false])]; - tensor var_17860_squeeze_mask_0 = const()[name = string("op_17860_squeeze_mask_0"), val = tensor([true])]; - fp16 var_17860_cast_fp16 = slice_by_index(begin = var_17860_begin_0, end = var_17860_end_0, end_mask = var_17860_end_mask_0, squeeze_mask = var_17860_squeeze_mask_0, x = var_17857_cast_fp16)[name = string("op_17860_cast_fp16")]; - fp16 var_17861_to_fp16 = const()[name = string("op_17861_to_fp16"), val = fp16(0x0p+0)]; - fp16 dummy_global_cast_fp16 = mul(x = var_17860_cast_fp16, y = var_17861_to_fp16)[name = string("dummy_global_cast_fp16")]; - fp16 var_17864_cast_fp16 = add(x = dummy_local_cast_fp16, y = dummy_global_cast_fp16)[name = string("op_17864_cast_fp16")]; - tensor var_17868 = const()[name = string("op_17868"), val = tensor([1, 1, 1])]; - tensor var_17869_cast_fp16 = reshape(shape = var_17868, x = var_17864_cast_fp16)[name = string("op_17869_cast_fp16")]; - tensor output_hidden_states = add(x = out_cast_fp16, y = var_17869_cast_fp16)[name = string("op_17871_cast_fp16")]; - tensor position_ids_tmp = identity(x = position_ids)[name = string("position_ids_tmp")]; - } -> (output_hidden_states); - func prefill(tensor causal_mask, tensor current_pos, tensor hidden_states, state> model_model_kv_cache_global, state> model_model_kv_cache_local, tensor position_ids) { - tensor model_model_layers_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871296064))))[name = string("model_model_layers_0_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_0_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(901312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871312512))))[name = string("model_model_layers_0_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_0_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1126720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871316672))))[name = string("model_model_layers_0_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1352128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871320832))))[name = string("model_model_layers_1_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_1_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2253376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871337280))))[name = string("model_model_layers_1_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_1_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2478784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2700032))))[name = string("model_model_layers_1_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(548679168))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871341440))))[name = string("model_model_layers_2_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_2_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3605440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3826688))))[name = string("model_model_layers_2_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_2_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3830848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871357888))))[name = string("model_model_layers_2_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4056256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871362048))))[name = string("model_model_layers_3_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_3_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(549605184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871378496))))[name = string("model_model_layers_3_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_3_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871382656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871603904))))[name = string("model_model_layers_3_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871608064))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872492864))))[name = string("model_model_layers_4_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_4_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6309568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872509312))))[name = string("model_model_layers_4_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_4_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6534976))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872513472))))[name = string("model_model_layers_4_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_5_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872517632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(873402432))))[name = string("model_model_layers_5_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_5_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7661632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(873418880))))[name = string("model_model_layers_5_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_5_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7887040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(873423040))))[name = string("model_model_layers_5_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_6_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8112448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(873427200))))[name = string("model_model_layers_6_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_6_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9013696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(873443648))))[name = string("model_model_layers_6_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_6_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9239104))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(873447808))))[name = string("model_model_layers_6_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_7_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9464512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(873451968))))[name = string("model_model_layers_7_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_7_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10365760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(873468416))))[name = string("model_model_layers_7_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_7_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10591168))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(873472576))))[name = string("model_model_layers_7_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_8_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(873476736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(874361536))))[name = string("model_model_layers_8_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_8_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11717824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(874377984))))[name = string("model_model_layers_8_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_8_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11943232))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(874382144))))[name = string("model_model_layers_8_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_9_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(874386304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(875271104))))[name = string("model_model_layers_9_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_9_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13069888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(875287552))))[name = string("model_model_layers_9_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_9_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13295296))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(875291712))))[name = string("model_model_layers_9_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_10_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13520704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(875295872))))[name = string("model_model_layers_10_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_10_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14421952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(875312320))))[name = string("model_model_layers_10_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_10_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14647360))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(875316480))))[name = string("model_model_layers_10_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_11_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14872768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(875320640))))[name = string("model_model_layers_11_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_11_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15774016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(554448576))))[name = string("model_model_layers_11_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_11_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15999424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(875337088))))[name = string("model_model_layers_11_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_12_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16224832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(875341248))))[name = string("model_model_layers_12_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_12_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17126080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17347328))))[name = string("model_model_layers_12_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_12_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17351488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(875357696))))[name = string("model_model_layers_12_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_13_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17576896))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(875361856))))[name = string("model_model_layers_13_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_13_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18478144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(875378304))))[name = string("model_model_layers_13_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_13_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18703552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18924800))))[name = string("model_model_layers_13_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_14_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(875382464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(876267264))))[name = string("model_model_layers_14_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_14_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19830208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(876283712))))[name = string("model_model_layers_14_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_14_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20055616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20276864))))[name = string("model_model_layers_14_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_15_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(556288320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(876287872))))[name = string("model_model_layers_15_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_15_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21182272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(876304320))))[name = string("model_model_layers_15_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_15_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21407680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(876308480))))[name = string("model_model_layers_15_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_16_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21633088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(876312640))))[name = string("model_model_layers_16_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_16_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22534336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(876329088))))[name = string("model_model_layers_16_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_16_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22759744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(876333248))))[name = string("model_model_layers_16_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_17_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557435584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(876337408))))[name = string("model_model_layers_17_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_17_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558336832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(876353856))))[name = string("model_model_layers_17_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_17_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24111808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24333056))))[name = string("model_model_layers_17_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_18_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24337216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(876358016))))[name = string("model_model_layers_18_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_18_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25238464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(876374464))))[name = string("model_model_layers_18_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_18_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25463872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558582848))))[name = string("model_model_layers_18_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_19_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25689280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(876378624))))[name = string("model_model_layers_19_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_19_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26590528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(876395072))))[name = string("model_model_layers_19_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_19_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26815936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(876399232))))[name = string("model_model_layers_19_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_20_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27041344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(876403392))))[name = string("model_model_layers_20_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_20_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27942592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(876419840))))[name = string("model_model_layers_20_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_20_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28168000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(876424000))))[name = string("model_model_layers_20_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_21_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(876428160))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(877312960))))[name = string("model_model_layers_21_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_21_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29294656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(877329408))))[name = string("model_model_layers_21_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_21_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29520064))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(877333568))))[name = string("model_model_layers_21_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_22_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(877337728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(878222528))))[name = string("model_model_layers_22_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_22_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30646720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(878238976))))[name = string("model_model_layers_22_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_22_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(878243136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(878464384))))[name = string("model_model_layers_22_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_23_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31097536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(878468544))))[name = string("model_model_layers_23_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_23_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31998784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(878484992))))[name = string("model_model_layers_23_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_23_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(878489152))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(878710400))))[name = string("model_model_layers_23_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_24_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32449600))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(878714560))))[name = string("model_model_layers_24_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_24_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33350848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33572096))))[name = string("model_model_layers_24_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_24_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33576256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(878731008))))[name = string("model_model_layers_24_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_25_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560058752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(878735168))))[name = string("model_model_layers_25_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_25_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34702912))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(878751616))))[name = string("model_model_layers_25_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_25_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34928320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(878755776))))[name = string("model_model_layers_25_self_attn_v_proj_weight_palettized")]; - int32 var_1662_batch_dims_0 = const()[name = string("op_1662_batch_dims_0"), val = int32(0)]; - bool var_1662_validate_indices_0 = const()[name = string("op_1662_validate_indices_0"), val = bool(false)]; - tensor var_1654_to_fp16 = const()[name = string("op_1654_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39348096)))]; - string position_ids_to_int16_dtype_0 = const()[name = string("position_ids_to_int16_dtype_0"), val = string("int16")]; - string cast_266_dtype_0 = const()[name = string("cast_266_dtype_0"), val = string("int32")]; - int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; - tensor position_ids_to_int16 = cast(dtype = position_ids_to_int16_dtype_0, x = position_ids)[name = string("cast_5")]; - tensor cast_266 = cast(dtype = cast_266_dtype_0, x = position_ids_to_int16)[name = string("cast_4")]; - tensor greater_equal_0 = greater_equal(x = cast_266, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; - int32 slice_by_index_208 = const()[name = string("slice_by_index_208"), val = int32(8192)]; - tensor add_0 = add(x = cast_266, y = slice_by_index_208)[name = string("add_0")]; - tensor select_0 = select(a = cast_266, b = add_0, cond = greater_equal_0)[name = string("select_0")]; - string select_0_to_int16_dtype_0 = const()[name = string("select_0_to_int16_dtype_0"), val = string("int16")]; - string cast_0_dtype_0 = const()[name = string("cast_0_dtype_0"), val = string("int32")]; - int32 greater_equal_0_y_0_1 = const()[name = string("greater_equal_0_y_0_1"), val = int32(0)]; - tensor select_0_to_int16 = cast(dtype = select_0_to_int16_dtype_0, x = select_0)[name = string("cast_3")]; - tensor cast_0 = cast(dtype = cast_0_dtype_0, x = select_0_to_int16)[name = string("cast_2")]; - tensor greater_equal_0_1 = greater_equal(x = cast_0, y = greater_equal_0_y_0_1)[name = string("greater_equal_0_1")]; - int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(8192)]; - tensor add_0_1 = add(x = cast_0, y = slice_by_index_0)[name = string("add_0_1")]; - tensor select_0_1 = select(a = cast_0, b = add_0_1, cond = greater_equal_0_1)[name = string("select_0_1")]; - int32 op_1662_cast_fp16_cast_uint16_cast_uint16_axis_0 = const()[name = string("op_1662_cast_fp16_cast_uint16_cast_uint16_axis_0"), val = int32(1)]; - tensor op_1662_cast_fp16_cast_uint16_cast_uint16 = gather(axis = op_1662_cast_fp16_cast_uint16_cast_uint16_axis_0, batch_dims = var_1662_batch_dims_0, indices = select_0_1, validate_indices = var_1662_validate_indices_0, x = var_1654_to_fp16)[name = string("op_1662_cast_fp16_cast_uint16_cast_uint16")]; - tensor var_1666 = const()[name = string("op_1666"), val = tensor([1, 64, 1, 256])]; - tensor cos_1_cast_fp16 = reshape(shape = var_1666, x = op_1662_cast_fp16_cast_uint16_cast_uint16)[name = string("cos_1_cast_fp16")]; - int32 var_1676_axis_0 = const()[name = string("op_1676_axis_0"), val = int32(1)]; - int32 var_1676_batch_dims_0 = const()[name = string("op_1676_batch_dims_0"), val = int32(0)]; - bool var_1676_validate_indices_0 = const()[name = string("op_1676_validate_indices_0"), val = bool(false)]; - tensor var_1668_to_fp16 = const()[name = string("op_1668_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35153728)))]; - string position_ids_to_uint16_dtype_0 = const()[name = string("position_ids_to_uint16_dtype_0"), val = string("uint16")]; - tensor position_ids_to_uint16 = cast(dtype = position_ids_to_uint16_dtype_0, x = position_ids)[name = string("cast_1")]; - tensor var_1676_cast_fp16_cast_uint16 = gather(axis = var_1676_axis_0, batch_dims = var_1676_batch_dims_0, indices = position_ids_to_uint16, validate_indices = var_1676_validate_indices_0, x = var_1668_to_fp16)[name = string("op_1676_cast_fp16_cast_uint16")]; - tensor var_1680 = const()[name = string("op_1680"), val = tensor([1, 64, 1, 256])]; - tensor sin_1_cast_fp16 = reshape(shape = var_1680, x = var_1676_cast_fp16_cast_uint16)[name = string("sin_1_cast_fp16")]; - int32 var_1701 = const()[name = string("op_1701"), val = int32(-1)]; - fp16 const_1_promoted_to_fp16 = const()[name = string("const_1_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_1703_cast_fp16 = mul(x = hidden_states, y = const_1_promoted_to_fp16)[name = string("op_1703_cast_fp16")]; - bool input_1_interleave_0 = const()[name = string("input_1_interleave_0"), val = bool(false)]; - tensor input_1_cast_fp16 = concat(axis = var_1701, interleave = input_1_interleave_0, values = (hidden_states, var_1703_cast_fp16))[name = string("input_1_cast_fp16")]; - tensor normed_1_axes_0 = const()[name = string("normed_1_axes_0"), val = tensor([-1])]; - fp16 var_1698_to_fp16 = const()[name = string("op_1698_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_1_cast_fp16 = layer_norm(axes = normed_1_axes_0, epsilon = var_1698_to_fp16, x = input_1_cast_fp16)[name = string("normed_1_cast_fp16")]; - tensor normed_3_begin_0 = const()[name = string("normed_3_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_3_end_0 = const()[name = string("normed_3_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_3_end_mask_0 = const()[name = string("normed_3_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_3_cast_fp16 = slice_by_index(begin = normed_3_begin_0, end = normed_3_end_0, end_mask = normed_3_end_mask_0, x = normed_1_cast_fp16)[name = string("normed_3_cast_fp16")]; - tensor var_1717_to_fp16 = const()[name = string("op_1717_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43542464)))]; - tensor hidden_states_3_cast_fp16 = mul(x = normed_3_cast_fp16, y = var_1717_to_fp16)[name = string("hidden_states_3_cast_fp16")]; - tensor var_1728 = const()[name = string("op_1728"), val = tensor([0, 2, 1])]; - tensor var_1731_axes_0 = const()[name = string("op_1731_axes_0"), val = tensor([2])]; - tensor var_1729_cast_fp16 = transpose(perm = var_1728, x = hidden_states_3_cast_fp16)[name = string("transpose_237")]; - tensor var_1731_cast_fp16 = expand_dims(axes = var_1731_axes_0, x = var_1729_cast_fp16)[name = string("op_1731_cast_fp16")]; - string query_states_1_pad_type_0 = const()[name = string("query_states_1_pad_type_0"), val = string("valid")]; - tensor query_states_1_strides_0 = const()[name = string("query_states_1_strides_0"), val = tensor([1, 1])]; - tensor query_states_1_pad_0 = const()[name = string("query_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_1_dilations_0 = const()[name = string("query_states_1_dilations_0"), val = tensor([1, 1])]; - int32 query_states_1_groups_0 = const()[name = string("query_states_1_groups_0"), val = int32(1)]; - tensor query_states_1 = conv(dilations = query_states_1_dilations_0, groups = query_states_1_groups_0, pad = query_states_1_pad_0, pad_type = query_states_1_pad_type_0, strides = query_states_1_strides_0, weight = model_model_layers_0_self_attn_q_proj_weight_palettized, x = var_1731_cast_fp16)[name = string("query_states_1")]; - string key_states_1_pad_type_0 = const()[name = string("key_states_1_pad_type_0"), val = string("valid")]; - tensor key_states_1_strides_0 = const()[name = string("key_states_1_strides_0"), val = tensor([1, 1])]; - tensor key_states_1_pad_0 = const()[name = string("key_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_1_dilations_0 = const()[name = string("key_states_1_dilations_0"), val = tensor([1, 1])]; - int32 key_states_1_groups_0 = const()[name = string("key_states_1_groups_0"), val = int32(1)]; - tensor key_states_1 = conv(dilations = key_states_1_dilations_0, groups = key_states_1_groups_0, pad = key_states_1_pad_0, pad_type = key_states_1_pad_type_0, strides = key_states_1_strides_0, weight = model_model_layers_0_self_attn_k_proj_weight_palettized, x = var_1731_cast_fp16)[name = string("key_states_1")]; - string value_states_1_pad_type_0 = const()[name = string("value_states_1_pad_type_0"), val = string("valid")]; - tensor value_states_1_strides_0 = const()[name = string("value_states_1_strides_0"), val = tensor([1, 1])]; - tensor value_states_1_pad_0 = const()[name = string("value_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_1_dilations_0 = const()[name = string("value_states_1_dilations_0"), val = tensor([1, 1])]; - int32 value_states_1_groups_0 = const()[name = string("value_states_1_groups_0"), val = int32(1)]; - tensor value_states_1 = conv(dilations = value_states_1_dilations_0, groups = value_states_1_groups_0, pad = value_states_1_pad_0, pad_type = value_states_1_pad_type_0, strides = value_states_1_strides_0, weight = model_model_layers_0_self_attn_v_proj_weight_palettized, x = var_1731_cast_fp16)[name = string("value_states_1")]; - tensor var_1773 = const()[name = string("op_1773"), val = tensor([1, 4, 256, 64])]; - tensor var_1774 = reshape(shape = var_1773, x = query_states_1)[name = string("op_1774")]; - tensor var_1779 = const()[name = string("op_1779"), val = tensor([0, 1, 3, 2])]; - tensor var_1784 = const()[name = string("op_1784"), val = tensor([1, 1, 256, 64])]; - tensor var_1785 = reshape(shape = var_1784, x = key_states_1)[name = string("op_1785")]; - tensor var_1790 = const()[name = string("op_1790"), val = tensor([0, 1, 3, 2])]; - tensor var_1795 = const()[name = string("op_1795"), val = tensor([1, 1, 256, 64])]; - tensor var_1796 = reshape(shape = var_1795, x = value_states_1)[name = string("op_1796")]; - tensor var_1801 = const()[name = string("op_1801"), val = tensor([0, 1, 3, 2])]; - int32 var_1812 = const()[name = string("op_1812"), val = int32(-1)]; - fp16 const_6_promoted = const()[name = string("const_6_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_5 = transpose(perm = var_1779, x = var_1774)[name = string("transpose_236")]; - tensor var_1814 = mul(x = hidden_states_5, y = const_6_promoted)[name = string("op_1814")]; - bool input_5_interleave_0 = const()[name = string("input_5_interleave_0"), val = bool(false)]; - tensor input_5 = concat(axis = var_1812, interleave = input_5_interleave_0, values = (hidden_states_5, var_1814))[name = string("input_5")]; - tensor normed_5_axes_0 = const()[name = string("normed_5_axes_0"), val = tensor([-1])]; - fp16 var_1809_to_fp16 = const()[name = string("op_1809_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_5_cast_fp16 = layer_norm(axes = normed_5_axes_0, epsilon = var_1809_to_fp16, x = input_5)[name = string("normed_5_cast_fp16")]; - tensor normed_7_begin_0 = const()[name = string("normed_7_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_7_end_0 = const()[name = string("normed_7_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_7_end_mask_0 = const()[name = string("normed_7_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_7 = slice_by_index(begin = normed_7_begin_0, end = normed_7_end_0, end_mask = normed_7_end_mask_0, x = normed_5_cast_fp16)[name = string("normed_7")]; - tensor var_1828_to_fp16 = const()[name = string("op_1828_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43544832)))]; - tensor q_1_cast_fp16 = mul(x = normed_7, y = var_1828_to_fp16)[name = string("q_1_cast_fp16")]; - int32 var_1839 = const()[name = string("op_1839"), val = int32(-1)]; - fp16 const_10_promoted = const()[name = string("const_10_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_7 = transpose(perm = var_1790, x = var_1785)[name = string("transpose_235")]; - tensor var_1841 = mul(x = hidden_states_7, y = const_10_promoted)[name = string("op_1841")]; - bool input_7_interleave_0 = const()[name = string("input_7_interleave_0"), val = bool(false)]; - tensor input_7 = concat(axis = var_1839, interleave = input_7_interleave_0, values = (hidden_states_7, var_1841))[name = string("input_7")]; - tensor normed_9_axes_0 = const()[name = string("normed_9_axes_0"), val = tensor([-1])]; - fp16 var_1836_to_fp16 = const()[name = string("op_1836_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_9_cast_fp16 = layer_norm(axes = normed_9_axes_0, epsilon = var_1836_to_fp16, x = input_7)[name = string("normed_9_cast_fp16")]; - tensor normed_11_begin_0 = const()[name = string("normed_11_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_11_end_0 = const()[name = string("normed_11_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_11_end_mask_0 = const()[name = string("normed_11_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_11 = slice_by_index(begin = normed_11_begin_0, end = normed_11_end_0, end_mask = normed_11_end_mask_0, x = normed_9_cast_fp16)[name = string("normed_11")]; - tensor var_1855_to_fp16 = const()[name = string("op_1855_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43545408)))]; - tensor k_1_cast_fp16 = mul(x = normed_11, y = var_1855_to_fp16)[name = string("k_1_cast_fp16")]; - tensor var_1861 = const()[name = string("op_1861"), val = tensor([0, 2, 1, 3])]; - tensor var_1867 = const()[name = string("op_1867"), val = tensor([0, 2, 1, 3])]; - tensor cos_5 = transpose(perm = var_1861, x = cos_1_cast_fp16)[name = string("transpose_234")]; - tensor var_1869_cast_fp16 = mul(x = q_1_cast_fp16, y = cos_5)[name = string("op_1869_cast_fp16")]; - tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_1_cast_fp16 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = q_1_cast_fp16)[name = string("x1_1_cast_fp16")]; - tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_1_cast_fp16 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = q_1_cast_fp16)[name = string("x2_1_cast_fp16")]; - fp16 const_16_promoted_to_fp16 = const()[name = string("const_16_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_1890_cast_fp16 = mul(x = x2_1_cast_fp16, y = const_16_promoted_to_fp16)[name = string("op_1890_cast_fp16")]; - int32 var_1892 = const()[name = string("op_1892"), val = int32(-1)]; - bool var_1893_interleave_0 = const()[name = string("op_1893_interleave_0"), val = bool(false)]; - tensor var_1893_cast_fp16 = concat(axis = var_1892, interleave = var_1893_interleave_0, values = (var_1890_cast_fp16, x1_1_cast_fp16))[name = string("op_1893_cast_fp16")]; - tensor sin_5 = transpose(perm = var_1867, x = sin_1_cast_fp16)[name = string("transpose_233")]; - tensor var_1894_cast_fp16 = mul(x = var_1893_cast_fp16, y = sin_5)[name = string("op_1894_cast_fp16")]; - tensor query_states_3_cast_fp16 = add(x = var_1869_cast_fp16, y = var_1894_cast_fp16)[name = string("query_states_3_cast_fp16")]; - tensor var_1897_cast_fp16 = mul(x = k_1_cast_fp16, y = cos_5)[name = string("op_1897_cast_fp16")]; - tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_3_cast_fp16 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = k_1_cast_fp16)[name = string("x1_3_cast_fp16")]; - tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_3_cast_fp16 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = k_1_cast_fp16)[name = string("x2_3_cast_fp16")]; - fp16 const_19_promoted_to_fp16 = const()[name = string("const_19_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_1918_cast_fp16 = mul(x = x2_3_cast_fp16, y = const_19_promoted_to_fp16)[name = string("op_1918_cast_fp16")]; - int32 var_1920 = const()[name = string("op_1920"), val = int32(-1)]; - bool var_1921_interleave_0 = const()[name = string("op_1921_interleave_0"), val = bool(false)]; - tensor var_1921_cast_fp16 = concat(axis = var_1920, interleave = var_1921_interleave_0, values = (var_1918_cast_fp16, x1_3_cast_fp16))[name = string("op_1921_cast_fp16")]; - tensor var_1922_cast_fp16 = mul(x = var_1921_cast_fp16, y = sin_5)[name = string("op_1922_cast_fp16")]; - tensor key_states_3_cast_fp16 = add(x = var_1897_cast_fp16, y = var_1922_cast_fp16)[name = string("key_states_3_cast_fp16")]; - tensor seq_len_5 = const()[name = string("seq_len_5"), val = tensor([64])]; - tensor end_pos_1 = add(x = current_pos, y = seq_len_5)[name = string("end_pos_1")]; - tensor read_state_0 = read_state(input = model_model_kv_cache_local)[name = string("read_state_0")]; - tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([0])]; - tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; - tensor expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor([0])]; - tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([1])]; - int32 concat_2_axis_0 = const()[name = string("concat_2_axis_0"), val = int32(0)]; - bool concat_2_interleave_0 = const()[name = string("concat_2_interleave_0"), val = bool(false)]; - tensor concat_2 = concat(axis = concat_2_axis_0, interleave = concat_2_interleave_0, values = (expand_dims_0, expand_dims_1, current_pos, expand_dims_3))[name = string("concat_2")]; - tensor concat_3_values1_0 = const()[name = string("concat_3_values1_0"), val = tensor([0])]; - tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; - int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)]; - bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; - tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_4, concat_3_values1_0, end_pos_1, concat_3_values3_0))[name = string("concat_3")]; - tensor model_model_kv_cache_local_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_local_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_local_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_1_stride_0, update = key_states_3_cast_fp16, x = read_state_0)[name = string("model_model_kv_cache_local_internal_tensor_assign_1_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_104_write_state")]; - tensor coreml_update_state_52 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_104")]; - tensor expand_dims_6 = const()[name = string("expand_dims_6"), val = tensor([22])]; - tensor expand_dims_7 = const()[name = string("expand_dims_7"), val = tensor([0])]; - tensor expand_dims_9 = const()[name = string("expand_dims_9"), val = tensor([0])]; - tensor expand_dims_10 = const()[name = string("expand_dims_10"), val = tensor([23])]; - int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; - bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; - tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (expand_dims_6, expand_dims_7, current_pos, expand_dims_9))[name = string("concat_6")]; - tensor concat_7_values1_0 = const()[name = string("concat_7_values1_0"), val = tensor([0])]; - tensor concat_7_values3_0 = const()[name = string("concat_7_values3_0"), val = tensor([0])]; - int32 concat_7_axis_0 = const()[name = string("concat_7_axis_0"), val = int32(0)]; - bool concat_7_interleave_0 = const()[name = string("concat_7_interleave_0"), val = bool(false)]; - tensor concat_7 = concat(axis = concat_7_axis_0, interleave = concat_7_interleave_0, values = (expand_dims_10, concat_7_values1_0, end_pos_1, concat_7_values3_0))[name = string("concat_7")]; - tensor model_model_kv_cache_local_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor value_states_3 = transpose(perm = var_1801, x = var_1796)[name = string("transpose_232")]; - tensor model_model_kv_cache_local_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_6, begin_mask = model_model_kv_cache_local_internal_tensor_assign_2_begin_mask_0, end = concat_7, end_mask = model_model_kv_cache_local_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_2_stride_0, update = value_states_3, x = coreml_update_state_52)[name = string("model_model_kv_cache_local_internal_tensor_assign_2_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_105_write_state")]; - tensor coreml_update_state_53 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_105")]; - tensor var_2021_begin_0 = const()[name = string("op_2021_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2021_end_0 = const()[name = string("op_2021_end_0"), val = tensor([1, 1, 512, 256])]; - tensor var_2021_end_mask_0 = const()[name = string("op_2021_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_2021_cast_fp16 = slice_by_index(begin = var_2021_begin_0, end = var_2021_end_0, end_mask = var_2021_end_mask_0, x = coreml_update_state_53)[name = string("op_2021_cast_fp16")]; - tensor var_2028_begin_0 = const()[name = string("op_2028_begin_0"), val = tensor([22, 0, 0, 0])]; - tensor var_2028_end_0 = const()[name = string("op_2028_end_0"), val = tensor([23, 1, 512, 256])]; - tensor var_2028_end_mask_0 = const()[name = string("op_2028_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_2028_cast_fp16 = slice_by_index(begin = var_2028_begin_0, end = var_2028_end_0, end_mask = var_2028_end_mask_0, x = coreml_update_state_53)[name = string("op_2028_cast_fp16")]; - tensor var_2067 = const()[name = string("op_2067"), val = tensor([1, 4, 1, 1])]; - tensor x_5_cast_fp16 = tile(reps = var_2067, x = var_2021_cast_fp16)[name = string("x_5_cast_fp16")]; - tensor var_2087 = const()[name = string("op_2087"), val = tensor([1, 4, 1, 1])]; - tensor x_11_cast_fp16 = tile(reps = var_2087, x = var_2028_cast_fp16)[name = string("x_11_cast_fp16")]; - bool var_2114_transpose_x_0 = const()[name = string("op_2114_transpose_x_0"), val = bool(false)]; - bool var_2114_transpose_y_0 = const()[name = string("op_2114_transpose_y_0"), val = bool(true)]; - tensor var_2114 = matmul(transpose_x = var_2114_transpose_x_0, transpose_y = var_2114_transpose_y_0, x = query_states_3_cast_fp16, y = x_5_cast_fp16)[name = string("op_2114")]; - fp16 var_2115_to_fp16 = const()[name = string("op_2115_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_1_cast_fp16 = mul(x = var_2114, y = var_2115_to_fp16)[name = string("attn_weights_1_cast_fp16")]; - tensor mask_slice_1_begin_0 = const()[name = string("mask_slice_1_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor mask_slice_1_end_0 = const()[name = string("mask_slice_1_end_0"), val = tensor([1, 1, 64, 512])]; - tensor mask_slice_1_end_mask_0 = const()[name = string("mask_slice_1_end_mask_0"), val = tensor([true, true, true, false])]; - tensor mask_slice_1 = slice_by_index(begin = mask_slice_1_begin_0, end = mask_slice_1_end_0, end_mask = mask_slice_1_end_mask_0, x = causal_mask)[name = string("mask_slice_1")]; - tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = mask_slice_1)[name = string("attn_weights_3_cast_fp16")]; - int32 var_2150 = const()[name = string("op_2150"), val = int32(-1)]; - tensor var_2152_cast_fp16 = softmax(axis = var_2150, x = attn_weights_3_cast_fp16)[name = string("op_2152_cast_fp16")]; - tensor concat_12 = const()[name = string("concat_12"), val = tensor([4, 64, 512])]; - tensor reshape_0_cast_fp16 = reshape(shape = concat_12, x = var_2152_cast_fp16)[name = string("reshape_0_cast_fp16")]; - tensor concat_13 = const()[name = string("concat_13"), val = tensor([4, 512, 256])]; - tensor reshape_1_cast_fp16 = reshape(shape = concat_13, x = x_11_cast_fp16)[name = string("reshape_1_cast_fp16")]; - bool matmul_0_transpose_x_0 = const()[name = string("matmul_0_transpose_x_0"), val = bool(false)]; - bool matmul_0_transpose_y_0 = const()[name = string("matmul_0_transpose_y_0"), val = bool(false)]; - tensor matmul_0_cast_fp16 = matmul(transpose_x = matmul_0_transpose_x_0, transpose_y = matmul_0_transpose_y_0, x = reshape_0_cast_fp16, y = reshape_1_cast_fp16)[name = string("matmul_0_cast_fp16")]; - tensor concat_17 = const()[name = string("concat_17"), val = tensor([1, 4, 64, 256])]; - tensor reshape_2_cast_fp16 = reshape(shape = concat_17, x = matmul_0_cast_fp16)[name = string("reshape_2_cast_fp16")]; - tensor var_2164_perm_0 = const()[name = string("op_2164_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_2183 = const()[name = string("op_2183"), val = tensor([1, 64, 1024])]; - tensor var_2164_cast_fp16 = transpose(perm = var_2164_perm_0, x = reshape_2_cast_fp16)[name = string("transpose_231")]; - tensor attn_output_5_cast_fp16 = reshape(shape = var_2183, x = var_2164_cast_fp16)[name = string("attn_output_5_cast_fp16")]; - tensor var_2188 = const()[name = string("op_2188"), val = tensor([0, 2, 1])]; - string var_2204_pad_type_0 = const()[name = string("op_2204_pad_type_0"), val = string("valid")]; - int32 var_2204_groups_0 = const()[name = string("op_2204_groups_0"), val = int32(1)]; - tensor var_2204_strides_0 = const()[name = string("op_2204_strides_0"), val = tensor([1])]; - tensor var_2204_pad_0 = const()[name = string("op_2204_pad_0"), val = tensor([0, 0])]; - tensor var_2204_dilations_0 = const()[name = string("op_2204_dilations_0"), val = tensor([1])]; - tensor squeeze_0_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43545984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44430784))))[name = string("squeeze_0_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_2189_cast_fp16 = transpose(perm = var_2188, x = attn_output_5_cast_fp16)[name = string("transpose_230")]; - tensor var_2204_cast_fp16 = conv(dilations = var_2204_dilations_0, groups = var_2204_groups_0, pad = var_2204_pad_0, pad_type = var_2204_pad_type_0, strides = var_2204_strides_0, weight = squeeze_0_cast_fp16_to_fp32_to_fp16_palettized, x = var_2189_cast_fp16)[name = string("op_2204_cast_fp16")]; - tensor var_2208 = const()[name = string("op_2208"), val = tensor([0, 2, 1])]; - int32 var_2219 = const()[name = string("op_2219"), val = int32(-1)]; - fp16 const_31_promoted_to_fp16 = const()[name = string("const_31_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_9_cast_fp16 = transpose(perm = var_2208, x = var_2204_cast_fp16)[name = string("transpose_229")]; - tensor var_2221_cast_fp16 = mul(x = hidden_states_9_cast_fp16, y = const_31_promoted_to_fp16)[name = string("op_2221_cast_fp16")]; - bool input_11_interleave_0 = const()[name = string("input_11_interleave_0"), val = bool(false)]; - tensor input_11_cast_fp16 = concat(axis = var_2219, interleave = input_11_interleave_0, values = (hidden_states_9_cast_fp16, var_2221_cast_fp16))[name = string("input_11_cast_fp16")]; - tensor normed_13_axes_0 = const()[name = string("normed_13_axes_0"), val = tensor([-1])]; - fp16 var_2216_to_fp16 = const()[name = string("op_2216_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_13_cast_fp16 = layer_norm(axes = normed_13_axes_0, epsilon = var_2216_to_fp16, x = input_11_cast_fp16)[name = string("normed_13_cast_fp16")]; - tensor normed_15_begin_0 = const()[name = string("normed_15_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_15_end_0 = const()[name = string("normed_15_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_15_end_mask_0 = const()[name = string("normed_15_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_15_cast_fp16 = slice_by_index(begin = normed_15_begin_0, end = normed_15_end_0, end_mask = normed_15_end_mask_0, x = normed_13_cast_fp16)[name = string("normed_15_cast_fp16")]; - tensor var_2235_to_fp16 = const()[name = string("op_2235_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44449280)))]; - tensor attn_output_9_cast_fp16 = mul(x = normed_15_cast_fp16, y = var_2235_to_fp16)[name = string("attn_output_9_cast_fp16")]; - tensor hidden_states_11_cast_fp16 = add(x = hidden_states, y = attn_output_9_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; - int32 var_2248 = const()[name = string("op_2248"), val = int32(-1)]; - fp16 const_35_promoted_to_fp16 = const()[name = string("const_35_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_2250_cast_fp16 = mul(x = hidden_states_11_cast_fp16, y = const_35_promoted_to_fp16)[name = string("op_2250_cast_fp16")]; - bool input_13_interleave_0 = const()[name = string("input_13_interleave_0"), val = bool(false)]; - tensor input_13_cast_fp16 = concat(axis = var_2248, interleave = input_13_interleave_0, values = (hidden_states_11_cast_fp16, var_2250_cast_fp16))[name = string("input_13_cast_fp16")]; - tensor normed_17_axes_0 = const()[name = string("normed_17_axes_0"), val = tensor([-1])]; - fp16 var_2245_to_fp16 = const()[name = string("op_2245_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_17_cast_fp16 = layer_norm(axes = normed_17_axes_0, epsilon = var_2245_to_fp16, x = input_13_cast_fp16)[name = string("normed_17_cast_fp16")]; - tensor normed_19_begin_0 = const()[name = string("normed_19_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_19_end_0 = const()[name = string("normed_19_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_19_end_mask_0 = const()[name = string("normed_19_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_19_cast_fp16 = slice_by_index(begin = normed_19_begin_0, end = normed_19_end_0, end_mask = normed_19_end_mask_0, x = normed_17_cast_fp16)[name = string("normed_19_cast_fp16")]; - tensor var_2264_to_fp16 = const()[name = string("op_2264_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44451648)))]; - tensor x_13_cast_fp16 = mul(x = normed_19_cast_fp16, y = var_2264_to_fp16)[name = string("x_13_cast_fp16")]; - tensor var_2276 = const()[name = string("op_2276"), val = tensor([0, 2, 1])]; - tensor input_15_axes_0 = const()[name = string("input_15_axes_0"), val = tensor([2])]; - tensor var_2277_cast_fp16 = transpose(perm = var_2276, x = x_13_cast_fp16)[name = string("transpose_228")]; - tensor input_15_cast_fp16 = expand_dims(axes = input_15_axes_0, x = var_2277_cast_fp16)[name = string("input_15_cast_fp16")]; - string x_15_pad_type_0 = const()[name = string("x_15_pad_type_0"), val = string("valid")]; - tensor x_15_strides_0 = const()[name = string("x_15_strides_0"), val = tensor([1, 1])]; - tensor x_15_pad_0 = const()[name = string("x_15_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_15_dilations_0 = const()[name = string("x_15_dilations_0"), val = tensor([1, 1])]; - int32 x_15_groups_0 = const()[name = string("x_15_groups_0"), val = int32(1)]; - tensor model_model_layers_0_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(878759936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(884731968))))[name = string("model_model_layers_0_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_15_cast_fp16 = conv(dilations = x_15_dilations_0, groups = x_15_groups_0, pad = x_15_pad_0, pad_type = x_15_pad_type_0, strides = x_15_strides_0, weight = model_model_layers_0_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_15_cast_fp16)[name = string("x_15_cast_fp16")]; - string b_1_pad_type_0 = const()[name = string("b_1_pad_type_0"), val = string("valid")]; - tensor b_1_strides_0 = const()[name = string("b_1_strides_0"), val = tensor([1, 1])]; - tensor b_1_pad_0 = const()[name = string("b_1_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_1_dilations_0 = const()[name = string("b_1_dilations_0"), val = tensor([1, 1])]; - int32 b_1_groups_0 = const()[name = string("b_1_groups_0"), val = int32(1)]; - tensor model_model_layers_0_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(884842624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(890814656))))[name = string("model_model_layers_0_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_1_cast_fp16 = conv(dilations = b_1_dilations_0, groups = b_1_groups_0, pad = b_1_pad_0, pad_type = b_1_pad_type_0, strides = b_1_strides_0, weight = model_model_layers_0_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_15_cast_fp16)[name = string("b_1_cast_fp16")]; - string var_2302_mode_0 = const()[name = string("op_2302_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_2302_cast_fp16 = gelu(mode = var_2302_mode_0, x = x_15_cast_fp16)[name = string("op_2302_cast_fp16")]; - tensor input_17_cast_fp16 = mul(x = var_2302_cast_fp16, y = b_1_cast_fp16)[name = string("input_17_cast_fp16")]; - string e_1_pad_type_0 = const()[name = string("e_1_pad_type_0"), val = string("valid")]; - tensor e_1_strides_0 = const()[name = string("e_1_strides_0"), val = tensor([1, 1])]; - tensor e_1_pad_0 = const()[name = string("e_1_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_1_dilations_0 = const()[name = string("e_1_dilations_0"), val = tensor([1, 1])]; - int32 e_1_groups_0 = const()[name = string("e_1_groups_0"), val = int32(1)]; - tensor model_model_layers_0_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56619392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62591424))))[name = string("model_model_layers_0_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_1_cast_fp16 = conv(dilations = e_1_dilations_0, groups = e_1_groups_0, pad = e_1_pad_0, pad_type = e_1_pad_type_0, strides = e_1_strides_0, weight = model_model_layers_0_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_17_cast_fp16)[name = string("e_1_cast_fp16")]; - tensor var_2310_axes_0 = const()[name = string("op_2310_axes_0"), val = tensor([2])]; - tensor var_2310_cast_fp16 = squeeze(axes = var_2310_axes_0, x = e_1_cast_fp16)[name = string("op_2310_cast_fp16")]; - tensor var_2311 = const()[name = string("op_2311"), val = tensor([0, 2, 1])]; - int32 var_2322 = const()[name = string("op_2322"), val = int32(-1)]; - fp16 const_39_promoted_to_fp16 = const()[name = string("const_39_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_13_cast_fp16 = transpose(perm = var_2311, x = var_2310_cast_fp16)[name = string("transpose_227")]; - tensor var_2324_cast_fp16 = mul(x = hidden_states_13_cast_fp16, y = const_39_promoted_to_fp16)[name = string("op_2324_cast_fp16")]; - bool input_19_interleave_0 = const()[name = string("input_19_interleave_0"), val = bool(false)]; - tensor input_19_cast_fp16 = concat(axis = var_2322, interleave = input_19_interleave_0, values = (hidden_states_13_cast_fp16, var_2324_cast_fp16))[name = string("input_19_cast_fp16")]; - tensor normed_21_axes_0 = const()[name = string("normed_21_axes_0"), val = tensor([-1])]; - fp16 var_2319_to_fp16 = const()[name = string("op_2319_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_21_cast_fp16 = layer_norm(axes = normed_21_axes_0, epsilon = var_2319_to_fp16, x = input_19_cast_fp16)[name = string("normed_21_cast_fp16")]; - tensor normed_23_begin_0 = const()[name = string("normed_23_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_23_end_0 = const()[name = string("normed_23_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_23_end_mask_0 = const()[name = string("normed_23_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_23_cast_fp16 = slice_by_index(begin = normed_23_begin_0, end = normed_23_end_0, end_mask = normed_23_end_mask_0, x = normed_21_cast_fp16)[name = string("normed_23_cast_fp16")]; - tensor var_2338_to_fp16 = const()[name = string("op_2338_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62609920)))]; - tensor hidden_states_15_cast_fp16 = mul(x = normed_23_cast_fp16, y = var_2338_to_fp16)[name = string("hidden_states_15_cast_fp16")]; - tensor hidden_states_17_cast_fp16 = add(x = hidden_states_11_cast_fp16, y = hidden_states_15_cast_fp16)[name = string("hidden_states_17_cast_fp16")]; - int32 var_2392 = const()[name = string("op_2392"), val = int32(-1)]; - fp16 const_44_promoted_to_fp16 = const()[name = string("const_44_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_2394_cast_fp16 = mul(x = hidden_states_17_cast_fp16, y = const_44_promoted_to_fp16)[name = string("op_2394_cast_fp16")]; - bool input_21_interleave_0 = const()[name = string("input_21_interleave_0"), val = bool(false)]; - tensor input_21_cast_fp16 = concat(axis = var_2392, interleave = input_21_interleave_0, values = (hidden_states_17_cast_fp16, var_2394_cast_fp16))[name = string("input_21_cast_fp16")]; - tensor normed_25_axes_0 = const()[name = string("normed_25_axes_0"), val = tensor([-1])]; - fp16 var_2389_to_fp16 = const()[name = string("op_2389_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_25_cast_fp16 = layer_norm(axes = normed_25_axes_0, epsilon = var_2389_to_fp16, x = input_21_cast_fp16)[name = string("normed_25_cast_fp16")]; - tensor normed_27_begin_0 = const()[name = string("normed_27_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_27_end_0 = const()[name = string("normed_27_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_27_end_mask_0 = const()[name = string("normed_27_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_27_cast_fp16 = slice_by_index(begin = normed_27_begin_0, end = normed_27_end_0, end_mask = normed_27_end_mask_0, x = normed_25_cast_fp16)[name = string("normed_27_cast_fp16")]; - tensor var_2408_to_fp16 = const()[name = string("op_2408_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62612288)))]; - tensor hidden_states_19_cast_fp16 = mul(x = normed_27_cast_fp16, y = var_2408_to_fp16)[name = string("hidden_states_19_cast_fp16")]; - tensor var_2419 = const()[name = string("op_2419"), val = tensor([0, 2, 1])]; - tensor var_2422_axes_0 = const()[name = string("op_2422_axes_0"), val = tensor([2])]; - tensor var_2420_cast_fp16 = transpose(perm = var_2419, x = hidden_states_19_cast_fp16)[name = string("transpose_226")]; - tensor var_2422_cast_fp16 = expand_dims(axes = var_2422_axes_0, x = var_2420_cast_fp16)[name = string("op_2422_cast_fp16")]; - string query_states_9_pad_type_0 = const()[name = string("query_states_9_pad_type_0"), val = string("valid")]; - tensor query_states_9_strides_0 = const()[name = string("query_states_9_strides_0"), val = tensor([1, 1])]; - tensor query_states_9_pad_0 = const()[name = string("query_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_9_dilations_0 = const()[name = string("query_states_9_dilations_0"), val = tensor([1, 1])]; - int32 query_states_9_groups_0 = const()[name = string("query_states_9_groups_0"), val = int32(1)]; - tensor query_states_9 = conv(dilations = query_states_9_dilations_0, groups = query_states_9_groups_0, pad = query_states_9_pad_0, pad_type = query_states_9_pad_type_0, strides = query_states_9_strides_0, weight = model_model_layers_1_self_attn_q_proj_weight_palettized, x = var_2422_cast_fp16)[name = string("query_states_9")]; - string key_states_11_pad_type_0 = const()[name = string("key_states_11_pad_type_0"), val = string("valid")]; - tensor key_states_11_strides_0 = const()[name = string("key_states_11_strides_0"), val = tensor([1, 1])]; - tensor key_states_11_pad_0 = const()[name = string("key_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_11_dilations_0 = const()[name = string("key_states_11_dilations_0"), val = tensor([1, 1])]; - int32 key_states_11_groups_0 = const()[name = string("key_states_11_groups_0"), val = int32(1)]; - tensor key_states_11 = conv(dilations = key_states_11_dilations_0, groups = key_states_11_groups_0, pad = key_states_11_pad_0, pad_type = key_states_11_pad_type_0, strides = key_states_11_strides_0, weight = model_model_layers_1_self_attn_k_proj_weight_palettized, x = var_2422_cast_fp16)[name = string("key_states_11")]; - string value_states_9_pad_type_0 = const()[name = string("value_states_9_pad_type_0"), val = string("valid")]; - tensor value_states_9_strides_0 = const()[name = string("value_states_9_strides_0"), val = tensor([1, 1])]; - tensor value_states_9_pad_0 = const()[name = string("value_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_9_dilations_0 = const()[name = string("value_states_9_dilations_0"), val = tensor([1, 1])]; - int32 value_states_9_groups_0 = const()[name = string("value_states_9_groups_0"), val = int32(1)]; - tensor value_states_9 = conv(dilations = value_states_9_dilations_0, groups = value_states_9_groups_0, pad = value_states_9_pad_0, pad_type = value_states_9_pad_type_0, strides = value_states_9_strides_0, weight = model_model_layers_1_self_attn_v_proj_weight_palettized, x = var_2422_cast_fp16)[name = string("value_states_9")]; - tensor var_2464 = const()[name = string("op_2464"), val = tensor([1, 4, 256, 64])]; - tensor var_2465 = reshape(shape = var_2464, x = query_states_9)[name = string("op_2465")]; - tensor var_2470 = const()[name = string("op_2470"), val = tensor([0, 1, 3, 2])]; - tensor var_2475 = const()[name = string("op_2475"), val = tensor([1, 1, 256, 64])]; - tensor var_2476 = reshape(shape = var_2475, x = key_states_11)[name = string("op_2476")]; - tensor var_2481 = const()[name = string("op_2481"), val = tensor([0, 1, 3, 2])]; - tensor var_2486 = const()[name = string("op_2486"), val = tensor([1, 1, 256, 64])]; - tensor var_2487 = reshape(shape = var_2486, x = value_states_9)[name = string("op_2487")]; - tensor var_2492 = const()[name = string("op_2492"), val = tensor([0, 1, 3, 2])]; - int32 var_2503 = const()[name = string("op_2503"), val = int32(-1)]; - fp16 const_49_promoted = const()[name = string("const_49_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_21 = transpose(perm = var_2470, x = var_2465)[name = string("transpose_225")]; - tensor var_2505 = mul(x = hidden_states_21, y = const_49_promoted)[name = string("op_2505")]; - bool input_25_interleave_0 = const()[name = string("input_25_interleave_0"), val = bool(false)]; - tensor input_25 = concat(axis = var_2503, interleave = input_25_interleave_0, values = (hidden_states_21, var_2505))[name = string("input_25")]; - tensor normed_29_axes_0 = const()[name = string("normed_29_axes_0"), val = tensor([-1])]; - fp16 var_2500_to_fp16 = const()[name = string("op_2500_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_29_cast_fp16 = layer_norm(axes = normed_29_axes_0, epsilon = var_2500_to_fp16, x = input_25)[name = string("normed_29_cast_fp16")]; - tensor normed_31_begin_0 = const()[name = string("normed_31_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_31_end_0 = const()[name = string("normed_31_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_31_end_mask_0 = const()[name = string("normed_31_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_31 = slice_by_index(begin = normed_31_begin_0, end = normed_31_end_0, end_mask = normed_31_end_mask_0, x = normed_29_cast_fp16)[name = string("normed_31")]; - tensor var_2519_to_fp16 = const()[name = string("op_2519_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62614656)))]; - tensor q_3_cast_fp16 = mul(x = normed_31, y = var_2519_to_fp16)[name = string("q_3_cast_fp16")]; - int32 var_2530 = const()[name = string("op_2530"), val = int32(-1)]; - fp16 const_53_promoted = const()[name = string("const_53_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_23 = transpose(perm = var_2481, x = var_2476)[name = string("transpose_224")]; - tensor var_2532 = mul(x = hidden_states_23, y = const_53_promoted)[name = string("op_2532")]; - bool input_27_interleave_0 = const()[name = string("input_27_interleave_0"), val = bool(false)]; - tensor input_27 = concat(axis = var_2530, interleave = input_27_interleave_0, values = (hidden_states_23, var_2532))[name = string("input_27")]; - tensor normed_33_axes_0 = const()[name = string("normed_33_axes_0"), val = tensor([-1])]; - fp16 var_2527_to_fp16 = const()[name = string("op_2527_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_33_cast_fp16 = layer_norm(axes = normed_33_axes_0, epsilon = var_2527_to_fp16, x = input_27)[name = string("normed_33_cast_fp16")]; - tensor normed_35_begin_0 = const()[name = string("normed_35_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_35_end_0 = const()[name = string("normed_35_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_35_end_mask_0 = const()[name = string("normed_35_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_35 = slice_by_index(begin = normed_35_begin_0, end = normed_35_end_0, end_mask = normed_35_end_mask_0, x = normed_33_cast_fp16)[name = string("normed_35")]; - tensor var_2546_to_fp16 = const()[name = string("op_2546_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62615232)))]; - tensor k_3_cast_fp16 = mul(x = normed_35, y = var_2546_to_fp16)[name = string("k_3_cast_fp16")]; - tensor var_2560_cast_fp16 = mul(x = q_3_cast_fp16, y = cos_5)[name = string("op_2560_cast_fp16")]; - tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_5_cast_fp16 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = q_3_cast_fp16)[name = string("x1_5_cast_fp16")]; - tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_5_cast_fp16 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = q_3_cast_fp16)[name = string("x2_5_cast_fp16")]; - fp16 const_59_promoted_to_fp16 = const()[name = string("const_59_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_2581_cast_fp16 = mul(x = x2_5_cast_fp16, y = const_59_promoted_to_fp16)[name = string("op_2581_cast_fp16")]; - int32 var_2583 = const()[name = string("op_2583"), val = int32(-1)]; - bool var_2584_interleave_0 = const()[name = string("op_2584_interleave_0"), val = bool(false)]; - tensor var_2584_cast_fp16 = concat(axis = var_2583, interleave = var_2584_interleave_0, values = (var_2581_cast_fp16, x1_5_cast_fp16))[name = string("op_2584_cast_fp16")]; - tensor var_2585_cast_fp16 = mul(x = var_2584_cast_fp16, y = sin_5)[name = string("op_2585_cast_fp16")]; - tensor query_states_11_cast_fp16 = add(x = var_2560_cast_fp16, y = var_2585_cast_fp16)[name = string("query_states_11_cast_fp16")]; - tensor var_2588_cast_fp16 = mul(x = k_3_cast_fp16, y = cos_5)[name = string("op_2588_cast_fp16")]; - tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_7_cast_fp16 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = k_3_cast_fp16)[name = string("x1_7_cast_fp16")]; - tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_7_cast_fp16 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = k_3_cast_fp16)[name = string("x2_7_cast_fp16")]; - fp16 const_62_promoted_to_fp16 = const()[name = string("const_62_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_2609_cast_fp16 = mul(x = x2_7_cast_fp16, y = const_62_promoted_to_fp16)[name = string("op_2609_cast_fp16")]; - int32 var_2611 = const()[name = string("op_2611"), val = int32(-1)]; - bool var_2612_interleave_0 = const()[name = string("op_2612_interleave_0"), val = bool(false)]; - tensor var_2612_cast_fp16 = concat(axis = var_2611, interleave = var_2612_interleave_0, values = (var_2609_cast_fp16, x1_7_cast_fp16))[name = string("op_2612_cast_fp16")]; - tensor var_2613_cast_fp16 = mul(x = var_2612_cast_fp16, y = sin_5)[name = string("op_2613_cast_fp16")]; - tensor key_states_13_cast_fp16 = add(x = var_2588_cast_fp16, y = var_2613_cast_fp16)[name = string("key_states_13_cast_fp16")]; - tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([1])]; - tensor expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor([0])]; - tensor expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor([0])]; - tensor expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor([2])]; - int32 concat_20_axis_0 = const()[name = string("concat_20_axis_0"), val = int32(0)]; - bool concat_20_interleave_0 = const()[name = string("concat_20_interleave_0"), val = bool(false)]; - tensor concat_20 = concat(axis = concat_20_axis_0, interleave = concat_20_interleave_0, values = (expand_dims_12, expand_dims_13, current_pos, expand_dims_15))[name = string("concat_20")]; - tensor concat_21_values1_0 = const()[name = string("concat_21_values1_0"), val = tensor([0])]; - tensor concat_21_values3_0 = const()[name = string("concat_21_values3_0"), val = tensor([0])]; - int32 concat_21_axis_0 = const()[name = string("concat_21_axis_0"), val = int32(0)]; - bool concat_21_interleave_0 = const()[name = string("concat_21_interleave_0"), val = bool(false)]; - tensor concat_21 = concat(axis = concat_21_axis_0, interleave = concat_21_interleave_0, values = (expand_dims_16, concat_21_values1_0, end_pos_1, concat_21_values3_0))[name = string("concat_21")]; - tensor model_model_kv_cache_local_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_20, begin_mask = model_model_kv_cache_local_internal_tensor_assign_3_begin_mask_0, end = concat_21, end_mask = model_model_kv_cache_local_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_3_stride_0, update = key_states_13_cast_fp16, x = coreml_update_state_53)[name = string("model_model_kv_cache_local_internal_tensor_assign_3_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_106_write_state")]; - tensor coreml_update_state_54 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_106")]; - tensor expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor([23])]; - tensor expand_dims_19 = const()[name = string("expand_dims_19"), val = tensor([0])]; - tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([0])]; - tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([24])]; - int32 concat_24_axis_0 = const()[name = string("concat_24_axis_0"), val = int32(0)]; - bool concat_24_interleave_0 = const()[name = string("concat_24_interleave_0"), val = bool(false)]; - tensor concat_24 = concat(axis = concat_24_axis_0, interleave = concat_24_interleave_0, values = (expand_dims_18, expand_dims_19, current_pos, expand_dims_21))[name = string("concat_24")]; - tensor concat_25_values1_0 = const()[name = string("concat_25_values1_0"), val = tensor([0])]; - tensor concat_25_values3_0 = const()[name = string("concat_25_values3_0"), val = tensor([0])]; - int32 concat_25_axis_0 = const()[name = string("concat_25_axis_0"), val = int32(0)]; - bool concat_25_interleave_0 = const()[name = string("concat_25_interleave_0"), val = bool(false)]; - tensor concat_25 = concat(axis = concat_25_axis_0, interleave = concat_25_interleave_0, values = (expand_dims_22, concat_25_values1_0, end_pos_1, concat_25_values3_0))[name = string("concat_25")]; - tensor model_model_kv_cache_local_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor value_states_11 = transpose(perm = var_2492, x = var_2487)[name = string("transpose_223")]; - tensor model_model_kv_cache_local_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_24, begin_mask = model_model_kv_cache_local_internal_tensor_assign_4_begin_mask_0, end = concat_25, end_mask = model_model_kv_cache_local_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_4_stride_0, update = value_states_11, x = coreml_update_state_54)[name = string("model_model_kv_cache_local_internal_tensor_assign_4_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_107_write_state")]; - tensor coreml_update_state_55 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_107")]; - tensor var_2712_begin_0 = const()[name = string("op_2712_begin_0"), val = tensor([1, 0, 0, 0])]; - tensor var_2712_end_0 = const()[name = string("op_2712_end_0"), val = tensor([2, 1, 512, 256])]; - tensor var_2712_end_mask_0 = const()[name = string("op_2712_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_2712_cast_fp16 = slice_by_index(begin = var_2712_begin_0, end = var_2712_end_0, end_mask = var_2712_end_mask_0, x = coreml_update_state_55)[name = string("op_2712_cast_fp16")]; - tensor var_2719_begin_0 = const()[name = string("op_2719_begin_0"), val = tensor([23, 0, 0, 0])]; - tensor var_2719_end_0 = const()[name = string("op_2719_end_0"), val = tensor([24, 1, 512, 256])]; - tensor var_2719_end_mask_0 = const()[name = string("op_2719_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_2719_cast_fp16 = slice_by_index(begin = var_2719_begin_0, end = var_2719_end_0, end_mask = var_2719_end_mask_0, x = coreml_update_state_55)[name = string("op_2719_cast_fp16")]; - tensor var_2758 = const()[name = string("op_2758"), val = tensor([1, 4, 1, 1])]; - tensor x_21_cast_fp16 = tile(reps = var_2758, x = var_2712_cast_fp16)[name = string("x_21_cast_fp16")]; - tensor var_2778 = const()[name = string("op_2778"), val = tensor([1, 4, 1, 1])]; - tensor x_27_cast_fp16 = tile(reps = var_2778, x = var_2719_cast_fp16)[name = string("x_27_cast_fp16")]; - bool var_2805_transpose_x_0 = const()[name = string("op_2805_transpose_x_0"), val = bool(false)]; - bool var_2805_transpose_y_0 = const()[name = string("op_2805_transpose_y_0"), val = bool(true)]; - tensor var_2805 = matmul(transpose_x = var_2805_transpose_x_0, transpose_y = var_2805_transpose_y_0, x = query_states_11_cast_fp16, y = x_21_cast_fp16)[name = string("op_2805")]; - fp16 var_2806_to_fp16 = const()[name = string("op_2806_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_5_cast_fp16 = mul(x = var_2805, y = var_2806_to_fp16)[name = string("attn_weights_5_cast_fp16")]; - tensor attn_weights_7_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = mask_slice_1)[name = string("attn_weights_7_cast_fp16")]; - int32 var_2841 = const()[name = string("op_2841"), val = int32(-1)]; - tensor var_2843_cast_fp16 = softmax(axis = var_2841, x = attn_weights_7_cast_fp16)[name = string("op_2843_cast_fp16")]; - tensor concat_30 = const()[name = string("concat_30"), val = tensor([4, 64, 512])]; - tensor reshape_3_cast_fp16 = reshape(shape = concat_30, x = var_2843_cast_fp16)[name = string("reshape_3_cast_fp16")]; - tensor concat_31 = const()[name = string("concat_31"), val = tensor([4, 512, 256])]; - tensor reshape_4_cast_fp16 = reshape(shape = concat_31, x = x_27_cast_fp16)[name = string("reshape_4_cast_fp16")]; - bool matmul_1_transpose_x_0 = const()[name = string("matmul_1_transpose_x_0"), val = bool(false)]; - bool matmul_1_transpose_y_0 = const()[name = string("matmul_1_transpose_y_0"), val = bool(false)]; - tensor matmul_1_cast_fp16 = matmul(transpose_x = matmul_1_transpose_x_0, transpose_y = matmul_1_transpose_y_0, x = reshape_3_cast_fp16, y = reshape_4_cast_fp16)[name = string("matmul_1_cast_fp16")]; - tensor concat_35 = const()[name = string("concat_35"), val = tensor([1, 4, 64, 256])]; - tensor reshape_5_cast_fp16 = reshape(shape = concat_35, x = matmul_1_cast_fp16)[name = string("reshape_5_cast_fp16")]; - tensor var_2855_perm_0 = const()[name = string("op_2855_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_2874 = const()[name = string("op_2874"), val = tensor([1, 64, 1024])]; - tensor var_2855_cast_fp16 = transpose(perm = var_2855_perm_0, x = reshape_5_cast_fp16)[name = string("transpose_222")]; - tensor attn_output_15_cast_fp16 = reshape(shape = var_2874, x = var_2855_cast_fp16)[name = string("attn_output_15_cast_fp16")]; - tensor var_2879 = const()[name = string("op_2879"), val = tensor([0, 2, 1])]; - string var_2895_pad_type_0 = const()[name = string("op_2895_pad_type_0"), val = string("valid")]; - int32 var_2895_groups_0 = const()[name = string("op_2895_groups_0"), val = int32(1)]; - tensor var_2895_strides_0 = const()[name = string("op_2895_strides_0"), val = tensor([1])]; - tensor var_2895_pad_0 = const()[name = string("op_2895_pad_0"), val = tensor([0, 0])]; - tensor var_2895_dilations_0 = const()[name = string("op_2895_dilations_0"), val = tensor([1])]; - tensor squeeze_1_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62615808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63500608))))[name = string("squeeze_1_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_2880_cast_fp16 = transpose(perm = var_2879, x = attn_output_15_cast_fp16)[name = string("transpose_221")]; - tensor var_2895_cast_fp16 = conv(dilations = var_2895_dilations_0, groups = var_2895_groups_0, pad = var_2895_pad_0, pad_type = var_2895_pad_type_0, strides = var_2895_strides_0, weight = squeeze_1_cast_fp16_to_fp32_to_fp16_palettized, x = var_2880_cast_fp16)[name = string("op_2895_cast_fp16")]; - tensor var_2899 = const()[name = string("op_2899"), val = tensor([0, 2, 1])]; - int32 var_2910 = const()[name = string("op_2910"), val = int32(-1)]; - fp16 const_74_promoted_to_fp16 = const()[name = string("const_74_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_25_cast_fp16 = transpose(perm = var_2899, x = var_2895_cast_fp16)[name = string("transpose_220")]; - tensor var_2912_cast_fp16 = mul(x = hidden_states_25_cast_fp16, y = const_74_promoted_to_fp16)[name = string("op_2912_cast_fp16")]; - bool input_31_interleave_0 = const()[name = string("input_31_interleave_0"), val = bool(false)]; - tensor input_31_cast_fp16 = concat(axis = var_2910, interleave = input_31_interleave_0, values = (hidden_states_25_cast_fp16, var_2912_cast_fp16))[name = string("input_31_cast_fp16")]; - tensor normed_37_axes_0 = const()[name = string("normed_37_axes_0"), val = tensor([-1])]; - fp16 var_2907_to_fp16 = const()[name = string("op_2907_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_37_cast_fp16 = layer_norm(axes = normed_37_axes_0, epsilon = var_2907_to_fp16, x = input_31_cast_fp16)[name = string("normed_37_cast_fp16")]; - tensor normed_39_begin_0 = const()[name = string("normed_39_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_39_end_0 = const()[name = string("normed_39_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_39_end_mask_0 = const()[name = string("normed_39_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_39_cast_fp16 = slice_by_index(begin = normed_39_begin_0, end = normed_39_end_0, end_mask = normed_39_end_mask_0, x = normed_37_cast_fp16)[name = string("normed_39_cast_fp16")]; - tensor var_2926_to_fp16 = const()[name = string("op_2926_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63519104)))]; - tensor attn_output_19_cast_fp16 = mul(x = normed_39_cast_fp16, y = var_2926_to_fp16)[name = string("attn_output_19_cast_fp16")]; - tensor hidden_states_27_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = attn_output_19_cast_fp16)[name = string("hidden_states_27_cast_fp16")]; - int32 var_2939 = const()[name = string("op_2939"), val = int32(-1)]; - fp16 const_78_promoted_to_fp16 = const()[name = string("const_78_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_2941_cast_fp16 = mul(x = hidden_states_27_cast_fp16, y = const_78_promoted_to_fp16)[name = string("op_2941_cast_fp16")]; - bool input_33_interleave_0 = const()[name = string("input_33_interleave_0"), val = bool(false)]; - tensor input_33_cast_fp16 = concat(axis = var_2939, interleave = input_33_interleave_0, values = (hidden_states_27_cast_fp16, var_2941_cast_fp16))[name = string("input_33_cast_fp16")]; - tensor normed_41_axes_0 = const()[name = string("normed_41_axes_0"), val = tensor([-1])]; - fp16 var_2936_to_fp16 = const()[name = string("op_2936_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_41_cast_fp16 = layer_norm(axes = normed_41_axes_0, epsilon = var_2936_to_fp16, x = input_33_cast_fp16)[name = string("normed_41_cast_fp16")]; - tensor normed_43_begin_0 = const()[name = string("normed_43_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_43_end_0 = const()[name = string("normed_43_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_43_end_mask_0 = const()[name = string("normed_43_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_43_cast_fp16 = slice_by_index(begin = normed_43_begin_0, end = normed_43_end_0, end_mask = normed_43_end_mask_0, x = normed_41_cast_fp16)[name = string("normed_43_cast_fp16")]; - tensor var_2955_to_fp16 = const()[name = string("op_2955_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63521472)))]; - tensor x_29_cast_fp16 = mul(x = normed_43_cast_fp16, y = var_2955_to_fp16)[name = string("x_29_cast_fp16")]; - tensor var_2967 = const()[name = string("op_2967"), val = tensor([0, 2, 1])]; - tensor input_35_axes_0 = const()[name = string("input_35_axes_0"), val = tensor([2])]; - tensor var_2968_cast_fp16 = transpose(perm = var_2967, x = x_29_cast_fp16)[name = string("transpose_219")]; - tensor input_35_cast_fp16 = expand_dims(axes = input_35_axes_0, x = var_2968_cast_fp16)[name = string("input_35_cast_fp16")]; - string x_31_pad_type_0 = const()[name = string("x_31_pad_type_0"), val = string("valid")]; - tensor x_31_strides_0 = const()[name = string("x_31_strides_0"), val = tensor([1, 1])]; - tensor x_31_pad_0 = const()[name = string("x_31_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_31_dilations_0 = const()[name = string("x_31_dilations_0"), val = tensor([1, 1])]; - int32 x_31_groups_0 = const()[name = string("x_31_groups_0"), val = int32(1)]; - tensor model_model_layers_1_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(890925312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(896897344))))[name = string("model_model_layers_1_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_31_cast_fp16 = conv(dilations = x_31_dilations_0, groups = x_31_groups_0, pad = x_31_pad_0, pad_type = x_31_pad_type_0, strides = x_31_strides_0, weight = model_model_layers_1_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_35_cast_fp16)[name = string("x_31_cast_fp16")]; - string b_3_pad_type_0 = const()[name = string("b_3_pad_type_0"), val = string("valid")]; - tensor b_3_strides_0 = const()[name = string("b_3_strides_0"), val = tensor([1, 1])]; - tensor b_3_pad_0 = const()[name = string("b_3_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_3_dilations_0 = const()[name = string("b_3_dilations_0"), val = tensor([1, 1])]; - int32 b_3_groups_0 = const()[name = string("b_3_groups_0"), val = int32(1)]; - tensor model_model_layers_1_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(897008000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(902980032))))[name = string("model_model_layers_1_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_3_cast_fp16 = conv(dilations = b_3_dilations_0, groups = b_3_groups_0, pad = b_3_pad_0, pad_type = b_3_pad_type_0, strides = b_3_strides_0, weight = model_model_layers_1_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_35_cast_fp16)[name = string("b_3_cast_fp16")]; - string var_2993_mode_0 = const()[name = string("op_2993_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_2993_cast_fp16 = gelu(mode = var_2993_mode_0, x = x_31_cast_fp16)[name = string("op_2993_cast_fp16")]; - tensor input_37_cast_fp16 = mul(x = var_2993_cast_fp16, y = b_3_cast_fp16)[name = string("input_37_cast_fp16")]; - string e_3_pad_type_0 = const()[name = string("e_3_pad_type_0"), val = string("valid")]; - tensor e_3_strides_0 = const()[name = string("e_3_strides_0"), val = tensor([1, 1])]; - tensor e_3_pad_0 = const()[name = string("e_3_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_3_dilations_0 = const()[name = string("e_3_dilations_0"), val = tensor([1, 1])]; - int32 e_3_groups_0 = const()[name = string("e_3_groups_0"), val = int32(1)]; - tensor model_model_layers_1_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75689216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81661248))))[name = string("model_model_layers_1_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_3_cast_fp16 = conv(dilations = e_3_dilations_0, groups = e_3_groups_0, pad = e_3_pad_0, pad_type = e_3_pad_type_0, strides = e_3_strides_0, weight = model_model_layers_1_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_37_cast_fp16)[name = string("e_3_cast_fp16")]; - tensor var_3001_axes_0 = const()[name = string("op_3001_axes_0"), val = tensor([2])]; - tensor var_3001_cast_fp16 = squeeze(axes = var_3001_axes_0, x = e_3_cast_fp16)[name = string("op_3001_cast_fp16")]; - tensor var_3002 = const()[name = string("op_3002"), val = tensor([0, 2, 1])]; - int32 var_3013 = const()[name = string("op_3013"), val = int32(-1)]; - fp16 const_82_promoted_to_fp16 = const()[name = string("const_82_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_29_cast_fp16 = transpose(perm = var_3002, x = var_3001_cast_fp16)[name = string("transpose_218")]; - tensor var_3015_cast_fp16 = mul(x = hidden_states_29_cast_fp16, y = const_82_promoted_to_fp16)[name = string("op_3015_cast_fp16")]; - bool input_39_interleave_0 = const()[name = string("input_39_interleave_0"), val = bool(false)]; - tensor input_39_cast_fp16 = concat(axis = var_3013, interleave = input_39_interleave_0, values = (hidden_states_29_cast_fp16, var_3015_cast_fp16))[name = string("input_39_cast_fp16")]; - tensor normed_45_axes_0 = const()[name = string("normed_45_axes_0"), val = tensor([-1])]; - fp16 var_3010_to_fp16 = const()[name = string("op_3010_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_45_cast_fp16 = layer_norm(axes = normed_45_axes_0, epsilon = var_3010_to_fp16, x = input_39_cast_fp16)[name = string("normed_45_cast_fp16")]; - tensor normed_47_begin_0 = const()[name = string("normed_47_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_47_end_0 = const()[name = string("normed_47_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_47_end_mask_0 = const()[name = string("normed_47_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_47_cast_fp16 = slice_by_index(begin = normed_47_begin_0, end = normed_47_end_0, end_mask = normed_47_end_mask_0, x = normed_45_cast_fp16)[name = string("normed_47_cast_fp16")]; - tensor var_3029_to_fp16 = const()[name = string("op_3029_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81679744)))]; - tensor hidden_states_31_cast_fp16 = mul(x = normed_47_cast_fp16, y = var_3029_to_fp16)[name = string("hidden_states_31_cast_fp16")]; - tensor hidden_states_33_cast_fp16 = add(x = hidden_states_27_cast_fp16, y = hidden_states_31_cast_fp16)[name = string("hidden_states_33_cast_fp16")]; - int32 var_3083 = const()[name = string("op_3083"), val = int32(-1)]; - fp16 const_87_promoted_to_fp16 = const()[name = string("const_87_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_3085_cast_fp16 = mul(x = hidden_states_33_cast_fp16, y = const_87_promoted_to_fp16)[name = string("op_3085_cast_fp16")]; - bool input_41_interleave_0 = const()[name = string("input_41_interleave_0"), val = bool(false)]; - tensor input_41_cast_fp16 = concat(axis = var_3083, interleave = input_41_interleave_0, values = (hidden_states_33_cast_fp16, var_3085_cast_fp16))[name = string("input_41_cast_fp16")]; - tensor normed_49_axes_0 = const()[name = string("normed_49_axes_0"), val = tensor([-1])]; - fp16 var_3080_to_fp16 = const()[name = string("op_3080_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_49_cast_fp16 = layer_norm(axes = normed_49_axes_0, epsilon = var_3080_to_fp16, x = input_41_cast_fp16)[name = string("normed_49_cast_fp16")]; - tensor normed_51_begin_0 = const()[name = string("normed_51_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_51_end_0 = const()[name = string("normed_51_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_51_end_mask_0 = const()[name = string("normed_51_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_51_cast_fp16 = slice_by_index(begin = normed_51_begin_0, end = normed_51_end_0, end_mask = normed_51_end_mask_0, x = normed_49_cast_fp16)[name = string("normed_51_cast_fp16")]; - tensor var_3099_to_fp16 = const()[name = string("op_3099_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81682112)))]; - tensor hidden_states_35_cast_fp16 = mul(x = normed_51_cast_fp16, y = var_3099_to_fp16)[name = string("hidden_states_35_cast_fp16")]; - tensor var_3110 = const()[name = string("op_3110"), val = tensor([0, 2, 1])]; - tensor var_3113_axes_0 = const()[name = string("op_3113_axes_0"), val = tensor([2])]; - tensor var_3111_cast_fp16 = transpose(perm = var_3110, x = hidden_states_35_cast_fp16)[name = string("transpose_217")]; - tensor var_3113_cast_fp16 = expand_dims(axes = var_3113_axes_0, x = var_3111_cast_fp16)[name = string("op_3113_cast_fp16")]; - string query_states_17_pad_type_0 = const()[name = string("query_states_17_pad_type_0"), val = string("valid")]; - tensor query_states_17_strides_0 = const()[name = string("query_states_17_strides_0"), val = tensor([1, 1])]; - tensor query_states_17_pad_0 = const()[name = string("query_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_17_dilations_0 = const()[name = string("query_states_17_dilations_0"), val = tensor([1, 1])]; - int32 query_states_17_groups_0 = const()[name = string("query_states_17_groups_0"), val = int32(1)]; - tensor query_states_17 = conv(dilations = query_states_17_dilations_0, groups = query_states_17_groups_0, pad = query_states_17_pad_0, pad_type = query_states_17_pad_type_0, strides = query_states_17_strides_0, weight = model_model_layers_2_self_attn_q_proj_weight_palettized, x = var_3113_cast_fp16)[name = string("query_states_17")]; - string key_states_21_pad_type_0 = const()[name = string("key_states_21_pad_type_0"), val = string("valid")]; - tensor key_states_21_strides_0 = const()[name = string("key_states_21_strides_0"), val = tensor([1, 1])]; - tensor key_states_21_pad_0 = const()[name = string("key_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_21_dilations_0 = const()[name = string("key_states_21_dilations_0"), val = tensor([1, 1])]; - int32 key_states_21_groups_0 = const()[name = string("key_states_21_groups_0"), val = int32(1)]; - tensor key_states_21 = conv(dilations = key_states_21_dilations_0, groups = key_states_21_groups_0, pad = key_states_21_pad_0, pad_type = key_states_21_pad_type_0, strides = key_states_21_strides_0, weight = model_model_layers_2_self_attn_k_proj_weight_palettized, x = var_3113_cast_fp16)[name = string("key_states_21")]; - string value_states_17_pad_type_0 = const()[name = string("value_states_17_pad_type_0"), val = string("valid")]; - tensor value_states_17_strides_0 = const()[name = string("value_states_17_strides_0"), val = tensor([1, 1])]; - tensor value_states_17_pad_0 = const()[name = string("value_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_17_dilations_0 = const()[name = string("value_states_17_dilations_0"), val = tensor([1, 1])]; - int32 value_states_17_groups_0 = const()[name = string("value_states_17_groups_0"), val = int32(1)]; - tensor value_states_17 = conv(dilations = value_states_17_dilations_0, groups = value_states_17_groups_0, pad = value_states_17_pad_0, pad_type = value_states_17_pad_type_0, strides = value_states_17_strides_0, weight = model_model_layers_2_self_attn_v_proj_weight_palettized, x = var_3113_cast_fp16)[name = string("value_states_17")]; - tensor var_3155 = const()[name = string("op_3155"), val = tensor([1, 4, 256, 64])]; - tensor var_3156 = reshape(shape = var_3155, x = query_states_17)[name = string("op_3156")]; - tensor var_3161 = const()[name = string("op_3161"), val = tensor([0, 1, 3, 2])]; - tensor var_3166 = const()[name = string("op_3166"), val = tensor([1, 1, 256, 64])]; - tensor var_3167 = reshape(shape = var_3166, x = key_states_21)[name = string("op_3167")]; - tensor var_3172 = const()[name = string("op_3172"), val = tensor([0, 1, 3, 2])]; - tensor var_3177 = const()[name = string("op_3177"), val = tensor([1, 1, 256, 64])]; - tensor var_3178 = reshape(shape = var_3177, x = value_states_17)[name = string("op_3178")]; - tensor var_3183 = const()[name = string("op_3183"), val = tensor([0, 1, 3, 2])]; - int32 var_3194 = const()[name = string("op_3194"), val = int32(-1)]; - fp16 const_92_promoted = const()[name = string("const_92_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_37 = transpose(perm = var_3161, x = var_3156)[name = string("transpose_216")]; - tensor var_3196 = mul(x = hidden_states_37, y = const_92_promoted)[name = string("op_3196")]; - bool input_45_interleave_0 = const()[name = string("input_45_interleave_0"), val = bool(false)]; - tensor input_45 = concat(axis = var_3194, interleave = input_45_interleave_0, values = (hidden_states_37, var_3196))[name = string("input_45")]; - tensor normed_53_axes_0 = const()[name = string("normed_53_axes_0"), val = tensor([-1])]; - fp16 var_3191_to_fp16 = const()[name = string("op_3191_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_53_cast_fp16 = layer_norm(axes = normed_53_axes_0, epsilon = var_3191_to_fp16, x = input_45)[name = string("normed_53_cast_fp16")]; - tensor normed_55_begin_0 = const()[name = string("normed_55_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_55_end_0 = const()[name = string("normed_55_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_55_end_mask_0 = const()[name = string("normed_55_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_55 = slice_by_index(begin = normed_55_begin_0, end = normed_55_end_0, end_mask = normed_55_end_mask_0, x = normed_53_cast_fp16)[name = string("normed_55")]; - tensor var_3210_to_fp16 = const()[name = string("op_3210_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81684480)))]; - tensor q_5_cast_fp16 = mul(x = normed_55, y = var_3210_to_fp16)[name = string("q_5_cast_fp16")]; - int32 var_3221 = const()[name = string("op_3221"), val = int32(-1)]; - fp16 const_96_promoted = const()[name = string("const_96_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_39 = transpose(perm = var_3172, x = var_3167)[name = string("transpose_215")]; - tensor var_3223 = mul(x = hidden_states_39, y = const_96_promoted)[name = string("op_3223")]; - bool input_47_interleave_0 = const()[name = string("input_47_interleave_0"), val = bool(false)]; - tensor input_47 = concat(axis = var_3221, interleave = input_47_interleave_0, values = (hidden_states_39, var_3223))[name = string("input_47")]; - tensor normed_57_axes_0 = const()[name = string("normed_57_axes_0"), val = tensor([-1])]; - fp16 var_3218_to_fp16 = const()[name = string("op_3218_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_57_cast_fp16 = layer_norm(axes = normed_57_axes_0, epsilon = var_3218_to_fp16, x = input_47)[name = string("normed_57_cast_fp16")]; - tensor normed_59_begin_0 = const()[name = string("normed_59_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_59_end_0 = const()[name = string("normed_59_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_59_end_mask_0 = const()[name = string("normed_59_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_59 = slice_by_index(begin = normed_59_begin_0, end = normed_59_end_0, end_mask = normed_59_end_mask_0, x = normed_57_cast_fp16)[name = string("normed_59")]; - tensor var_3237_to_fp16 = const()[name = string("op_3237_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81685056)))]; - tensor k_5_cast_fp16 = mul(x = normed_59, y = var_3237_to_fp16)[name = string("k_5_cast_fp16")]; - tensor var_3251_cast_fp16 = mul(x = q_5_cast_fp16, y = cos_5)[name = string("op_3251_cast_fp16")]; - tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_9_cast_fp16 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = q_5_cast_fp16)[name = string("x1_9_cast_fp16")]; - tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_9_cast_fp16 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = q_5_cast_fp16)[name = string("x2_9_cast_fp16")]; - fp16 const_102_promoted_to_fp16 = const()[name = string("const_102_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_3272_cast_fp16 = mul(x = x2_9_cast_fp16, y = const_102_promoted_to_fp16)[name = string("op_3272_cast_fp16")]; - int32 var_3274 = const()[name = string("op_3274"), val = int32(-1)]; - bool var_3275_interleave_0 = const()[name = string("op_3275_interleave_0"), val = bool(false)]; - tensor var_3275_cast_fp16 = concat(axis = var_3274, interleave = var_3275_interleave_0, values = (var_3272_cast_fp16, x1_9_cast_fp16))[name = string("op_3275_cast_fp16")]; - tensor var_3276_cast_fp16 = mul(x = var_3275_cast_fp16, y = sin_5)[name = string("op_3276_cast_fp16")]; - tensor query_states_19_cast_fp16 = add(x = var_3251_cast_fp16, y = var_3276_cast_fp16)[name = string("query_states_19_cast_fp16")]; - tensor var_3279_cast_fp16 = mul(x = k_5_cast_fp16, y = cos_5)[name = string("op_3279_cast_fp16")]; - tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_11_cast_fp16 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = k_5_cast_fp16)[name = string("x1_11_cast_fp16")]; - tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_11_cast_fp16 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = k_5_cast_fp16)[name = string("x2_11_cast_fp16")]; - fp16 const_105_promoted_to_fp16 = const()[name = string("const_105_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_3300_cast_fp16 = mul(x = x2_11_cast_fp16, y = const_105_promoted_to_fp16)[name = string("op_3300_cast_fp16")]; - int32 var_3302 = const()[name = string("op_3302"), val = int32(-1)]; - bool var_3303_interleave_0 = const()[name = string("op_3303_interleave_0"), val = bool(false)]; - tensor var_3303_cast_fp16 = concat(axis = var_3302, interleave = var_3303_interleave_0, values = (var_3300_cast_fp16, x1_11_cast_fp16))[name = string("op_3303_cast_fp16")]; - tensor var_3304_cast_fp16 = mul(x = var_3303_cast_fp16, y = sin_5)[name = string("op_3304_cast_fp16")]; - tensor key_states_23_cast_fp16 = add(x = var_3279_cast_fp16, y = var_3304_cast_fp16)[name = string("key_states_23_cast_fp16")]; - tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([2])]; - tensor expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor([0])]; - tensor expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor([0])]; - tensor expand_dims_28 = const()[name = string("expand_dims_28"), val = tensor([3])]; - int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)]; - bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)]; - tensor concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (expand_dims_24, expand_dims_25, current_pos, expand_dims_27))[name = string("concat_38")]; - tensor concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor([0])]; - tensor concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor([0])]; - int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)]; - bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)]; - tensor concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (expand_dims_28, concat_39_values1_0, end_pos_1, concat_39_values3_0))[name = string("concat_39")]; - tensor model_model_kv_cache_local_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_38, begin_mask = model_model_kv_cache_local_internal_tensor_assign_5_begin_mask_0, end = concat_39, end_mask = model_model_kv_cache_local_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_5_stride_0, update = key_states_23_cast_fp16, x = coreml_update_state_55)[name = string("model_model_kv_cache_local_internal_tensor_assign_5_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_108_write_state")]; - tensor coreml_update_state_56 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_108")]; - tensor expand_dims_30 = const()[name = string("expand_dims_30"), val = tensor([24])]; - tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([0])]; - tensor expand_dims_33 = const()[name = string("expand_dims_33"), val = tensor([0])]; - tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([25])]; - int32 concat_42_axis_0 = const()[name = string("concat_42_axis_0"), val = int32(0)]; - bool concat_42_interleave_0 = const()[name = string("concat_42_interleave_0"), val = bool(false)]; - tensor concat_42 = concat(axis = concat_42_axis_0, interleave = concat_42_interleave_0, values = (expand_dims_30, expand_dims_31, current_pos, expand_dims_33))[name = string("concat_42")]; - tensor concat_43_values1_0 = const()[name = string("concat_43_values1_0"), val = tensor([0])]; - tensor concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor([0])]; - int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; - bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; - tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_34, concat_43_values1_0, end_pos_1, concat_43_values3_0))[name = string("concat_43")]; - tensor model_model_kv_cache_local_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor value_states_19 = transpose(perm = var_3183, x = var_3178)[name = string("transpose_214")]; - tensor model_model_kv_cache_local_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_42, begin_mask = model_model_kv_cache_local_internal_tensor_assign_6_begin_mask_0, end = concat_43, end_mask = model_model_kv_cache_local_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_6_stride_0, update = value_states_19, x = coreml_update_state_56)[name = string("model_model_kv_cache_local_internal_tensor_assign_6_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_109_write_state")]; - tensor coreml_update_state_57 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_109")]; - tensor var_3403_begin_0 = const()[name = string("op_3403_begin_0"), val = tensor([2, 0, 0, 0])]; - tensor var_3403_end_0 = const()[name = string("op_3403_end_0"), val = tensor([3, 1, 512, 256])]; - tensor var_3403_end_mask_0 = const()[name = string("op_3403_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_3403_cast_fp16 = slice_by_index(begin = var_3403_begin_0, end = var_3403_end_0, end_mask = var_3403_end_mask_0, x = coreml_update_state_57)[name = string("op_3403_cast_fp16")]; - tensor var_3410_begin_0 = const()[name = string("op_3410_begin_0"), val = tensor([24, 0, 0, 0])]; - tensor var_3410_end_0 = const()[name = string("op_3410_end_0"), val = tensor([25, 1, 512, 256])]; - tensor var_3410_end_mask_0 = const()[name = string("op_3410_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_3410_cast_fp16 = slice_by_index(begin = var_3410_begin_0, end = var_3410_end_0, end_mask = var_3410_end_mask_0, x = coreml_update_state_57)[name = string("op_3410_cast_fp16")]; - tensor var_3449 = const()[name = string("op_3449"), val = tensor([1, 4, 1, 1])]; - tensor x_37_cast_fp16 = tile(reps = var_3449, x = var_3403_cast_fp16)[name = string("x_37_cast_fp16")]; - tensor var_3469 = const()[name = string("op_3469"), val = tensor([1, 4, 1, 1])]; - tensor x_43_cast_fp16 = tile(reps = var_3469, x = var_3410_cast_fp16)[name = string("x_43_cast_fp16")]; - bool var_3496_transpose_x_0 = const()[name = string("op_3496_transpose_x_0"), val = bool(false)]; - bool var_3496_transpose_y_0 = const()[name = string("op_3496_transpose_y_0"), val = bool(true)]; - tensor var_3496 = matmul(transpose_x = var_3496_transpose_x_0, transpose_y = var_3496_transpose_y_0, x = query_states_19_cast_fp16, y = x_37_cast_fp16)[name = string("op_3496")]; - fp16 var_3497_to_fp16 = const()[name = string("op_3497_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_9_cast_fp16 = mul(x = var_3496, y = var_3497_to_fp16)[name = string("attn_weights_9_cast_fp16")]; - tensor attn_weights_11_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = mask_slice_1)[name = string("attn_weights_11_cast_fp16")]; - int32 var_3532 = const()[name = string("op_3532"), val = int32(-1)]; - tensor var_3534_cast_fp16 = softmax(axis = var_3532, x = attn_weights_11_cast_fp16)[name = string("op_3534_cast_fp16")]; - tensor concat_48 = const()[name = string("concat_48"), val = tensor([4, 64, 512])]; - tensor reshape_6_cast_fp16 = reshape(shape = concat_48, x = var_3534_cast_fp16)[name = string("reshape_6_cast_fp16")]; - tensor concat_49 = const()[name = string("concat_49"), val = tensor([4, 512, 256])]; - tensor reshape_7_cast_fp16 = reshape(shape = concat_49, x = x_43_cast_fp16)[name = string("reshape_7_cast_fp16")]; - bool matmul_2_transpose_x_0 = const()[name = string("matmul_2_transpose_x_0"), val = bool(false)]; - bool matmul_2_transpose_y_0 = const()[name = string("matmul_2_transpose_y_0"), val = bool(false)]; - tensor matmul_2_cast_fp16 = matmul(transpose_x = matmul_2_transpose_x_0, transpose_y = matmul_2_transpose_y_0, x = reshape_6_cast_fp16, y = reshape_7_cast_fp16)[name = string("matmul_2_cast_fp16")]; - tensor concat_53 = const()[name = string("concat_53"), val = tensor([1, 4, 64, 256])]; - tensor reshape_8_cast_fp16 = reshape(shape = concat_53, x = matmul_2_cast_fp16)[name = string("reshape_8_cast_fp16")]; - tensor var_3546_perm_0 = const()[name = string("op_3546_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_3565 = const()[name = string("op_3565"), val = tensor([1, 64, 1024])]; - tensor var_3546_cast_fp16 = transpose(perm = var_3546_perm_0, x = reshape_8_cast_fp16)[name = string("transpose_213")]; - tensor attn_output_25_cast_fp16 = reshape(shape = var_3565, x = var_3546_cast_fp16)[name = string("attn_output_25_cast_fp16")]; - tensor var_3570 = const()[name = string("op_3570"), val = tensor([0, 2, 1])]; - string var_3586_pad_type_0 = const()[name = string("op_3586_pad_type_0"), val = string("valid")]; - int32 var_3586_groups_0 = const()[name = string("op_3586_groups_0"), val = int32(1)]; - tensor var_3586_strides_0 = const()[name = string("op_3586_strides_0"), val = tensor([1])]; - tensor var_3586_pad_0 = const()[name = string("op_3586_pad_0"), val = tensor([0, 0])]; - tensor var_3586_dilations_0 = const()[name = string("op_3586_dilations_0"), val = tensor([1])]; - tensor squeeze_2_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81685632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82570432))))[name = string("squeeze_2_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_3571_cast_fp16 = transpose(perm = var_3570, x = attn_output_25_cast_fp16)[name = string("transpose_212")]; - tensor var_3586_cast_fp16 = conv(dilations = var_3586_dilations_0, groups = var_3586_groups_0, pad = var_3586_pad_0, pad_type = var_3586_pad_type_0, strides = var_3586_strides_0, weight = squeeze_2_cast_fp16_to_fp32_to_fp16_palettized, x = var_3571_cast_fp16)[name = string("op_3586_cast_fp16")]; - tensor var_3590 = const()[name = string("op_3590"), val = tensor([0, 2, 1])]; - int32 var_3601 = const()[name = string("op_3601"), val = int32(-1)]; - fp16 const_117_promoted_to_fp16 = const()[name = string("const_117_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_41_cast_fp16 = transpose(perm = var_3590, x = var_3586_cast_fp16)[name = string("transpose_211")]; - tensor var_3603_cast_fp16 = mul(x = hidden_states_41_cast_fp16, y = const_117_promoted_to_fp16)[name = string("op_3603_cast_fp16")]; - bool input_51_interleave_0 = const()[name = string("input_51_interleave_0"), val = bool(false)]; - tensor input_51_cast_fp16 = concat(axis = var_3601, interleave = input_51_interleave_0, values = (hidden_states_41_cast_fp16, var_3603_cast_fp16))[name = string("input_51_cast_fp16")]; - tensor normed_61_axes_0 = const()[name = string("normed_61_axes_0"), val = tensor([-1])]; - fp16 var_3598_to_fp16 = const()[name = string("op_3598_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_61_cast_fp16 = layer_norm(axes = normed_61_axes_0, epsilon = var_3598_to_fp16, x = input_51_cast_fp16)[name = string("normed_61_cast_fp16")]; - tensor normed_63_begin_0 = const()[name = string("normed_63_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_63_end_0 = const()[name = string("normed_63_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_63_end_mask_0 = const()[name = string("normed_63_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_63_cast_fp16 = slice_by_index(begin = normed_63_begin_0, end = normed_63_end_0, end_mask = normed_63_end_mask_0, x = normed_61_cast_fp16)[name = string("normed_63_cast_fp16")]; - tensor var_3617_to_fp16 = const()[name = string("op_3617_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82588928)))]; - tensor attn_output_29_cast_fp16 = mul(x = normed_63_cast_fp16, y = var_3617_to_fp16)[name = string("attn_output_29_cast_fp16")]; - tensor hidden_states_43_cast_fp16 = add(x = hidden_states_33_cast_fp16, y = attn_output_29_cast_fp16)[name = string("hidden_states_43_cast_fp16")]; - int32 var_3630 = const()[name = string("op_3630"), val = int32(-1)]; - fp16 const_121_promoted_to_fp16 = const()[name = string("const_121_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_3632_cast_fp16 = mul(x = hidden_states_43_cast_fp16, y = const_121_promoted_to_fp16)[name = string("op_3632_cast_fp16")]; - bool input_53_interleave_0 = const()[name = string("input_53_interleave_0"), val = bool(false)]; - tensor input_53_cast_fp16 = concat(axis = var_3630, interleave = input_53_interleave_0, values = (hidden_states_43_cast_fp16, var_3632_cast_fp16))[name = string("input_53_cast_fp16")]; - tensor normed_65_axes_0 = const()[name = string("normed_65_axes_0"), val = tensor([-1])]; - fp16 var_3627_to_fp16 = const()[name = string("op_3627_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_65_cast_fp16 = layer_norm(axes = normed_65_axes_0, epsilon = var_3627_to_fp16, x = input_53_cast_fp16)[name = string("normed_65_cast_fp16")]; - tensor normed_67_begin_0 = const()[name = string("normed_67_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_67_end_0 = const()[name = string("normed_67_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_67_end_mask_0 = const()[name = string("normed_67_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_67_cast_fp16 = slice_by_index(begin = normed_67_begin_0, end = normed_67_end_0, end_mask = normed_67_end_mask_0, x = normed_65_cast_fp16)[name = string("normed_67_cast_fp16")]; - tensor var_3646_to_fp16 = const()[name = string("op_3646_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82591296)))]; - tensor x_45_cast_fp16 = mul(x = normed_67_cast_fp16, y = var_3646_to_fp16)[name = string("x_45_cast_fp16")]; - tensor var_3658 = const()[name = string("op_3658"), val = tensor([0, 2, 1])]; - tensor input_55_axes_0 = const()[name = string("input_55_axes_0"), val = tensor([2])]; - tensor var_3659_cast_fp16 = transpose(perm = var_3658, x = x_45_cast_fp16)[name = string("transpose_210")]; - tensor input_55_cast_fp16 = expand_dims(axes = input_55_axes_0, x = var_3659_cast_fp16)[name = string("input_55_cast_fp16")]; - string x_47_pad_type_0 = const()[name = string("x_47_pad_type_0"), val = string("valid")]; - tensor x_47_strides_0 = const()[name = string("x_47_strides_0"), val = tensor([1, 1])]; - tensor x_47_pad_0 = const()[name = string("x_47_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_47_dilations_0 = const()[name = string("x_47_dilations_0"), val = tensor([1, 1])]; - int32 x_47_groups_0 = const()[name = string("x_47_groups_0"), val = int32(1)]; - tensor model_model_layers_2_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(903090688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(909062720))))[name = string("model_model_layers_2_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_47_cast_fp16 = conv(dilations = x_47_dilations_0, groups = x_47_groups_0, pad = x_47_pad_0, pad_type = x_47_pad_type_0, strides = x_47_strides_0, weight = model_model_layers_2_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_55_cast_fp16)[name = string("x_47_cast_fp16")]; - string b_5_pad_type_0 = const()[name = string("b_5_pad_type_0"), val = string("valid")]; - tensor b_5_strides_0 = const()[name = string("b_5_strides_0"), val = tensor([1, 1])]; - tensor b_5_pad_0 = const()[name = string("b_5_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_5_dilations_0 = const()[name = string("b_5_dilations_0"), val = tensor([1, 1])]; - int32 b_5_groups_0 = const()[name = string("b_5_groups_0"), val = int32(1)]; - tensor model_model_layers_2_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(909173376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(915145408))))[name = string("model_model_layers_2_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_5_cast_fp16 = conv(dilations = b_5_dilations_0, groups = b_5_groups_0, pad = b_5_pad_0, pad_type = b_5_pad_type_0, strides = b_5_strides_0, weight = model_model_layers_2_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_55_cast_fp16)[name = string("b_5_cast_fp16")]; - string var_3684_mode_0 = const()[name = string("op_3684_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_3684_cast_fp16 = gelu(mode = var_3684_mode_0, x = x_47_cast_fp16)[name = string("op_3684_cast_fp16")]; - tensor input_57_cast_fp16 = mul(x = var_3684_cast_fp16, y = b_5_cast_fp16)[name = string("input_57_cast_fp16")]; - string e_5_pad_type_0 = const()[name = string("e_5_pad_type_0"), val = string("valid")]; - tensor e_5_strides_0 = const()[name = string("e_5_strides_0"), val = tensor([1, 1])]; - tensor e_5_pad_0 = const()[name = string("e_5_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_5_dilations_0 = const()[name = string("e_5_dilations_0"), val = tensor([1, 1])]; - int32 e_5_groups_0 = const()[name = string("e_5_groups_0"), val = int32(1)]; - tensor model_model_layers_2_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94759040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100731072))))[name = string("model_model_layers_2_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_5_cast_fp16 = conv(dilations = e_5_dilations_0, groups = e_5_groups_0, pad = e_5_pad_0, pad_type = e_5_pad_type_0, strides = e_5_strides_0, weight = model_model_layers_2_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_57_cast_fp16)[name = string("e_5_cast_fp16")]; - tensor var_3692_axes_0 = const()[name = string("op_3692_axes_0"), val = tensor([2])]; - tensor var_3692_cast_fp16 = squeeze(axes = var_3692_axes_0, x = e_5_cast_fp16)[name = string("op_3692_cast_fp16")]; - tensor var_3693 = const()[name = string("op_3693"), val = tensor([0, 2, 1])]; - int32 var_3704 = const()[name = string("op_3704"), val = int32(-1)]; - fp16 const_125_promoted_to_fp16 = const()[name = string("const_125_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_45_cast_fp16 = transpose(perm = var_3693, x = var_3692_cast_fp16)[name = string("transpose_209")]; - tensor var_3706_cast_fp16 = mul(x = hidden_states_45_cast_fp16, y = const_125_promoted_to_fp16)[name = string("op_3706_cast_fp16")]; - bool input_59_interleave_0 = const()[name = string("input_59_interleave_0"), val = bool(false)]; - tensor input_59_cast_fp16 = concat(axis = var_3704, interleave = input_59_interleave_0, values = (hidden_states_45_cast_fp16, var_3706_cast_fp16))[name = string("input_59_cast_fp16")]; - tensor normed_69_axes_0 = const()[name = string("normed_69_axes_0"), val = tensor([-1])]; - fp16 var_3701_to_fp16 = const()[name = string("op_3701_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_69_cast_fp16 = layer_norm(axes = normed_69_axes_0, epsilon = var_3701_to_fp16, x = input_59_cast_fp16)[name = string("normed_69_cast_fp16")]; - tensor normed_71_begin_0 = const()[name = string("normed_71_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_71_end_0 = const()[name = string("normed_71_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_71_end_mask_0 = const()[name = string("normed_71_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_71_cast_fp16 = slice_by_index(begin = normed_71_begin_0, end = normed_71_end_0, end_mask = normed_71_end_mask_0, x = normed_69_cast_fp16)[name = string("normed_71_cast_fp16")]; - tensor var_3720_to_fp16 = const()[name = string("op_3720_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100749568)))]; - tensor hidden_states_47_cast_fp16 = mul(x = normed_71_cast_fp16, y = var_3720_to_fp16)[name = string("hidden_states_47_cast_fp16")]; - tensor hidden_states_49_cast_fp16 = add(x = hidden_states_43_cast_fp16, y = hidden_states_47_cast_fp16)[name = string("hidden_states_49_cast_fp16")]; - int32 var_3774 = const()[name = string("op_3774"), val = int32(-1)]; - fp16 const_130_promoted_to_fp16 = const()[name = string("const_130_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_3776_cast_fp16 = mul(x = hidden_states_49_cast_fp16, y = const_130_promoted_to_fp16)[name = string("op_3776_cast_fp16")]; - bool input_61_interleave_0 = const()[name = string("input_61_interleave_0"), val = bool(false)]; - tensor input_61_cast_fp16 = concat(axis = var_3774, interleave = input_61_interleave_0, values = (hidden_states_49_cast_fp16, var_3776_cast_fp16))[name = string("input_61_cast_fp16")]; - tensor normed_73_axes_0 = const()[name = string("normed_73_axes_0"), val = tensor([-1])]; - fp16 var_3771_to_fp16 = const()[name = string("op_3771_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_73_cast_fp16 = layer_norm(axes = normed_73_axes_0, epsilon = var_3771_to_fp16, x = input_61_cast_fp16)[name = string("normed_73_cast_fp16")]; - tensor normed_75_begin_0 = const()[name = string("normed_75_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_75_end_0 = const()[name = string("normed_75_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_75_end_mask_0 = const()[name = string("normed_75_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_75_cast_fp16 = slice_by_index(begin = normed_75_begin_0, end = normed_75_end_0, end_mask = normed_75_end_mask_0, x = normed_73_cast_fp16)[name = string("normed_75_cast_fp16")]; - tensor var_3790_to_fp16 = const()[name = string("op_3790_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100751936)))]; - tensor hidden_states_51_cast_fp16 = mul(x = normed_75_cast_fp16, y = var_3790_to_fp16)[name = string("hidden_states_51_cast_fp16")]; - tensor var_3801 = const()[name = string("op_3801"), val = tensor([0, 2, 1])]; - tensor var_3804_axes_0 = const()[name = string("op_3804_axes_0"), val = tensor([2])]; - tensor var_3802_cast_fp16 = transpose(perm = var_3801, x = hidden_states_51_cast_fp16)[name = string("transpose_208")]; - tensor var_3804_cast_fp16 = expand_dims(axes = var_3804_axes_0, x = var_3802_cast_fp16)[name = string("op_3804_cast_fp16")]; - string query_states_25_pad_type_0 = const()[name = string("query_states_25_pad_type_0"), val = string("valid")]; - tensor query_states_25_strides_0 = const()[name = string("query_states_25_strides_0"), val = tensor([1, 1])]; - tensor query_states_25_pad_0 = const()[name = string("query_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_25_dilations_0 = const()[name = string("query_states_25_dilations_0"), val = tensor([1, 1])]; - int32 query_states_25_groups_0 = const()[name = string("query_states_25_groups_0"), val = int32(1)]; - tensor query_states_25 = conv(dilations = query_states_25_dilations_0, groups = query_states_25_groups_0, pad = query_states_25_pad_0, pad_type = query_states_25_pad_type_0, strides = query_states_25_strides_0, weight = model_model_layers_3_self_attn_q_proj_weight_palettized, x = var_3804_cast_fp16)[name = string("query_states_25")]; - string key_states_31_pad_type_0 = const()[name = string("key_states_31_pad_type_0"), val = string("valid")]; - tensor key_states_31_strides_0 = const()[name = string("key_states_31_strides_0"), val = tensor([1, 1])]; - tensor key_states_31_pad_0 = const()[name = string("key_states_31_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_31_dilations_0 = const()[name = string("key_states_31_dilations_0"), val = tensor([1, 1])]; - int32 key_states_31_groups_0 = const()[name = string("key_states_31_groups_0"), val = int32(1)]; - tensor key_states_31 = conv(dilations = key_states_31_dilations_0, groups = key_states_31_groups_0, pad = key_states_31_pad_0, pad_type = key_states_31_pad_type_0, strides = key_states_31_strides_0, weight = model_model_layers_3_self_attn_k_proj_weight_palettized, x = var_3804_cast_fp16)[name = string("key_states_31")]; - string value_states_25_pad_type_0 = const()[name = string("value_states_25_pad_type_0"), val = string("valid")]; - tensor value_states_25_strides_0 = const()[name = string("value_states_25_strides_0"), val = tensor([1, 1])]; - tensor value_states_25_pad_0 = const()[name = string("value_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_25_dilations_0 = const()[name = string("value_states_25_dilations_0"), val = tensor([1, 1])]; - int32 value_states_25_groups_0 = const()[name = string("value_states_25_groups_0"), val = int32(1)]; - tensor value_states_25 = conv(dilations = value_states_25_dilations_0, groups = value_states_25_groups_0, pad = value_states_25_pad_0, pad_type = value_states_25_pad_type_0, strides = value_states_25_strides_0, weight = model_model_layers_3_self_attn_v_proj_weight_palettized, x = var_3804_cast_fp16)[name = string("value_states_25")]; - tensor var_3846 = const()[name = string("op_3846"), val = tensor([1, 4, 256, 64])]; - tensor var_3847 = reshape(shape = var_3846, x = query_states_25)[name = string("op_3847")]; - tensor var_3852 = const()[name = string("op_3852"), val = tensor([0, 1, 3, 2])]; - tensor var_3857 = const()[name = string("op_3857"), val = tensor([1, 1, 256, 64])]; - tensor var_3858 = reshape(shape = var_3857, x = key_states_31)[name = string("op_3858")]; - tensor var_3863 = const()[name = string("op_3863"), val = tensor([0, 1, 3, 2])]; - tensor var_3868 = const()[name = string("op_3868"), val = tensor([1, 1, 256, 64])]; - tensor var_3869 = reshape(shape = var_3868, x = value_states_25)[name = string("op_3869")]; - tensor var_3874 = const()[name = string("op_3874"), val = tensor([0, 1, 3, 2])]; - int32 var_3885 = const()[name = string("op_3885"), val = int32(-1)]; - fp16 const_135_promoted = const()[name = string("const_135_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_53 = transpose(perm = var_3852, x = var_3847)[name = string("transpose_207")]; - tensor var_3887 = mul(x = hidden_states_53, y = const_135_promoted)[name = string("op_3887")]; - bool input_65_interleave_0 = const()[name = string("input_65_interleave_0"), val = bool(false)]; - tensor input_65 = concat(axis = var_3885, interleave = input_65_interleave_0, values = (hidden_states_53, var_3887))[name = string("input_65")]; - tensor normed_77_axes_0 = const()[name = string("normed_77_axes_0"), val = tensor([-1])]; - fp16 var_3882_to_fp16 = const()[name = string("op_3882_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_77_cast_fp16 = layer_norm(axes = normed_77_axes_0, epsilon = var_3882_to_fp16, x = input_65)[name = string("normed_77_cast_fp16")]; - tensor normed_79_begin_0 = const()[name = string("normed_79_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_79_end_0 = const()[name = string("normed_79_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_79_end_mask_0 = const()[name = string("normed_79_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_79 = slice_by_index(begin = normed_79_begin_0, end = normed_79_end_0, end_mask = normed_79_end_mask_0, x = normed_77_cast_fp16)[name = string("normed_79")]; - tensor var_3901_to_fp16 = const()[name = string("op_3901_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100754304)))]; - tensor q_7_cast_fp16 = mul(x = normed_79, y = var_3901_to_fp16)[name = string("q_7_cast_fp16")]; - int32 var_3912 = const()[name = string("op_3912"), val = int32(-1)]; - fp16 const_139_promoted = const()[name = string("const_139_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_55 = transpose(perm = var_3863, x = var_3858)[name = string("transpose_206")]; - tensor var_3914 = mul(x = hidden_states_55, y = const_139_promoted)[name = string("op_3914")]; - bool input_67_interleave_0 = const()[name = string("input_67_interleave_0"), val = bool(false)]; - tensor input_67 = concat(axis = var_3912, interleave = input_67_interleave_0, values = (hidden_states_55, var_3914))[name = string("input_67")]; - tensor normed_81_axes_0 = const()[name = string("normed_81_axes_0"), val = tensor([-1])]; - fp16 var_3909_to_fp16 = const()[name = string("op_3909_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_81_cast_fp16 = layer_norm(axes = normed_81_axes_0, epsilon = var_3909_to_fp16, x = input_67)[name = string("normed_81_cast_fp16")]; - tensor normed_83_begin_0 = const()[name = string("normed_83_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_83_end_0 = const()[name = string("normed_83_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_83_end_mask_0 = const()[name = string("normed_83_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_83 = slice_by_index(begin = normed_83_begin_0, end = normed_83_end_0, end_mask = normed_83_end_mask_0, x = normed_81_cast_fp16)[name = string("normed_83")]; - tensor var_3928_to_fp16 = const()[name = string("op_3928_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100754880)))]; - tensor k_7_cast_fp16 = mul(x = normed_83, y = var_3928_to_fp16)[name = string("k_7_cast_fp16")]; - tensor var_3942_cast_fp16 = mul(x = q_7_cast_fp16, y = cos_5)[name = string("op_3942_cast_fp16")]; - tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_13_cast_fp16 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = q_7_cast_fp16)[name = string("x1_13_cast_fp16")]; - tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_13_cast_fp16 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = q_7_cast_fp16)[name = string("x2_13_cast_fp16")]; - fp16 const_145_promoted_to_fp16 = const()[name = string("const_145_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_3963_cast_fp16 = mul(x = x2_13_cast_fp16, y = const_145_promoted_to_fp16)[name = string("op_3963_cast_fp16")]; - int32 var_3965 = const()[name = string("op_3965"), val = int32(-1)]; - bool var_3966_interleave_0 = const()[name = string("op_3966_interleave_0"), val = bool(false)]; - tensor var_3966_cast_fp16 = concat(axis = var_3965, interleave = var_3966_interleave_0, values = (var_3963_cast_fp16, x1_13_cast_fp16))[name = string("op_3966_cast_fp16")]; - tensor var_3967_cast_fp16 = mul(x = var_3966_cast_fp16, y = sin_5)[name = string("op_3967_cast_fp16")]; - tensor query_states_27_cast_fp16 = add(x = var_3942_cast_fp16, y = var_3967_cast_fp16)[name = string("query_states_27_cast_fp16")]; - tensor var_3970_cast_fp16 = mul(x = k_7_cast_fp16, y = cos_5)[name = string("op_3970_cast_fp16")]; - tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_15_cast_fp16 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = k_7_cast_fp16)[name = string("x1_15_cast_fp16")]; - tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_15_cast_fp16 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = k_7_cast_fp16)[name = string("x2_15_cast_fp16")]; - fp16 const_148_promoted_to_fp16 = const()[name = string("const_148_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_3991_cast_fp16 = mul(x = x2_15_cast_fp16, y = const_148_promoted_to_fp16)[name = string("op_3991_cast_fp16")]; - int32 var_3993 = const()[name = string("op_3993"), val = int32(-1)]; - bool var_3994_interleave_0 = const()[name = string("op_3994_interleave_0"), val = bool(false)]; - tensor var_3994_cast_fp16 = concat(axis = var_3993, interleave = var_3994_interleave_0, values = (var_3991_cast_fp16, x1_15_cast_fp16))[name = string("op_3994_cast_fp16")]; - tensor var_3995_cast_fp16 = mul(x = var_3994_cast_fp16, y = sin_5)[name = string("op_3995_cast_fp16")]; - tensor key_states_33_cast_fp16 = add(x = var_3970_cast_fp16, y = var_3995_cast_fp16)[name = string("key_states_33_cast_fp16")]; - tensor expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor([3])]; - tensor expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor([0])]; - tensor expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor([0])]; - tensor expand_dims_40 = const()[name = string("expand_dims_40"), val = tensor([4])]; - int32 concat_56_axis_0 = const()[name = string("concat_56_axis_0"), val = int32(0)]; - bool concat_56_interleave_0 = const()[name = string("concat_56_interleave_0"), val = bool(false)]; - tensor concat_56 = concat(axis = concat_56_axis_0, interleave = concat_56_interleave_0, values = (expand_dims_36, expand_dims_37, current_pos, expand_dims_39))[name = string("concat_56")]; - tensor concat_57_values1_0 = const()[name = string("concat_57_values1_0"), val = tensor([0])]; - tensor concat_57_values3_0 = const()[name = string("concat_57_values3_0"), val = tensor([0])]; - int32 concat_57_axis_0 = const()[name = string("concat_57_axis_0"), val = int32(0)]; - bool concat_57_interleave_0 = const()[name = string("concat_57_interleave_0"), val = bool(false)]; - tensor concat_57 = concat(axis = concat_57_axis_0, interleave = concat_57_interleave_0, values = (expand_dims_40, concat_57_values1_0, end_pos_1, concat_57_values3_0))[name = string("concat_57")]; - tensor model_model_kv_cache_local_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_56, begin_mask = model_model_kv_cache_local_internal_tensor_assign_7_begin_mask_0, end = concat_57, end_mask = model_model_kv_cache_local_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_7_stride_0, update = key_states_33_cast_fp16, x = coreml_update_state_57)[name = string("model_model_kv_cache_local_internal_tensor_assign_7_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_110_write_state")]; - tensor coreml_update_state_58 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_110")]; - tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([25])]; - tensor expand_dims_43 = const()[name = string("expand_dims_43"), val = tensor([0])]; - tensor expand_dims_45 = const()[name = string("expand_dims_45"), val = tensor([0])]; - tensor expand_dims_46 = const()[name = string("expand_dims_46"), val = tensor([26])]; - int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)]; - bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)]; - tensor concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (expand_dims_42, expand_dims_43, current_pos, expand_dims_45))[name = string("concat_60")]; - tensor concat_61_values1_0 = const()[name = string("concat_61_values1_0"), val = tensor([0])]; - tensor concat_61_values3_0 = const()[name = string("concat_61_values3_0"), val = tensor([0])]; - int32 concat_61_axis_0 = const()[name = string("concat_61_axis_0"), val = int32(0)]; - bool concat_61_interleave_0 = const()[name = string("concat_61_interleave_0"), val = bool(false)]; - tensor concat_61 = concat(axis = concat_61_axis_0, interleave = concat_61_interleave_0, values = (expand_dims_46, concat_61_values1_0, end_pos_1, concat_61_values3_0))[name = string("concat_61")]; - tensor model_model_kv_cache_local_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor value_states_27 = transpose(perm = var_3874, x = var_3869)[name = string("transpose_205")]; - tensor model_model_kv_cache_local_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_60, begin_mask = model_model_kv_cache_local_internal_tensor_assign_8_begin_mask_0, end = concat_61, end_mask = model_model_kv_cache_local_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_8_stride_0, update = value_states_27, x = coreml_update_state_58)[name = string("model_model_kv_cache_local_internal_tensor_assign_8_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_111_write_state")]; - tensor coreml_update_state_59 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_111")]; - tensor var_4094_begin_0 = const()[name = string("op_4094_begin_0"), val = tensor([3, 0, 0, 0])]; - tensor var_4094_end_0 = const()[name = string("op_4094_end_0"), val = tensor([4, 1, 512, 256])]; - tensor var_4094_end_mask_0 = const()[name = string("op_4094_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_4094_cast_fp16 = slice_by_index(begin = var_4094_begin_0, end = var_4094_end_0, end_mask = var_4094_end_mask_0, x = coreml_update_state_59)[name = string("op_4094_cast_fp16")]; - tensor var_4101_begin_0 = const()[name = string("op_4101_begin_0"), val = tensor([25, 0, 0, 0])]; - tensor var_4101_end_0 = const()[name = string("op_4101_end_0"), val = tensor([26, 1, 512, 256])]; - tensor var_4101_end_mask_0 = const()[name = string("op_4101_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_4101_cast_fp16 = slice_by_index(begin = var_4101_begin_0, end = var_4101_end_0, end_mask = var_4101_end_mask_0, x = coreml_update_state_59)[name = string("op_4101_cast_fp16")]; - tensor var_4140 = const()[name = string("op_4140"), val = tensor([1, 4, 1, 1])]; - tensor x_53_cast_fp16 = tile(reps = var_4140, x = var_4094_cast_fp16)[name = string("x_53_cast_fp16")]; - tensor var_4160 = const()[name = string("op_4160"), val = tensor([1, 4, 1, 1])]; - tensor x_59_cast_fp16 = tile(reps = var_4160, x = var_4101_cast_fp16)[name = string("x_59_cast_fp16")]; - bool var_4187_transpose_x_0 = const()[name = string("op_4187_transpose_x_0"), val = bool(false)]; - bool var_4187_transpose_y_0 = const()[name = string("op_4187_transpose_y_0"), val = bool(true)]; - tensor var_4187 = matmul(transpose_x = var_4187_transpose_x_0, transpose_y = var_4187_transpose_y_0, x = query_states_27_cast_fp16, y = x_53_cast_fp16)[name = string("op_4187")]; - fp16 var_4188_to_fp16 = const()[name = string("op_4188_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_13_cast_fp16 = mul(x = var_4187, y = var_4188_to_fp16)[name = string("attn_weights_13_cast_fp16")]; - tensor attn_weights_15_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = mask_slice_1)[name = string("attn_weights_15_cast_fp16")]; - int32 var_4223 = const()[name = string("op_4223"), val = int32(-1)]; - tensor var_4225_cast_fp16 = softmax(axis = var_4223, x = attn_weights_15_cast_fp16)[name = string("op_4225_cast_fp16")]; - tensor concat_66 = const()[name = string("concat_66"), val = tensor([4, 64, 512])]; - tensor reshape_9_cast_fp16 = reshape(shape = concat_66, x = var_4225_cast_fp16)[name = string("reshape_9_cast_fp16")]; - tensor concat_67 = const()[name = string("concat_67"), val = tensor([4, 512, 256])]; - tensor reshape_10_cast_fp16 = reshape(shape = concat_67, x = x_59_cast_fp16)[name = string("reshape_10_cast_fp16")]; - bool matmul_3_transpose_x_0 = const()[name = string("matmul_3_transpose_x_0"), val = bool(false)]; - bool matmul_3_transpose_y_0 = const()[name = string("matmul_3_transpose_y_0"), val = bool(false)]; - tensor matmul_3_cast_fp16 = matmul(transpose_x = matmul_3_transpose_x_0, transpose_y = matmul_3_transpose_y_0, x = reshape_9_cast_fp16, y = reshape_10_cast_fp16)[name = string("matmul_3_cast_fp16")]; - tensor concat_71 = const()[name = string("concat_71"), val = tensor([1, 4, 64, 256])]; - tensor reshape_11_cast_fp16 = reshape(shape = concat_71, x = matmul_3_cast_fp16)[name = string("reshape_11_cast_fp16")]; - tensor var_4237_perm_0 = const()[name = string("op_4237_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_4256 = const()[name = string("op_4256"), val = tensor([1, 64, 1024])]; - tensor var_4237_cast_fp16 = transpose(perm = var_4237_perm_0, x = reshape_11_cast_fp16)[name = string("transpose_204")]; - tensor attn_output_35_cast_fp16 = reshape(shape = var_4256, x = var_4237_cast_fp16)[name = string("attn_output_35_cast_fp16")]; - tensor var_4261 = const()[name = string("op_4261"), val = tensor([0, 2, 1])]; - string var_4277_pad_type_0 = const()[name = string("op_4277_pad_type_0"), val = string("valid")]; - int32 var_4277_groups_0 = const()[name = string("op_4277_groups_0"), val = int32(1)]; - tensor var_4277_strides_0 = const()[name = string("op_4277_strides_0"), val = tensor([1])]; - tensor var_4277_pad_0 = const()[name = string("op_4277_pad_0"), val = tensor([0, 0])]; - tensor var_4277_dilations_0 = const()[name = string("op_4277_dilations_0"), val = tensor([1])]; - tensor squeeze_3_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100755456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101640256))))[name = string("squeeze_3_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_4262_cast_fp16 = transpose(perm = var_4261, x = attn_output_35_cast_fp16)[name = string("transpose_203")]; - tensor var_4277_cast_fp16 = conv(dilations = var_4277_dilations_0, groups = var_4277_groups_0, pad = var_4277_pad_0, pad_type = var_4277_pad_type_0, strides = var_4277_strides_0, weight = squeeze_3_cast_fp16_to_fp32_to_fp16_palettized, x = var_4262_cast_fp16)[name = string("op_4277_cast_fp16")]; - tensor var_4281 = const()[name = string("op_4281"), val = tensor([0, 2, 1])]; - int32 var_4292 = const()[name = string("op_4292"), val = int32(-1)]; - fp16 const_160_promoted_to_fp16 = const()[name = string("const_160_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_57_cast_fp16 = transpose(perm = var_4281, x = var_4277_cast_fp16)[name = string("transpose_202")]; - tensor var_4294_cast_fp16 = mul(x = hidden_states_57_cast_fp16, y = const_160_promoted_to_fp16)[name = string("op_4294_cast_fp16")]; - bool input_71_interleave_0 = const()[name = string("input_71_interleave_0"), val = bool(false)]; - tensor input_71_cast_fp16 = concat(axis = var_4292, interleave = input_71_interleave_0, values = (hidden_states_57_cast_fp16, var_4294_cast_fp16))[name = string("input_71_cast_fp16")]; - tensor normed_85_axes_0 = const()[name = string("normed_85_axes_0"), val = tensor([-1])]; - fp16 var_4289_to_fp16 = const()[name = string("op_4289_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_85_cast_fp16 = layer_norm(axes = normed_85_axes_0, epsilon = var_4289_to_fp16, x = input_71_cast_fp16)[name = string("normed_85_cast_fp16")]; - tensor normed_87_begin_0 = const()[name = string("normed_87_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_87_end_0 = const()[name = string("normed_87_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_87_end_mask_0 = const()[name = string("normed_87_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_87_cast_fp16 = slice_by_index(begin = normed_87_begin_0, end = normed_87_end_0, end_mask = normed_87_end_mask_0, x = normed_85_cast_fp16)[name = string("normed_87_cast_fp16")]; - tensor var_4308_to_fp16 = const()[name = string("op_4308_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101658752)))]; - tensor attn_output_39_cast_fp16 = mul(x = normed_87_cast_fp16, y = var_4308_to_fp16)[name = string("attn_output_39_cast_fp16")]; - tensor hidden_states_59_cast_fp16 = add(x = hidden_states_49_cast_fp16, y = attn_output_39_cast_fp16)[name = string("hidden_states_59_cast_fp16")]; - int32 var_4321 = const()[name = string("op_4321"), val = int32(-1)]; - fp16 const_164_promoted_to_fp16 = const()[name = string("const_164_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_4323_cast_fp16 = mul(x = hidden_states_59_cast_fp16, y = const_164_promoted_to_fp16)[name = string("op_4323_cast_fp16")]; - bool input_73_interleave_0 = const()[name = string("input_73_interleave_0"), val = bool(false)]; - tensor input_73_cast_fp16 = concat(axis = var_4321, interleave = input_73_interleave_0, values = (hidden_states_59_cast_fp16, var_4323_cast_fp16))[name = string("input_73_cast_fp16")]; - tensor normed_89_axes_0 = const()[name = string("normed_89_axes_0"), val = tensor([-1])]; - fp16 var_4318_to_fp16 = const()[name = string("op_4318_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_89_cast_fp16 = layer_norm(axes = normed_89_axes_0, epsilon = var_4318_to_fp16, x = input_73_cast_fp16)[name = string("normed_89_cast_fp16")]; - tensor normed_91_begin_0 = const()[name = string("normed_91_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_91_end_0 = const()[name = string("normed_91_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_91_end_mask_0 = const()[name = string("normed_91_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_91_cast_fp16 = slice_by_index(begin = normed_91_begin_0, end = normed_91_end_0, end_mask = normed_91_end_mask_0, x = normed_89_cast_fp16)[name = string("normed_91_cast_fp16")]; - tensor var_4337_to_fp16 = const()[name = string("op_4337_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101661120)))]; - tensor x_61_cast_fp16 = mul(x = normed_91_cast_fp16, y = var_4337_to_fp16)[name = string("x_61_cast_fp16")]; - tensor var_4349 = const()[name = string("op_4349"), val = tensor([0, 2, 1])]; - tensor input_75_axes_0 = const()[name = string("input_75_axes_0"), val = tensor([2])]; - tensor var_4350_cast_fp16 = transpose(perm = var_4349, x = x_61_cast_fp16)[name = string("transpose_201")]; - tensor input_75_cast_fp16 = expand_dims(axes = input_75_axes_0, x = var_4350_cast_fp16)[name = string("input_75_cast_fp16")]; - string x_63_pad_type_0 = const()[name = string("x_63_pad_type_0"), val = string("valid")]; - tensor x_63_strides_0 = const()[name = string("x_63_strides_0"), val = tensor([1, 1])]; - tensor x_63_pad_0 = const()[name = string("x_63_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_63_dilations_0 = const()[name = string("x_63_dilations_0"), val = tensor([1, 1])]; - int32 x_63_groups_0 = const()[name = string("x_63_groups_0"), val = int32(1)]; - tensor model_model_layers_3_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(915256064))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(921228096))))[name = string("model_model_layers_3_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_63_cast_fp16 = conv(dilations = x_63_dilations_0, groups = x_63_groups_0, pad = x_63_pad_0, pad_type = x_63_pad_type_0, strides = x_63_strides_0, weight = model_model_layers_3_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_75_cast_fp16)[name = string("x_63_cast_fp16")]; - string b_7_pad_type_0 = const()[name = string("b_7_pad_type_0"), val = string("valid")]; - tensor b_7_strides_0 = const()[name = string("b_7_strides_0"), val = tensor([1, 1])]; - tensor b_7_pad_0 = const()[name = string("b_7_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_7_dilations_0 = const()[name = string("b_7_dilations_0"), val = tensor([1, 1])]; - int32 b_7_groups_0 = const()[name = string("b_7_groups_0"), val = int32(1)]; - tensor model_model_layers_3_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(921338752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(927310784))))[name = string("model_model_layers_3_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_7_cast_fp16 = conv(dilations = b_7_dilations_0, groups = b_7_groups_0, pad = b_7_pad_0, pad_type = b_7_pad_type_0, strides = b_7_strides_0, weight = model_model_layers_3_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_75_cast_fp16)[name = string("b_7_cast_fp16")]; - string var_4375_mode_0 = const()[name = string("op_4375_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_4375_cast_fp16 = gelu(mode = var_4375_mode_0, x = x_63_cast_fp16)[name = string("op_4375_cast_fp16")]; - tensor input_77_cast_fp16 = mul(x = var_4375_cast_fp16, y = b_7_cast_fp16)[name = string("input_77_cast_fp16")]; - string e_7_pad_type_0 = const()[name = string("e_7_pad_type_0"), val = string("valid")]; - tensor e_7_strides_0 = const()[name = string("e_7_strides_0"), val = tensor([1, 1])]; - tensor e_7_pad_0 = const()[name = string("e_7_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_7_dilations_0 = const()[name = string("e_7_dilations_0"), val = tensor([1, 1])]; - int32 e_7_groups_0 = const()[name = string("e_7_groups_0"), val = int32(1)]; - tensor model_model_layers_3_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113828864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119800896))))[name = string("model_model_layers_3_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_7_cast_fp16 = conv(dilations = e_7_dilations_0, groups = e_7_groups_0, pad = e_7_pad_0, pad_type = e_7_pad_type_0, strides = e_7_strides_0, weight = model_model_layers_3_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_77_cast_fp16)[name = string("e_7_cast_fp16")]; - tensor var_4383_axes_0 = const()[name = string("op_4383_axes_0"), val = tensor([2])]; - tensor var_4383_cast_fp16 = squeeze(axes = var_4383_axes_0, x = e_7_cast_fp16)[name = string("op_4383_cast_fp16")]; - tensor var_4384 = const()[name = string("op_4384"), val = tensor([0, 2, 1])]; - int32 var_4395 = const()[name = string("op_4395"), val = int32(-1)]; - fp16 const_168_promoted_to_fp16 = const()[name = string("const_168_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_61_cast_fp16 = transpose(perm = var_4384, x = var_4383_cast_fp16)[name = string("transpose_200")]; - tensor var_4397_cast_fp16 = mul(x = hidden_states_61_cast_fp16, y = const_168_promoted_to_fp16)[name = string("op_4397_cast_fp16")]; - bool input_79_interleave_0 = const()[name = string("input_79_interleave_0"), val = bool(false)]; - tensor input_79_cast_fp16 = concat(axis = var_4395, interleave = input_79_interleave_0, values = (hidden_states_61_cast_fp16, var_4397_cast_fp16))[name = string("input_79_cast_fp16")]; - tensor normed_93_axes_0 = const()[name = string("normed_93_axes_0"), val = tensor([-1])]; - fp16 var_4392_to_fp16 = const()[name = string("op_4392_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_93_cast_fp16 = layer_norm(axes = normed_93_axes_0, epsilon = var_4392_to_fp16, x = input_79_cast_fp16)[name = string("normed_93_cast_fp16")]; - tensor normed_95_begin_0 = const()[name = string("normed_95_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_95_end_0 = const()[name = string("normed_95_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_95_end_mask_0 = const()[name = string("normed_95_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_95_cast_fp16 = slice_by_index(begin = normed_95_begin_0, end = normed_95_end_0, end_mask = normed_95_end_mask_0, x = normed_93_cast_fp16)[name = string("normed_95_cast_fp16")]; - tensor var_4411_to_fp16 = const()[name = string("op_4411_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119819392)))]; - tensor hidden_states_63_cast_fp16 = mul(x = normed_95_cast_fp16, y = var_4411_to_fp16)[name = string("hidden_states_63_cast_fp16")]; - tensor hidden_states_65_cast_fp16 = add(x = hidden_states_59_cast_fp16, y = hidden_states_63_cast_fp16)[name = string("hidden_states_65_cast_fp16")]; - int32 var_4465 = const()[name = string("op_4465"), val = int32(-1)]; - fp16 const_173_promoted_to_fp16 = const()[name = string("const_173_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_4467_cast_fp16 = mul(x = hidden_states_65_cast_fp16, y = const_173_promoted_to_fp16)[name = string("op_4467_cast_fp16")]; - bool input_81_interleave_0 = const()[name = string("input_81_interleave_0"), val = bool(false)]; - tensor input_81_cast_fp16 = concat(axis = var_4465, interleave = input_81_interleave_0, values = (hidden_states_65_cast_fp16, var_4467_cast_fp16))[name = string("input_81_cast_fp16")]; - tensor normed_97_axes_0 = const()[name = string("normed_97_axes_0"), val = tensor([-1])]; - fp16 var_4462_to_fp16 = const()[name = string("op_4462_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_97_cast_fp16 = layer_norm(axes = normed_97_axes_0, epsilon = var_4462_to_fp16, x = input_81_cast_fp16)[name = string("normed_97_cast_fp16")]; - tensor normed_99_begin_0 = const()[name = string("normed_99_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_99_end_0 = const()[name = string("normed_99_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_99_end_mask_0 = const()[name = string("normed_99_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_99_cast_fp16 = slice_by_index(begin = normed_99_begin_0, end = normed_99_end_0, end_mask = normed_99_end_mask_0, x = normed_97_cast_fp16)[name = string("normed_99_cast_fp16")]; - tensor var_4481_to_fp16 = const()[name = string("op_4481_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119821760)))]; - tensor hidden_states_67_cast_fp16 = mul(x = normed_99_cast_fp16, y = var_4481_to_fp16)[name = string("hidden_states_67_cast_fp16")]; - tensor var_4492 = const()[name = string("op_4492"), val = tensor([0, 2, 1])]; - tensor var_4495_axes_0 = const()[name = string("op_4495_axes_0"), val = tensor([2])]; - tensor var_4493_cast_fp16 = transpose(perm = var_4492, x = hidden_states_67_cast_fp16)[name = string("transpose_199")]; - tensor var_4495_cast_fp16 = expand_dims(axes = var_4495_axes_0, x = var_4493_cast_fp16)[name = string("op_4495_cast_fp16")]; - string query_states_33_pad_type_0 = const()[name = string("query_states_33_pad_type_0"), val = string("valid")]; - tensor query_states_33_strides_0 = const()[name = string("query_states_33_strides_0"), val = tensor([1, 1])]; - tensor query_states_33_pad_0 = const()[name = string("query_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_33_dilations_0 = const()[name = string("query_states_33_dilations_0"), val = tensor([1, 1])]; - int32 query_states_33_groups_0 = const()[name = string("query_states_33_groups_0"), val = int32(1)]; - tensor query_states_33 = conv(dilations = query_states_33_dilations_0, groups = query_states_33_groups_0, pad = query_states_33_pad_0, pad_type = query_states_33_pad_type_0, strides = query_states_33_strides_0, weight = model_model_layers_4_self_attn_q_proj_weight_palettized, x = var_4495_cast_fp16)[name = string("query_states_33")]; - string key_states_41_pad_type_0 = const()[name = string("key_states_41_pad_type_0"), val = string("valid")]; - tensor key_states_41_strides_0 = const()[name = string("key_states_41_strides_0"), val = tensor([1, 1])]; - tensor key_states_41_pad_0 = const()[name = string("key_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_41_dilations_0 = const()[name = string("key_states_41_dilations_0"), val = tensor([1, 1])]; - int32 key_states_41_groups_0 = const()[name = string("key_states_41_groups_0"), val = int32(1)]; - tensor key_states_41 = conv(dilations = key_states_41_dilations_0, groups = key_states_41_groups_0, pad = key_states_41_pad_0, pad_type = key_states_41_pad_type_0, strides = key_states_41_strides_0, weight = model_model_layers_4_self_attn_k_proj_weight_palettized, x = var_4495_cast_fp16)[name = string("key_states_41")]; - string value_states_33_pad_type_0 = const()[name = string("value_states_33_pad_type_0"), val = string("valid")]; - tensor value_states_33_strides_0 = const()[name = string("value_states_33_strides_0"), val = tensor([1, 1])]; - tensor value_states_33_pad_0 = const()[name = string("value_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_33_dilations_0 = const()[name = string("value_states_33_dilations_0"), val = tensor([1, 1])]; - int32 value_states_33_groups_0 = const()[name = string("value_states_33_groups_0"), val = int32(1)]; - tensor value_states_33 = conv(dilations = value_states_33_dilations_0, groups = value_states_33_groups_0, pad = value_states_33_pad_0, pad_type = value_states_33_pad_type_0, strides = value_states_33_strides_0, weight = model_model_layers_4_self_attn_v_proj_weight_palettized, x = var_4495_cast_fp16)[name = string("value_states_33")]; - tensor var_4537 = const()[name = string("op_4537"), val = tensor([1, 4, 256, 64])]; - tensor var_4538 = reshape(shape = var_4537, x = query_states_33)[name = string("op_4538")]; - tensor var_4543 = const()[name = string("op_4543"), val = tensor([0, 1, 3, 2])]; - tensor var_4548 = const()[name = string("op_4548"), val = tensor([1, 1, 256, 64])]; - tensor var_4549 = reshape(shape = var_4548, x = key_states_41)[name = string("op_4549")]; - tensor var_4554 = const()[name = string("op_4554"), val = tensor([0, 1, 3, 2])]; - tensor var_4559 = const()[name = string("op_4559"), val = tensor([1, 1, 256, 64])]; - tensor var_4560 = reshape(shape = var_4559, x = value_states_33)[name = string("op_4560")]; - tensor var_4565 = const()[name = string("op_4565"), val = tensor([0, 1, 3, 2])]; - int32 var_4576 = const()[name = string("op_4576"), val = int32(-1)]; - fp16 const_178_promoted = const()[name = string("const_178_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_69 = transpose(perm = var_4543, x = var_4538)[name = string("transpose_198")]; - tensor var_4578 = mul(x = hidden_states_69, y = const_178_promoted)[name = string("op_4578")]; - bool input_85_interleave_0 = const()[name = string("input_85_interleave_0"), val = bool(false)]; - tensor input_85 = concat(axis = var_4576, interleave = input_85_interleave_0, values = (hidden_states_69, var_4578))[name = string("input_85")]; - tensor normed_101_axes_0 = const()[name = string("normed_101_axes_0"), val = tensor([-1])]; - fp16 var_4573_to_fp16 = const()[name = string("op_4573_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_101_cast_fp16 = layer_norm(axes = normed_101_axes_0, epsilon = var_4573_to_fp16, x = input_85)[name = string("normed_101_cast_fp16")]; - tensor normed_103_begin_0 = const()[name = string("normed_103_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_103_end_0 = const()[name = string("normed_103_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_103_end_mask_0 = const()[name = string("normed_103_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_103 = slice_by_index(begin = normed_103_begin_0, end = normed_103_end_0, end_mask = normed_103_end_mask_0, x = normed_101_cast_fp16)[name = string("normed_103")]; - tensor var_4592_to_fp16 = const()[name = string("op_4592_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119824128)))]; - tensor q_9_cast_fp16 = mul(x = normed_103, y = var_4592_to_fp16)[name = string("q_9_cast_fp16")]; - int32 var_4603 = const()[name = string("op_4603"), val = int32(-1)]; - fp16 const_182_promoted = const()[name = string("const_182_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_71 = transpose(perm = var_4554, x = var_4549)[name = string("transpose_197")]; - tensor var_4605 = mul(x = hidden_states_71, y = const_182_promoted)[name = string("op_4605")]; - bool input_87_interleave_0 = const()[name = string("input_87_interleave_0"), val = bool(false)]; - tensor input_87 = concat(axis = var_4603, interleave = input_87_interleave_0, values = (hidden_states_71, var_4605))[name = string("input_87")]; - tensor normed_105_axes_0 = const()[name = string("normed_105_axes_0"), val = tensor([-1])]; - fp16 var_4600_to_fp16 = const()[name = string("op_4600_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_105_cast_fp16 = layer_norm(axes = normed_105_axes_0, epsilon = var_4600_to_fp16, x = input_87)[name = string("normed_105_cast_fp16")]; - tensor normed_107_begin_0 = const()[name = string("normed_107_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_107_end_0 = const()[name = string("normed_107_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_107_end_mask_0 = const()[name = string("normed_107_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_107 = slice_by_index(begin = normed_107_begin_0, end = normed_107_end_0, end_mask = normed_107_end_mask_0, x = normed_105_cast_fp16)[name = string("normed_107")]; - tensor var_4619_to_fp16 = const()[name = string("op_4619_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119824704)))]; - tensor k_9_cast_fp16 = mul(x = normed_107, y = var_4619_to_fp16)[name = string("k_9_cast_fp16")]; - tensor var_4633_cast_fp16 = mul(x = q_9_cast_fp16, y = cos_5)[name = string("op_4633_cast_fp16")]; - tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_17_cast_fp16 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = q_9_cast_fp16)[name = string("x1_17_cast_fp16")]; - tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_17_cast_fp16 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = q_9_cast_fp16)[name = string("x2_17_cast_fp16")]; - fp16 const_188_promoted_to_fp16 = const()[name = string("const_188_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_4654_cast_fp16 = mul(x = x2_17_cast_fp16, y = const_188_promoted_to_fp16)[name = string("op_4654_cast_fp16")]; - int32 var_4656 = const()[name = string("op_4656"), val = int32(-1)]; - bool var_4657_interleave_0 = const()[name = string("op_4657_interleave_0"), val = bool(false)]; - tensor var_4657_cast_fp16 = concat(axis = var_4656, interleave = var_4657_interleave_0, values = (var_4654_cast_fp16, x1_17_cast_fp16))[name = string("op_4657_cast_fp16")]; - tensor var_4658_cast_fp16 = mul(x = var_4657_cast_fp16, y = sin_5)[name = string("op_4658_cast_fp16")]; - tensor query_states_35_cast_fp16 = add(x = var_4633_cast_fp16, y = var_4658_cast_fp16)[name = string("query_states_35_cast_fp16")]; - tensor var_4661_cast_fp16 = mul(x = k_9_cast_fp16, y = cos_5)[name = string("op_4661_cast_fp16")]; - tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_19_cast_fp16 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = k_9_cast_fp16)[name = string("x1_19_cast_fp16")]; - tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_19_cast_fp16 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = k_9_cast_fp16)[name = string("x2_19_cast_fp16")]; - fp16 const_191_promoted_to_fp16 = const()[name = string("const_191_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_4682_cast_fp16 = mul(x = x2_19_cast_fp16, y = const_191_promoted_to_fp16)[name = string("op_4682_cast_fp16")]; - int32 var_4684 = const()[name = string("op_4684"), val = int32(-1)]; - bool var_4685_interleave_0 = const()[name = string("op_4685_interleave_0"), val = bool(false)]; - tensor var_4685_cast_fp16 = concat(axis = var_4684, interleave = var_4685_interleave_0, values = (var_4682_cast_fp16, x1_19_cast_fp16))[name = string("op_4685_cast_fp16")]; - tensor var_4686_cast_fp16 = mul(x = var_4685_cast_fp16, y = sin_5)[name = string("op_4686_cast_fp16")]; - tensor key_states_43_cast_fp16 = add(x = var_4661_cast_fp16, y = var_4686_cast_fp16)[name = string("key_states_43_cast_fp16")]; - tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([4])]; - tensor expand_dims_49 = const()[name = string("expand_dims_49"), val = tensor([0])]; - tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([0])]; - tensor expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor([5])]; - int32 concat_74_axis_0 = const()[name = string("concat_74_axis_0"), val = int32(0)]; - bool concat_74_interleave_0 = const()[name = string("concat_74_interleave_0"), val = bool(false)]; - tensor concat_74 = concat(axis = concat_74_axis_0, interleave = concat_74_interleave_0, values = (expand_dims_48, expand_dims_49, current_pos, expand_dims_51))[name = string("concat_74")]; - tensor concat_75_values1_0 = const()[name = string("concat_75_values1_0"), val = tensor([0])]; - tensor concat_75_values3_0 = const()[name = string("concat_75_values3_0"), val = tensor([0])]; - int32 concat_75_axis_0 = const()[name = string("concat_75_axis_0"), val = int32(0)]; - bool concat_75_interleave_0 = const()[name = string("concat_75_interleave_0"), val = bool(false)]; - tensor concat_75 = concat(axis = concat_75_axis_0, interleave = concat_75_interleave_0, values = (expand_dims_52, concat_75_values1_0, end_pos_1, concat_75_values3_0))[name = string("concat_75")]; - tensor model_model_kv_cache_local_internal_tensor_assign_9_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_9_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_9_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_74, begin_mask = model_model_kv_cache_local_internal_tensor_assign_9_begin_mask_0, end = concat_75, end_mask = model_model_kv_cache_local_internal_tensor_assign_9_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_9_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_9_stride_0, update = key_states_43_cast_fp16, x = coreml_update_state_59)[name = string("model_model_kv_cache_local_internal_tensor_assign_9_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_9_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_112_write_state")]; - tensor coreml_update_state_60 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_112")]; - tensor expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor([26])]; - tensor expand_dims_55 = const()[name = string("expand_dims_55"), val = tensor([0])]; - tensor expand_dims_57 = const()[name = string("expand_dims_57"), val = tensor([0])]; - tensor expand_dims_58 = const()[name = string("expand_dims_58"), val = tensor([27])]; - int32 concat_78_axis_0 = const()[name = string("concat_78_axis_0"), val = int32(0)]; - bool concat_78_interleave_0 = const()[name = string("concat_78_interleave_0"), val = bool(false)]; - tensor concat_78 = concat(axis = concat_78_axis_0, interleave = concat_78_interleave_0, values = (expand_dims_54, expand_dims_55, current_pos, expand_dims_57))[name = string("concat_78")]; - tensor concat_79_values1_0 = const()[name = string("concat_79_values1_0"), val = tensor([0])]; - tensor concat_79_values3_0 = const()[name = string("concat_79_values3_0"), val = tensor([0])]; - int32 concat_79_axis_0 = const()[name = string("concat_79_axis_0"), val = int32(0)]; - bool concat_79_interleave_0 = const()[name = string("concat_79_interleave_0"), val = bool(false)]; - tensor concat_79 = concat(axis = concat_79_axis_0, interleave = concat_79_interleave_0, values = (expand_dims_58, concat_79_values1_0, end_pos_1, concat_79_values3_0))[name = string("concat_79")]; - tensor model_model_kv_cache_local_internal_tensor_assign_10_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_10_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_10_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor value_states_35 = transpose(perm = var_4565, x = var_4560)[name = string("transpose_196")]; - tensor model_model_kv_cache_local_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_78, begin_mask = model_model_kv_cache_local_internal_tensor_assign_10_begin_mask_0, end = concat_79, end_mask = model_model_kv_cache_local_internal_tensor_assign_10_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_10_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_10_stride_0, update = value_states_35, x = coreml_update_state_60)[name = string("model_model_kv_cache_local_internal_tensor_assign_10_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_10_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_113_write_state")]; - tensor coreml_update_state_61 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_113")]; - tensor var_4785_begin_0 = const()[name = string("op_4785_begin_0"), val = tensor([4, 0, 0, 0])]; - tensor var_4785_end_0 = const()[name = string("op_4785_end_0"), val = tensor([5, 1, 512, 256])]; - tensor var_4785_end_mask_0 = const()[name = string("op_4785_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_4785_cast_fp16 = slice_by_index(begin = var_4785_begin_0, end = var_4785_end_0, end_mask = var_4785_end_mask_0, x = coreml_update_state_61)[name = string("op_4785_cast_fp16")]; - tensor var_4792_begin_0 = const()[name = string("op_4792_begin_0"), val = tensor([26, 0, 0, 0])]; - tensor var_4792_end_0 = const()[name = string("op_4792_end_0"), val = tensor([27, 1, 512, 256])]; - tensor var_4792_end_mask_0 = const()[name = string("op_4792_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_4792_cast_fp16 = slice_by_index(begin = var_4792_begin_0, end = var_4792_end_0, end_mask = var_4792_end_mask_0, x = coreml_update_state_61)[name = string("op_4792_cast_fp16")]; - tensor var_4831 = const()[name = string("op_4831"), val = tensor([1, 4, 1, 1])]; - tensor x_69_cast_fp16 = tile(reps = var_4831, x = var_4785_cast_fp16)[name = string("x_69_cast_fp16")]; - tensor var_4851 = const()[name = string("op_4851"), val = tensor([1, 4, 1, 1])]; - tensor x_75_cast_fp16 = tile(reps = var_4851, x = var_4792_cast_fp16)[name = string("x_75_cast_fp16")]; - bool var_4878_transpose_x_0 = const()[name = string("op_4878_transpose_x_0"), val = bool(false)]; - bool var_4878_transpose_y_0 = const()[name = string("op_4878_transpose_y_0"), val = bool(true)]; - tensor var_4878 = matmul(transpose_x = var_4878_transpose_x_0, transpose_y = var_4878_transpose_y_0, x = query_states_35_cast_fp16, y = x_69_cast_fp16)[name = string("op_4878")]; - fp16 var_4879_to_fp16 = const()[name = string("op_4879_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_17_cast_fp16 = mul(x = var_4878, y = var_4879_to_fp16)[name = string("attn_weights_17_cast_fp16")]; - tensor attn_weights_19_cast_fp16 = add(x = attn_weights_17_cast_fp16, y = mask_slice_1)[name = string("attn_weights_19_cast_fp16")]; - int32 var_4914 = const()[name = string("op_4914"), val = int32(-1)]; - tensor var_4916_cast_fp16 = softmax(axis = var_4914, x = attn_weights_19_cast_fp16)[name = string("op_4916_cast_fp16")]; - tensor concat_84 = const()[name = string("concat_84"), val = tensor([4, 64, 512])]; - tensor reshape_12_cast_fp16 = reshape(shape = concat_84, x = var_4916_cast_fp16)[name = string("reshape_12_cast_fp16")]; - tensor concat_85 = const()[name = string("concat_85"), val = tensor([4, 512, 256])]; - tensor reshape_13_cast_fp16 = reshape(shape = concat_85, x = x_75_cast_fp16)[name = string("reshape_13_cast_fp16")]; - bool matmul_4_transpose_x_0 = const()[name = string("matmul_4_transpose_x_0"), val = bool(false)]; - bool matmul_4_transpose_y_0 = const()[name = string("matmul_4_transpose_y_0"), val = bool(false)]; - tensor matmul_4_cast_fp16 = matmul(transpose_x = matmul_4_transpose_x_0, transpose_y = matmul_4_transpose_y_0, x = reshape_12_cast_fp16, y = reshape_13_cast_fp16)[name = string("matmul_4_cast_fp16")]; - tensor concat_89 = const()[name = string("concat_89"), val = tensor([1, 4, 64, 256])]; - tensor reshape_14_cast_fp16 = reshape(shape = concat_89, x = matmul_4_cast_fp16)[name = string("reshape_14_cast_fp16")]; - tensor var_4928_perm_0 = const()[name = string("op_4928_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_4947 = const()[name = string("op_4947"), val = tensor([1, 64, 1024])]; - tensor var_4928_cast_fp16 = transpose(perm = var_4928_perm_0, x = reshape_14_cast_fp16)[name = string("transpose_195")]; - tensor attn_output_45_cast_fp16 = reshape(shape = var_4947, x = var_4928_cast_fp16)[name = string("attn_output_45_cast_fp16")]; - tensor var_4952 = const()[name = string("op_4952"), val = tensor([0, 2, 1])]; - string var_4968_pad_type_0 = const()[name = string("op_4968_pad_type_0"), val = string("valid")]; - int32 var_4968_groups_0 = const()[name = string("op_4968_groups_0"), val = int32(1)]; - tensor var_4968_strides_0 = const()[name = string("op_4968_strides_0"), val = tensor([1])]; - tensor var_4968_pad_0 = const()[name = string("op_4968_pad_0"), val = tensor([0, 0])]; - tensor var_4968_dilations_0 = const()[name = string("op_4968_dilations_0"), val = tensor([1])]; - tensor squeeze_4_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119825280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120710080))))[name = string("squeeze_4_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_4953_cast_fp16 = transpose(perm = var_4952, x = attn_output_45_cast_fp16)[name = string("transpose_194")]; - tensor var_4968_cast_fp16 = conv(dilations = var_4968_dilations_0, groups = var_4968_groups_0, pad = var_4968_pad_0, pad_type = var_4968_pad_type_0, strides = var_4968_strides_0, weight = squeeze_4_cast_fp16_to_fp32_to_fp16_palettized, x = var_4953_cast_fp16)[name = string("op_4968_cast_fp16")]; - tensor var_4972 = const()[name = string("op_4972"), val = tensor([0, 2, 1])]; - int32 var_4983 = const()[name = string("op_4983"), val = int32(-1)]; - fp16 const_203_promoted_to_fp16 = const()[name = string("const_203_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_73_cast_fp16 = transpose(perm = var_4972, x = var_4968_cast_fp16)[name = string("transpose_193")]; - tensor var_4985_cast_fp16 = mul(x = hidden_states_73_cast_fp16, y = const_203_promoted_to_fp16)[name = string("op_4985_cast_fp16")]; - bool input_91_interleave_0 = const()[name = string("input_91_interleave_0"), val = bool(false)]; - tensor input_91_cast_fp16 = concat(axis = var_4983, interleave = input_91_interleave_0, values = (hidden_states_73_cast_fp16, var_4985_cast_fp16))[name = string("input_91_cast_fp16")]; - tensor normed_109_axes_0 = const()[name = string("normed_109_axes_0"), val = tensor([-1])]; - fp16 var_4980_to_fp16 = const()[name = string("op_4980_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_109_cast_fp16 = layer_norm(axes = normed_109_axes_0, epsilon = var_4980_to_fp16, x = input_91_cast_fp16)[name = string("normed_109_cast_fp16")]; - tensor normed_111_begin_0 = const()[name = string("normed_111_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_111_end_0 = const()[name = string("normed_111_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_111_end_mask_0 = const()[name = string("normed_111_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_111_cast_fp16 = slice_by_index(begin = normed_111_begin_0, end = normed_111_end_0, end_mask = normed_111_end_mask_0, x = normed_109_cast_fp16)[name = string("normed_111_cast_fp16")]; - tensor var_4999_to_fp16 = const()[name = string("op_4999_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120728576)))]; - tensor attn_output_49_cast_fp16 = mul(x = normed_111_cast_fp16, y = var_4999_to_fp16)[name = string("attn_output_49_cast_fp16")]; - tensor hidden_states_75_cast_fp16 = add(x = hidden_states_65_cast_fp16, y = attn_output_49_cast_fp16)[name = string("hidden_states_75_cast_fp16")]; - int32 var_5012 = const()[name = string("op_5012"), val = int32(-1)]; - fp16 const_207_promoted_to_fp16 = const()[name = string("const_207_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_5014_cast_fp16 = mul(x = hidden_states_75_cast_fp16, y = const_207_promoted_to_fp16)[name = string("op_5014_cast_fp16")]; - bool input_93_interleave_0 = const()[name = string("input_93_interleave_0"), val = bool(false)]; - tensor input_93_cast_fp16 = concat(axis = var_5012, interleave = input_93_interleave_0, values = (hidden_states_75_cast_fp16, var_5014_cast_fp16))[name = string("input_93_cast_fp16")]; - tensor normed_113_axes_0 = const()[name = string("normed_113_axes_0"), val = tensor([-1])]; - fp16 var_5009_to_fp16 = const()[name = string("op_5009_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_113_cast_fp16 = layer_norm(axes = normed_113_axes_0, epsilon = var_5009_to_fp16, x = input_93_cast_fp16)[name = string("normed_113_cast_fp16")]; - tensor normed_115_begin_0 = const()[name = string("normed_115_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_115_end_0 = const()[name = string("normed_115_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_115_end_mask_0 = const()[name = string("normed_115_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_115_cast_fp16 = slice_by_index(begin = normed_115_begin_0, end = normed_115_end_0, end_mask = normed_115_end_mask_0, x = normed_113_cast_fp16)[name = string("normed_115_cast_fp16")]; - tensor var_5028_to_fp16 = const()[name = string("op_5028_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120730944)))]; - tensor x_77_cast_fp16 = mul(x = normed_115_cast_fp16, y = var_5028_to_fp16)[name = string("x_77_cast_fp16")]; - tensor var_5040 = const()[name = string("op_5040"), val = tensor([0, 2, 1])]; - tensor input_95_axes_0 = const()[name = string("input_95_axes_0"), val = tensor([2])]; - tensor var_5041_cast_fp16 = transpose(perm = var_5040, x = x_77_cast_fp16)[name = string("transpose_192")]; - tensor input_95_cast_fp16 = expand_dims(axes = input_95_axes_0, x = var_5041_cast_fp16)[name = string("input_95_cast_fp16")]; - string x_79_pad_type_0 = const()[name = string("x_79_pad_type_0"), val = string("valid")]; - tensor x_79_strides_0 = const()[name = string("x_79_strides_0"), val = tensor([1, 1])]; - tensor x_79_pad_0 = const()[name = string("x_79_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_79_dilations_0 = const()[name = string("x_79_dilations_0"), val = tensor([1, 1])]; - int32 x_79_groups_0 = const()[name = string("x_79_groups_0"), val = int32(1)]; - tensor model_model_layers_4_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(927421440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(933393472))))[name = string("model_model_layers_4_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_79_cast_fp16 = conv(dilations = x_79_dilations_0, groups = x_79_groups_0, pad = x_79_pad_0, pad_type = x_79_pad_type_0, strides = x_79_strides_0, weight = model_model_layers_4_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_95_cast_fp16)[name = string("x_79_cast_fp16")]; - string b_9_pad_type_0 = const()[name = string("b_9_pad_type_0"), val = string("valid")]; - tensor b_9_strides_0 = const()[name = string("b_9_strides_0"), val = tensor([1, 1])]; - tensor b_9_pad_0 = const()[name = string("b_9_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_9_dilations_0 = const()[name = string("b_9_dilations_0"), val = tensor([1, 1])]; - int32 b_9_groups_0 = const()[name = string("b_9_groups_0"), val = int32(1)]; - tensor model_model_layers_4_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(933504128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(939476160))))[name = string("model_model_layers_4_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_9_cast_fp16 = conv(dilations = b_9_dilations_0, groups = b_9_groups_0, pad = b_9_pad_0, pad_type = b_9_pad_type_0, strides = b_9_strides_0, weight = model_model_layers_4_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_95_cast_fp16)[name = string("b_9_cast_fp16")]; - string var_5066_mode_0 = const()[name = string("op_5066_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_5066_cast_fp16 = gelu(mode = var_5066_mode_0, x = x_79_cast_fp16)[name = string("op_5066_cast_fp16")]; - tensor input_97_cast_fp16 = mul(x = var_5066_cast_fp16, y = b_9_cast_fp16)[name = string("input_97_cast_fp16")]; - string e_9_pad_type_0 = const()[name = string("e_9_pad_type_0"), val = string("valid")]; - tensor e_9_strides_0 = const()[name = string("e_9_strides_0"), val = tensor([1, 1])]; - tensor e_9_pad_0 = const()[name = string("e_9_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_9_dilations_0 = const()[name = string("e_9_dilations_0"), val = tensor([1, 1])]; - int32 e_9_groups_0 = const()[name = string("e_9_groups_0"), val = int32(1)]; - tensor model_model_layers_4_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132898688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138870720))))[name = string("model_model_layers_4_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_9_cast_fp16 = conv(dilations = e_9_dilations_0, groups = e_9_groups_0, pad = e_9_pad_0, pad_type = e_9_pad_type_0, strides = e_9_strides_0, weight = model_model_layers_4_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_97_cast_fp16)[name = string("e_9_cast_fp16")]; - tensor var_5074_axes_0 = const()[name = string("op_5074_axes_0"), val = tensor([2])]; - tensor var_5074_cast_fp16 = squeeze(axes = var_5074_axes_0, x = e_9_cast_fp16)[name = string("op_5074_cast_fp16")]; - tensor var_5075 = const()[name = string("op_5075"), val = tensor([0, 2, 1])]; - int32 var_5086 = const()[name = string("op_5086"), val = int32(-1)]; - fp16 const_211_promoted_to_fp16 = const()[name = string("const_211_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_77_cast_fp16 = transpose(perm = var_5075, x = var_5074_cast_fp16)[name = string("transpose_191")]; - tensor var_5088_cast_fp16 = mul(x = hidden_states_77_cast_fp16, y = const_211_promoted_to_fp16)[name = string("op_5088_cast_fp16")]; - bool input_99_interleave_0 = const()[name = string("input_99_interleave_0"), val = bool(false)]; - tensor input_99_cast_fp16 = concat(axis = var_5086, interleave = input_99_interleave_0, values = (hidden_states_77_cast_fp16, var_5088_cast_fp16))[name = string("input_99_cast_fp16")]; - tensor normed_117_axes_0 = const()[name = string("normed_117_axes_0"), val = tensor([-1])]; - fp16 var_5083_to_fp16 = const()[name = string("op_5083_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_117_cast_fp16 = layer_norm(axes = normed_117_axes_0, epsilon = var_5083_to_fp16, x = input_99_cast_fp16)[name = string("normed_117_cast_fp16")]; - tensor normed_119_begin_0 = const()[name = string("normed_119_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_119_end_0 = const()[name = string("normed_119_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_119_end_mask_0 = const()[name = string("normed_119_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_119_cast_fp16 = slice_by_index(begin = normed_119_begin_0, end = normed_119_end_0, end_mask = normed_119_end_mask_0, x = normed_117_cast_fp16)[name = string("normed_119_cast_fp16")]; - tensor var_5102_to_fp16 = const()[name = string("op_5102_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138889216)))]; - tensor hidden_states_79_cast_fp16 = mul(x = normed_119_cast_fp16, y = var_5102_to_fp16)[name = string("hidden_states_79_cast_fp16")]; - tensor hidden_states_81_cast_fp16 = add(x = hidden_states_75_cast_fp16, y = hidden_states_79_cast_fp16)[name = string("hidden_states_81_cast_fp16")]; - int32 var_5119_axis_0 = const()[name = string("op_5119_axis_0"), val = int32(1)]; - int32 var_5119_batch_dims_0 = const()[name = string("op_5119_batch_dims_0"), val = int32(0)]; - bool var_5119_validate_indices_0 = const()[name = string("op_5119_validate_indices_0"), val = bool(false)]; - tensor var_5111_to_fp16 = const()[name = string("op_5111_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143085952)))]; - tensor var_5119_cast_fp16_cast_uint16 = gather(axis = var_5119_axis_0, batch_dims = var_5119_batch_dims_0, indices = position_ids_to_uint16, validate_indices = var_5119_validate_indices_0, x = var_5111_to_fp16)[name = string("op_5119_cast_fp16_cast_uint16")]; - tensor var_5123 = const()[name = string("op_5123"), val = tensor([1, 64, 1, 256])]; - tensor cos_31_cast_fp16 = reshape(shape = var_5123, x = var_5119_cast_fp16_cast_uint16)[name = string("cos_31_cast_fp16")]; - int32 var_5133_axis_0 = const()[name = string("op_5133_axis_0"), val = int32(1)]; - int32 var_5133_batch_dims_0 = const()[name = string("op_5133_batch_dims_0"), val = int32(0)]; - bool var_5133_validate_indices_0 = const()[name = string("op_5133_validate_indices_0"), val = bool(false)]; - tensor var_5125_to_fp16 = const()[name = string("op_5125_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138891584)))]; - tensor var_5133_cast_fp16_cast_uint16 = gather(axis = var_5133_axis_0, batch_dims = var_5133_batch_dims_0, indices = position_ids_to_uint16, validate_indices = var_5133_validate_indices_0, x = var_5125_to_fp16)[name = string("op_5133_cast_fp16_cast_uint16")]; - tensor var_5137 = const()[name = string("op_5137"), val = tensor([1, 64, 1, 256])]; - tensor sin_31_cast_fp16 = reshape(shape = var_5137, x = var_5133_cast_fp16_cast_uint16)[name = string("sin_31_cast_fp16")]; - int32 var_5158 = const()[name = string("op_5158"), val = int32(-1)]; - fp16 const_216_promoted_to_fp16 = const()[name = string("const_216_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_5160_cast_fp16 = mul(x = hidden_states_81_cast_fp16, y = const_216_promoted_to_fp16)[name = string("op_5160_cast_fp16")]; - bool input_101_interleave_0 = const()[name = string("input_101_interleave_0"), val = bool(false)]; - tensor input_101_cast_fp16 = concat(axis = var_5158, interleave = input_101_interleave_0, values = (hidden_states_81_cast_fp16, var_5160_cast_fp16))[name = string("input_101_cast_fp16")]; - tensor normed_121_axes_0 = const()[name = string("normed_121_axes_0"), val = tensor([-1])]; - fp16 var_5155_to_fp16 = const()[name = string("op_5155_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_121_cast_fp16 = layer_norm(axes = normed_121_axes_0, epsilon = var_5155_to_fp16, x = input_101_cast_fp16)[name = string("normed_121_cast_fp16")]; - tensor normed_123_begin_0 = const()[name = string("normed_123_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_123_end_0 = const()[name = string("normed_123_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_123_end_mask_0 = const()[name = string("normed_123_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_123_cast_fp16 = slice_by_index(begin = normed_123_begin_0, end = normed_123_end_0, end_mask = normed_123_end_mask_0, x = normed_121_cast_fp16)[name = string("normed_123_cast_fp16")]; - tensor var_5174_to_fp16 = const()[name = string("op_5174_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147280320)))]; - tensor hidden_states_83_cast_fp16 = mul(x = normed_123_cast_fp16, y = var_5174_to_fp16)[name = string("hidden_states_83_cast_fp16")]; - tensor var_5185 = const()[name = string("op_5185"), val = tensor([0, 2, 1])]; - tensor var_5188_axes_0 = const()[name = string("op_5188_axes_0"), val = tensor([2])]; - tensor var_5186_cast_fp16 = transpose(perm = var_5185, x = hidden_states_83_cast_fp16)[name = string("transpose_190")]; - tensor var_5188_cast_fp16 = expand_dims(axes = var_5188_axes_0, x = var_5186_cast_fp16)[name = string("op_5188_cast_fp16")]; - string query_states_41_pad_type_0 = const()[name = string("query_states_41_pad_type_0"), val = string("valid")]; - tensor query_states_41_strides_0 = const()[name = string("query_states_41_strides_0"), val = tensor([1, 1])]; - tensor query_states_41_pad_0 = const()[name = string("query_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_41_dilations_0 = const()[name = string("query_states_41_dilations_0"), val = tensor([1, 1])]; - int32 query_states_41_groups_0 = const()[name = string("query_states_41_groups_0"), val = int32(1)]; - tensor query_states_41 = conv(dilations = query_states_41_dilations_0, groups = query_states_41_groups_0, pad = query_states_41_pad_0, pad_type = query_states_41_pad_type_0, strides = query_states_41_strides_0, weight = model_model_layers_5_self_attn_q_proj_weight_palettized, x = var_5188_cast_fp16)[name = string("query_states_41")]; - string key_states_51_pad_type_0 = const()[name = string("key_states_51_pad_type_0"), val = string("valid")]; - tensor key_states_51_strides_0 = const()[name = string("key_states_51_strides_0"), val = tensor([1, 1])]; - tensor key_states_51_pad_0 = const()[name = string("key_states_51_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_51_dilations_0 = const()[name = string("key_states_51_dilations_0"), val = tensor([1, 1])]; - int32 key_states_51_groups_0 = const()[name = string("key_states_51_groups_0"), val = int32(1)]; - tensor key_states_51 = conv(dilations = key_states_51_dilations_0, groups = key_states_51_groups_0, pad = key_states_51_pad_0, pad_type = key_states_51_pad_type_0, strides = key_states_51_strides_0, weight = model_model_layers_5_self_attn_k_proj_weight_palettized, x = var_5188_cast_fp16)[name = string("key_states_51")]; - string value_states_41_pad_type_0 = const()[name = string("value_states_41_pad_type_0"), val = string("valid")]; - tensor value_states_41_strides_0 = const()[name = string("value_states_41_strides_0"), val = tensor([1, 1])]; - tensor value_states_41_pad_0 = const()[name = string("value_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_41_dilations_0 = const()[name = string("value_states_41_dilations_0"), val = tensor([1, 1])]; - int32 value_states_41_groups_0 = const()[name = string("value_states_41_groups_0"), val = int32(1)]; - tensor value_states_41 = conv(dilations = value_states_41_dilations_0, groups = value_states_41_groups_0, pad = value_states_41_pad_0, pad_type = value_states_41_pad_type_0, strides = value_states_41_strides_0, weight = model_model_layers_5_self_attn_v_proj_weight_palettized, x = var_5188_cast_fp16)[name = string("value_states_41")]; - tensor var_5230 = const()[name = string("op_5230"), val = tensor([1, 4, 256, 64])]; - tensor var_5231 = reshape(shape = var_5230, x = query_states_41)[name = string("op_5231")]; - tensor var_5236 = const()[name = string("op_5236"), val = tensor([0, 1, 3, 2])]; - tensor var_5241 = const()[name = string("op_5241"), val = tensor([1, 1, 256, 64])]; - tensor var_5242 = reshape(shape = var_5241, x = key_states_51)[name = string("op_5242")]; - tensor var_5247 = const()[name = string("op_5247"), val = tensor([0, 1, 3, 2])]; - tensor var_5252 = const()[name = string("op_5252"), val = tensor([1, 1, 256, 64])]; - tensor var_5253 = reshape(shape = var_5252, x = value_states_41)[name = string("op_5253")]; - tensor var_5258 = const()[name = string("op_5258"), val = tensor([0, 1, 3, 2])]; - int32 var_5269 = const()[name = string("op_5269"), val = int32(-1)]; - fp16 const_221_promoted = const()[name = string("const_221_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_85 = transpose(perm = var_5236, x = var_5231)[name = string("transpose_189")]; - tensor var_5271 = mul(x = hidden_states_85, y = const_221_promoted)[name = string("op_5271")]; - bool input_105_interleave_0 = const()[name = string("input_105_interleave_0"), val = bool(false)]; - tensor input_105 = concat(axis = var_5269, interleave = input_105_interleave_0, values = (hidden_states_85, var_5271))[name = string("input_105")]; - tensor normed_125_axes_0 = const()[name = string("normed_125_axes_0"), val = tensor([-1])]; - fp16 var_5266_to_fp16 = const()[name = string("op_5266_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_125_cast_fp16 = layer_norm(axes = normed_125_axes_0, epsilon = var_5266_to_fp16, x = input_105)[name = string("normed_125_cast_fp16")]; - tensor normed_127_begin_0 = const()[name = string("normed_127_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_127_end_0 = const()[name = string("normed_127_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_127_end_mask_0 = const()[name = string("normed_127_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_127 = slice_by_index(begin = normed_127_begin_0, end = normed_127_end_0, end_mask = normed_127_end_mask_0, x = normed_125_cast_fp16)[name = string("normed_127")]; - tensor var_5285_to_fp16 = const()[name = string("op_5285_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147282688)))]; - tensor q_11_cast_fp16 = mul(x = normed_127, y = var_5285_to_fp16)[name = string("q_11_cast_fp16")]; - int32 var_5296 = const()[name = string("op_5296"), val = int32(-1)]; - fp16 const_225_promoted = const()[name = string("const_225_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_87 = transpose(perm = var_5247, x = var_5242)[name = string("transpose_188")]; - tensor var_5298 = mul(x = hidden_states_87, y = const_225_promoted)[name = string("op_5298")]; - bool input_107_interleave_0 = const()[name = string("input_107_interleave_0"), val = bool(false)]; - tensor input_107 = concat(axis = var_5296, interleave = input_107_interleave_0, values = (hidden_states_87, var_5298))[name = string("input_107")]; - tensor normed_129_axes_0 = const()[name = string("normed_129_axes_0"), val = tensor([-1])]; - fp16 var_5293_to_fp16 = const()[name = string("op_5293_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_129_cast_fp16 = layer_norm(axes = normed_129_axes_0, epsilon = var_5293_to_fp16, x = input_107)[name = string("normed_129_cast_fp16")]; - tensor normed_131_begin_0 = const()[name = string("normed_131_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_131_end_0 = const()[name = string("normed_131_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_131_end_mask_0 = const()[name = string("normed_131_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_131 = slice_by_index(begin = normed_131_begin_0, end = normed_131_end_0, end_mask = normed_131_end_mask_0, x = normed_129_cast_fp16)[name = string("normed_131")]; - tensor var_5312_to_fp16 = const()[name = string("op_5312_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147283264)))]; - tensor k_11_cast_fp16 = mul(x = normed_131, y = var_5312_to_fp16)[name = string("k_11_cast_fp16")]; - tensor var_5318 = const()[name = string("op_5318"), val = tensor([0, 2, 1, 3])]; - tensor var_5324 = const()[name = string("op_5324"), val = tensor([0, 2, 1, 3])]; - tensor cos_35 = transpose(perm = var_5318, x = cos_31_cast_fp16)[name = string("transpose_187")]; - tensor var_5326_cast_fp16 = mul(x = q_11_cast_fp16, y = cos_35)[name = string("op_5326_cast_fp16")]; - tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_21_cast_fp16 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = q_11_cast_fp16)[name = string("x1_21_cast_fp16")]; - tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_21_cast_fp16 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = q_11_cast_fp16)[name = string("x2_21_cast_fp16")]; - fp16 const_231_promoted_to_fp16 = const()[name = string("const_231_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_5347_cast_fp16 = mul(x = x2_21_cast_fp16, y = const_231_promoted_to_fp16)[name = string("op_5347_cast_fp16")]; - int32 var_5349 = const()[name = string("op_5349"), val = int32(-1)]; - bool var_5350_interleave_0 = const()[name = string("op_5350_interleave_0"), val = bool(false)]; - tensor var_5350_cast_fp16 = concat(axis = var_5349, interleave = var_5350_interleave_0, values = (var_5347_cast_fp16, x1_21_cast_fp16))[name = string("op_5350_cast_fp16")]; - tensor sin_35 = transpose(perm = var_5324, x = sin_31_cast_fp16)[name = string("transpose_186")]; - tensor var_5351_cast_fp16 = mul(x = var_5350_cast_fp16, y = sin_35)[name = string("op_5351_cast_fp16")]; - tensor query_states_43_cast_fp16 = add(x = var_5326_cast_fp16, y = var_5351_cast_fp16)[name = string("query_states_43_cast_fp16")]; - tensor var_5354_cast_fp16 = mul(x = k_11_cast_fp16, y = cos_35)[name = string("op_5354_cast_fp16")]; - tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_23_cast_fp16 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = k_11_cast_fp16)[name = string("x1_23_cast_fp16")]; - tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_23_cast_fp16 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = k_11_cast_fp16)[name = string("x2_23_cast_fp16")]; - fp16 const_234_promoted_to_fp16 = const()[name = string("const_234_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_5375_cast_fp16 = mul(x = x2_23_cast_fp16, y = const_234_promoted_to_fp16)[name = string("op_5375_cast_fp16")]; - int32 var_5377 = const()[name = string("op_5377"), val = int32(-1)]; - bool var_5378_interleave_0 = const()[name = string("op_5378_interleave_0"), val = bool(false)]; - tensor var_5378_cast_fp16 = concat(axis = var_5377, interleave = var_5378_interleave_0, values = (var_5375_cast_fp16, x1_23_cast_fp16))[name = string("op_5378_cast_fp16")]; - tensor var_5379_cast_fp16 = mul(x = var_5378_cast_fp16, y = sin_35)[name = string("op_5379_cast_fp16")]; - tensor key_states_53_cast_fp16 = add(x = var_5354_cast_fp16, y = var_5379_cast_fp16)[name = string("key_states_53_cast_fp16")]; - tensor read_state_1 = read_state(input = model_model_kv_cache_global)[name = string("read_state_1")]; - tensor model_model_kv_cache_global_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_global_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_global_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_global_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_global_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_1_stride_0, update = key_states_53_cast_fp16, x = read_state_1)[name = string("model_model_kv_cache_global_internal_tensor_assign_1_cast_fp16")]; - write_state(data = model_model_kv_cache_global_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_114_write_state")]; - tensor coreml_update_state_62 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_114")]; - tensor model_model_kv_cache_global_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_global_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_global_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor value_states_43 = transpose(perm = var_5258, x = var_5253)[name = string("transpose_185")]; - tensor model_model_kv_cache_global_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_74, begin_mask = model_model_kv_cache_global_internal_tensor_assign_2_begin_mask_0, end = concat_75, end_mask = model_model_kv_cache_global_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_2_stride_0, update = value_states_43, x = coreml_update_state_62)[name = string("model_model_kv_cache_global_internal_tensor_assign_2_cast_fp16")]; - write_state(data = model_model_kv_cache_global_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_115_write_state")]; - tensor coreml_update_state_63 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_115")]; - tensor var_5478_begin_0 = const()[name = string("op_5478_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_5478_end_0 = const()[name = string("op_5478_end_0"), val = tensor([1, 1, 4096, 256])]; - tensor var_5478_end_mask_0 = const()[name = string("op_5478_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_5478_cast_fp16 = slice_by_index(begin = var_5478_begin_0, end = var_5478_end_0, end_mask = var_5478_end_mask_0, x = coreml_update_state_63)[name = string("op_5478_cast_fp16")]; - tensor var_5485_begin_0 = const()[name = string("op_5485_begin_0"), val = tensor([4, 0, 0, 0])]; - tensor var_5485_end_0 = const()[name = string("op_5485_end_0"), val = tensor([5, 1, 4096, 256])]; - tensor var_5485_end_mask_0 = const()[name = string("op_5485_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_5485_cast_fp16 = slice_by_index(begin = var_5485_begin_0, end = var_5485_end_0, end_mask = var_5485_end_mask_0, x = coreml_update_state_63)[name = string("op_5485_cast_fp16")]; - tensor var_5524 = const()[name = string("op_5524"), val = tensor([1, 4, 1, 1])]; - tensor x_85_cast_fp16 = tile(reps = var_5524, x = var_5478_cast_fp16)[name = string("x_85_cast_fp16")]; - tensor var_5544 = const()[name = string("op_5544"), val = tensor([1, 4, 1, 1])]; - tensor x_91_cast_fp16 = tile(reps = var_5544, x = var_5485_cast_fp16)[name = string("x_91_cast_fp16")]; - bool var_5571_transpose_x_0 = const()[name = string("op_5571_transpose_x_0"), val = bool(false)]; - bool var_5571_transpose_y_0 = const()[name = string("op_5571_transpose_y_0"), val = bool(true)]; - tensor var_5571 = matmul(transpose_x = var_5571_transpose_x_0, transpose_y = var_5571_transpose_y_0, x = query_states_43_cast_fp16, y = x_85_cast_fp16)[name = string("op_5571")]; - fp16 var_5572_to_fp16 = const()[name = string("op_5572_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_21_cast_fp16 = mul(x = var_5571, y = var_5572_to_fp16)[name = string("attn_weights_21_cast_fp16")]; - tensor attn_weights_23_cast_fp16 = add(x = attn_weights_21_cast_fp16, y = causal_mask)[name = string("attn_weights_23_cast_fp16")]; - int32 var_5607 = const()[name = string("op_5607"), val = int32(-1)]; - tensor var_5609_cast_fp16 = softmax(axis = var_5607, x = attn_weights_23_cast_fp16)[name = string("op_5609_cast_fp16")]; - tensor concat_102 = const()[name = string("concat_102"), val = tensor([4, 64, 4096])]; - tensor reshape_15_cast_fp16 = reshape(shape = concat_102, x = var_5609_cast_fp16)[name = string("reshape_15_cast_fp16")]; - tensor concat_103 = const()[name = string("concat_103"), val = tensor([4, 4096, 256])]; - tensor reshape_16_cast_fp16 = reshape(shape = concat_103, x = x_91_cast_fp16)[name = string("reshape_16_cast_fp16")]; - bool matmul_5_transpose_x_0 = const()[name = string("matmul_5_transpose_x_0"), val = bool(false)]; - bool matmul_5_transpose_y_0 = const()[name = string("matmul_5_transpose_y_0"), val = bool(false)]; - tensor matmul_5_cast_fp16 = matmul(transpose_x = matmul_5_transpose_x_0, transpose_y = matmul_5_transpose_y_0, x = reshape_15_cast_fp16, y = reshape_16_cast_fp16)[name = string("matmul_5_cast_fp16")]; - tensor concat_107 = const()[name = string("concat_107"), val = tensor([1, 4, 64, 256])]; - tensor reshape_17_cast_fp16 = reshape(shape = concat_107, x = matmul_5_cast_fp16)[name = string("reshape_17_cast_fp16")]; - tensor var_5621_perm_0 = const()[name = string("op_5621_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_5640 = const()[name = string("op_5640"), val = tensor([1, 64, 1024])]; - tensor var_5621_cast_fp16 = transpose(perm = var_5621_perm_0, x = reshape_17_cast_fp16)[name = string("transpose_184")]; - tensor attn_output_55_cast_fp16 = reshape(shape = var_5640, x = var_5621_cast_fp16)[name = string("attn_output_55_cast_fp16")]; - tensor var_5645 = const()[name = string("op_5645"), val = tensor([0, 2, 1])]; - string var_5661_pad_type_0 = const()[name = string("op_5661_pad_type_0"), val = string("valid")]; - int32 var_5661_groups_0 = const()[name = string("op_5661_groups_0"), val = int32(1)]; - tensor var_5661_strides_0 = const()[name = string("op_5661_strides_0"), val = tensor([1])]; - tensor var_5661_pad_0 = const()[name = string("op_5661_pad_0"), val = tensor([0, 0])]; - tensor var_5661_dilations_0 = const()[name = string("op_5661_dilations_0"), val = tensor([1])]; - tensor squeeze_5_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147283840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148168640))))[name = string("squeeze_5_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_5646_cast_fp16 = transpose(perm = var_5645, x = attn_output_55_cast_fp16)[name = string("transpose_183")]; - tensor var_5661_cast_fp16 = conv(dilations = var_5661_dilations_0, groups = var_5661_groups_0, pad = var_5661_pad_0, pad_type = var_5661_pad_type_0, strides = var_5661_strides_0, weight = squeeze_5_cast_fp16_to_fp32_to_fp16_palettized, x = var_5646_cast_fp16)[name = string("op_5661_cast_fp16")]; - tensor var_5665 = const()[name = string("op_5665"), val = tensor([0, 2, 1])]; - int32 var_5676 = const()[name = string("op_5676"), val = int32(-1)]; - fp16 const_246_promoted_to_fp16 = const()[name = string("const_246_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_89_cast_fp16 = transpose(perm = var_5665, x = var_5661_cast_fp16)[name = string("transpose_182")]; - tensor var_5678_cast_fp16 = mul(x = hidden_states_89_cast_fp16, y = const_246_promoted_to_fp16)[name = string("op_5678_cast_fp16")]; - bool input_111_interleave_0 = const()[name = string("input_111_interleave_0"), val = bool(false)]; - tensor input_111_cast_fp16 = concat(axis = var_5676, interleave = input_111_interleave_0, values = (hidden_states_89_cast_fp16, var_5678_cast_fp16))[name = string("input_111_cast_fp16")]; - tensor normed_133_axes_0 = const()[name = string("normed_133_axes_0"), val = tensor([-1])]; - fp16 var_5673_to_fp16 = const()[name = string("op_5673_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_133_cast_fp16 = layer_norm(axes = normed_133_axes_0, epsilon = var_5673_to_fp16, x = input_111_cast_fp16)[name = string("normed_133_cast_fp16")]; - tensor normed_135_begin_0 = const()[name = string("normed_135_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_135_end_0 = const()[name = string("normed_135_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_135_end_mask_0 = const()[name = string("normed_135_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_135_cast_fp16 = slice_by_index(begin = normed_135_begin_0, end = normed_135_end_0, end_mask = normed_135_end_mask_0, x = normed_133_cast_fp16)[name = string("normed_135_cast_fp16")]; - tensor var_5692_to_fp16 = const()[name = string("op_5692_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148187136)))]; - tensor attn_output_59_cast_fp16 = mul(x = normed_135_cast_fp16, y = var_5692_to_fp16)[name = string("attn_output_59_cast_fp16")]; - tensor hidden_states_91_cast_fp16 = add(x = hidden_states_81_cast_fp16, y = attn_output_59_cast_fp16)[name = string("hidden_states_91_cast_fp16")]; - int32 var_5705 = const()[name = string("op_5705"), val = int32(-1)]; - fp16 const_250_promoted_to_fp16 = const()[name = string("const_250_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_5707_cast_fp16 = mul(x = hidden_states_91_cast_fp16, y = const_250_promoted_to_fp16)[name = string("op_5707_cast_fp16")]; - bool input_113_interleave_0 = const()[name = string("input_113_interleave_0"), val = bool(false)]; - tensor input_113_cast_fp16 = concat(axis = var_5705, interleave = input_113_interleave_0, values = (hidden_states_91_cast_fp16, var_5707_cast_fp16))[name = string("input_113_cast_fp16")]; - tensor normed_137_axes_0 = const()[name = string("normed_137_axes_0"), val = tensor([-1])]; - fp16 var_5702_to_fp16 = const()[name = string("op_5702_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_137_cast_fp16 = layer_norm(axes = normed_137_axes_0, epsilon = var_5702_to_fp16, x = input_113_cast_fp16)[name = string("normed_137_cast_fp16")]; - tensor normed_139_begin_0 = const()[name = string("normed_139_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_139_end_0 = const()[name = string("normed_139_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_139_end_mask_0 = const()[name = string("normed_139_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_139_cast_fp16 = slice_by_index(begin = normed_139_begin_0, end = normed_139_end_0, end_mask = normed_139_end_mask_0, x = normed_137_cast_fp16)[name = string("normed_139_cast_fp16")]; - tensor var_5721_to_fp16 = const()[name = string("op_5721_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148189504)))]; - tensor x_93_cast_fp16 = mul(x = normed_139_cast_fp16, y = var_5721_to_fp16)[name = string("x_93_cast_fp16")]; - tensor var_5733 = const()[name = string("op_5733"), val = tensor([0, 2, 1])]; - tensor input_115_axes_0 = const()[name = string("input_115_axes_0"), val = tensor([2])]; - tensor var_5734_cast_fp16 = transpose(perm = var_5733, x = x_93_cast_fp16)[name = string("transpose_181")]; - tensor input_115_cast_fp16 = expand_dims(axes = input_115_axes_0, x = var_5734_cast_fp16)[name = string("input_115_cast_fp16")]; - string x_95_pad_type_0 = const()[name = string("x_95_pad_type_0"), val = string("valid")]; - tensor x_95_strides_0 = const()[name = string("x_95_strides_0"), val = tensor([1, 1])]; - tensor x_95_pad_0 = const()[name = string("x_95_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_95_dilations_0 = const()[name = string("x_95_dilations_0"), val = tensor([1, 1])]; - int32 x_95_groups_0 = const()[name = string("x_95_groups_0"), val = int32(1)]; - tensor model_model_layers_5_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(939586816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(945558848))))[name = string("model_model_layers_5_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_95_cast_fp16 = conv(dilations = x_95_dilations_0, groups = x_95_groups_0, pad = x_95_pad_0, pad_type = x_95_pad_type_0, strides = x_95_strides_0, weight = model_model_layers_5_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_115_cast_fp16)[name = string("x_95_cast_fp16")]; - string b_11_pad_type_0 = const()[name = string("b_11_pad_type_0"), val = string("valid")]; - tensor b_11_strides_0 = const()[name = string("b_11_strides_0"), val = tensor([1, 1])]; - tensor b_11_pad_0 = const()[name = string("b_11_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_11_dilations_0 = const()[name = string("b_11_dilations_0"), val = tensor([1, 1])]; - int32 b_11_groups_0 = const()[name = string("b_11_groups_0"), val = int32(1)]; - tensor model_model_layers_5_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(945669504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(951641536))))[name = string("model_model_layers_5_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_11_cast_fp16 = conv(dilations = b_11_dilations_0, groups = b_11_groups_0, pad = b_11_pad_0, pad_type = b_11_pad_type_0, strides = b_11_strides_0, weight = model_model_layers_5_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_115_cast_fp16)[name = string("b_11_cast_fp16")]; - string var_5759_mode_0 = const()[name = string("op_5759_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_5759_cast_fp16 = gelu(mode = var_5759_mode_0, x = x_95_cast_fp16)[name = string("op_5759_cast_fp16")]; - tensor input_117_cast_fp16 = mul(x = var_5759_cast_fp16, y = b_11_cast_fp16)[name = string("input_117_cast_fp16")]; - string e_11_pad_type_0 = const()[name = string("e_11_pad_type_0"), val = string("valid")]; - tensor e_11_strides_0 = const()[name = string("e_11_strides_0"), val = tensor([1, 1])]; - tensor e_11_pad_0 = const()[name = string("e_11_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_11_dilations_0 = const()[name = string("e_11_dilations_0"), val = tensor([1, 1])]; - int32 e_11_groups_0 = const()[name = string("e_11_groups_0"), val = int32(1)]; - tensor model_model_layers_5_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160357248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166329280))))[name = string("model_model_layers_5_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_11_cast_fp16 = conv(dilations = e_11_dilations_0, groups = e_11_groups_0, pad = e_11_pad_0, pad_type = e_11_pad_type_0, strides = e_11_strides_0, weight = model_model_layers_5_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_117_cast_fp16)[name = string("e_11_cast_fp16")]; - tensor var_5767_axes_0 = const()[name = string("op_5767_axes_0"), val = tensor([2])]; - tensor var_5767_cast_fp16 = squeeze(axes = var_5767_axes_0, x = e_11_cast_fp16)[name = string("op_5767_cast_fp16")]; - tensor var_5768 = const()[name = string("op_5768"), val = tensor([0, 2, 1])]; - int32 var_5779 = const()[name = string("op_5779"), val = int32(-1)]; - fp16 const_254_promoted_to_fp16 = const()[name = string("const_254_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_93_cast_fp16 = transpose(perm = var_5768, x = var_5767_cast_fp16)[name = string("transpose_180")]; - tensor var_5781_cast_fp16 = mul(x = hidden_states_93_cast_fp16, y = const_254_promoted_to_fp16)[name = string("op_5781_cast_fp16")]; - bool input_119_interleave_0 = const()[name = string("input_119_interleave_0"), val = bool(false)]; - tensor input_119_cast_fp16 = concat(axis = var_5779, interleave = input_119_interleave_0, values = (hidden_states_93_cast_fp16, var_5781_cast_fp16))[name = string("input_119_cast_fp16")]; - tensor normed_141_axes_0 = const()[name = string("normed_141_axes_0"), val = tensor([-1])]; - fp16 var_5776_to_fp16 = const()[name = string("op_5776_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_141_cast_fp16 = layer_norm(axes = normed_141_axes_0, epsilon = var_5776_to_fp16, x = input_119_cast_fp16)[name = string("normed_141_cast_fp16")]; - tensor normed_143_begin_0 = const()[name = string("normed_143_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_143_end_0 = const()[name = string("normed_143_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_143_end_mask_0 = const()[name = string("normed_143_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_143_cast_fp16 = slice_by_index(begin = normed_143_begin_0, end = normed_143_end_0, end_mask = normed_143_end_mask_0, x = normed_141_cast_fp16)[name = string("normed_143_cast_fp16")]; - tensor var_5795_to_fp16 = const()[name = string("op_5795_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166347776)))]; - tensor hidden_states_95_cast_fp16 = mul(x = normed_143_cast_fp16, y = var_5795_to_fp16)[name = string("hidden_states_95_cast_fp16")]; - tensor hidden_states_97_cast_fp16 = add(x = hidden_states_91_cast_fp16, y = hidden_states_95_cast_fp16)[name = string("hidden_states_97_cast_fp16")]; - int32 var_5849 = const()[name = string("op_5849"), val = int32(-1)]; - fp16 const_259_promoted_to_fp16 = const()[name = string("const_259_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_5851_cast_fp16 = mul(x = hidden_states_97_cast_fp16, y = const_259_promoted_to_fp16)[name = string("op_5851_cast_fp16")]; - bool input_121_interleave_0 = const()[name = string("input_121_interleave_0"), val = bool(false)]; - tensor input_121_cast_fp16 = concat(axis = var_5849, interleave = input_121_interleave_0, values = (hidden_states_97_cast_fp16, var_5851_cast_fp16))[name = string("input_121_cast_fp16")]; - tensor normed_145_axes_0 = const()[name = string("normed_145_axes_0"), val = tensor([-1])]; - fp16 var_5846_to_fp16 = const()[name = string("op_5846_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_145_cast_fp16 = layer_norm(axes = normed_145_axes_0, epsilon = var_5846_to_fp16, x = input_121_cast_fp16)[name = string("normed_145_cast_fp16")]; - tensor normed_147_begin_0 = const()[name = string("normed_147_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_147_end_0 = const()[name = string("normed_147_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_147_end_mask_0 = const()[name = string("normed_147_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_147_cast_fp16 = slice_by_index(begin = normed_147_begin_0, end = normed_147_end_0, end_mask = normed_147_end_mask_0, x = normed_145_cast_fp16)[name = string("normed_147_cast_fp16")]; - tensor var_5865_to_fp16 = const()[name = string("op_5865_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166350144)))]; - tensor hidden_states_99_cast_fp16 = mul(x = normed_147_cast_fp16, y = var_5865_to_fp16)[name = string("hidden_states_99_cast_fp16")]; - tensor var_5876 = const()[name = string("op_5876"), val = tensor([0, 2, 1])]; - tensor var_5879_axes_0 = const()[name = string("op_5879_axes_0"), val = tensor([2])]; - tensor var_5877_cast_fp16 = transpose(perm = var_5876, x = hidden_states_99_cast_fp16)[name = string("transpose_179")]; - tensor var_5879_cast_fp16 = expand_dims(axes = var_5879_axes_0, x = var_5877_cast_fp16)[name = string("op_5879_cast_fp16")]; - string query_states_49_pad_type_0 = const()[name = string("query_states_49_pad_type_0"), val = string("valid")]; - tensor query_states_49_strides_0 = const()[name = string("query_states_49_strides_0"), val = tensor([1, 1])]; - tensor query_states_49_pad_0 = const()[name = string("query_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_49_dilations_0 = const()[name = string("query_states_49_dilations_0"), val = tensor([1, 1])]; - int32 query_states_49_groups_0 = const()[name = string("query_states_49_groups_0"), val = int32(1)]; - tensor query_states_49 = conv(dilations = query_states_49_dilations_0, groups = query_states_49_groups_0, pad = query_states_49_pad_0, pad_type = query_states_49_pad_type_0, strides = query_states_49_strides_0, weight = model_model_layers_6_self_attn_q_proj_weight_palettized, x = var_5879_cast_fp16)[name = string("query_states_49")]; - string key_states_61_pad_type_0 = const()[name = string("key_states_61_pad_type_0"), val = string("valid")]; - tensor key_states_61_strides_0 = const()[name = string("key_states_61_strides_0"), val = tensor([1, 1])]; - tensor key_states_61_pad_0 = const()[name = string("key_states_61_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_61_dilations_0 = const()[name = string("key_states_61_dilations_0"), val = tensor([1, 1])]; - int32 key_states_61_groups_0 = const()[name = string("key_states_61_groups_0"), val = int32(1)]; - tensor key_states_61 = conv(dilations = key_states_61_dilations_0, groups = key_states_61_groups_0, pad = key_states_61_pad_0, pad_type = key_states_61_pad_type_0, strides = key_states_61_strides_0, weight = model_model_layers_6_self_attn_k_proj_weight_palettized, x = var_5879_cast_fp16)[name = string("key_states_61")]; - string value_states_49_pad_type_0 = const()[name = string("value_states_49_pad_type_0"), val = string("valid")]; - tensor value_states_49_strides_0 = const()[name = string("value_states_49_strides_0"), val = tensor([1, 1])]; - tensor value_states_49_pad_0 = const()[name = string("value_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_49_dilations_0 = const()[name = string("value_states_49_dilations_0"), val = tensor([1, 1])]; - int32 value_states_49_groups_0 = const()[name = string("value_states_49_groups_0"), val = int32(1)]; - tensor value_states_49 = conv(dilations = value_states_49_dilations_0, groups = value_states_49_groups_0, pad = value_states_49_pad_0, pad_type = value_states_49_pad_type_0, strides = value_states_49_strides_0, weight = model_model_layers_6_self_attn_v_proj_weight_palettized, x = var_5879_cast_fp16)[name = string("value_states_49")]; - tensor var_5921 = const()[name = string("op_5921"), val = tensor([1, 4, 256, 64])]; - tensor var_5922 = reshape(shape = var_5921, x = query_states_49)[name = string("op_5922")]; - tensor var_5927 = const()[name = string("op_5927"), val = tensor([0, 1, 3, 2])]; - tensor var_5932 = const()[name = string("op_5932"), val = tensor([1, 1, 256, 64])]; - tensor var_5933 = reshape(shape = var_5932, x = key_states_61)[name = string("op_5933")]; - tensor var_5938 = const()[name = string("op_5938"), val = tensor([0, 1, 3, 2])]; - tensor var_5943 = const()[name = string("op_5943"), val = tensor([1, 1, 256, 64])]; - tensor var_5944 = reshape(shape = var_5943, x = value_states_49)[name = string("op_5944")]; - tensor var_5949 = const()[name = string("op_5949"), val = tensor([0, 1, 3, 2])]; - int32 var_5960 = const()[name = string("op_5960"), val = int32(-1)]; - fp16 const_264_promoted = const()[name = string("const_264_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_101 = transpose(perm = var_5927, x = var_5922)[name = string("transpose_178")]; - tensor var_5962 = mul(x = hidden_states_101, y = const_264_promoted)[name = string("op_5962")]; - bool input_125_interleave_0 = const()[name = string("input_125_interleave_0"), val = bool(false)]; - tensor input_125 = concat(axis = var_5960, interleave = input_125_interleave_0, values = (hidden_states_101, var_5962))[name = string("input_125")]; - tensor normed_149_axes_0 = const()[name = string("normed_149_axes_0"), val = tensor([-1])]; - fp16 var_5957_to_fp16 = const()[name = string("op_5957_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_149_cast_fp16 = layer_norm(axes = normed_149_axes_0, epsilon = var_5957_to_fp16, x = input_125)[name = string("normed_149_cast_fp16")]; - tensor normed_151_begin_0 = const()[name = string("normed_151_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_151_end_0 = const()[name = string("normed_151_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_151_end_mask_0 = const()[name = string("normed_151_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_151 = slice_by_index(begin = normed_151_begin_0, end = normed_151_end_0, end_mask = normed_151_end_mask_0, x = normed_149_cast_fp16)[name = string("normed_151")]; - tensor var_5976_to_fp16 = const()[name = string("op_5976_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166352512)))]; - tensor q_13_cast_fp16 = mul(x = normed_151, y = var_5976_to_fp16)[name = string("q_13_cast_fp16")]; - int32 var_5987 = const()[name = string("op_5987"), val = int32(-1)]; - fp16 const_268_promoted = const()[name = string("const_268_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_103 = transpose(perm = var_5938, x = var_5933)[name = string("transpose_177")]; - tensor var_5989 = mul(x = hidden_states_103, y = const_268_promoted)[name = string("op_5989")]; - bool input_127_interleave_0 = const()[name = string("input_127_interleave_0"), val = bool(false)]; - tensor input_127 = concat(axis = var_5987, interleave = input_127_interleave_0, values = (hidden_states_103, var_5989))[name = string("input_127")]; - tensor normed_153_axes_0 = const()[name = string("normed_153_axes_0"), val = tensor([-1])]; - fp16 var_5984_to_fp16 = const()[name = string("op_5984_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_153_cast_fp16 = layer_norm(axes = normed_153_axes_0, epsilon = var_5984_to_fp16, x = input_127)[name = string("normed_153_cast_fp16")]; - tensor normed_155_begin_0 = const()[name = string("normed_155_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_155_end_0 = const()[name = string("normed_155_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_155_end_mask_0 = const()[name = string("normed_155_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_155 = slice_by_index(begin = normed_155_begin_0, end = normed_155_end_0, end_mask = normed_155_end_mask_0, x = normed_153_cast_fp16)[name = string("normed_155")]; - tensor var_6003_to_fp16 = const()[name = string("op_6003_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166353088)))]; - tensor k_13_cast_fp16 = mul(x = normed_155, y = var_6003_to_fp16)[name = string("k_13_cast_fp16")]; - tensor var_6017_cast_fp16 = mul(x = q_13_cast_fp16, y = cos_5)[name = string("op_6017_cast_fp16")]; - tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_25_cast_fp16 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = q_13_cast_fp16)[name = string("x1_25_cast_fp16")]; - tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_25_cast_fp16 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = q_13_cast_fp16)[name = string("x2_25_cast_fp16")]; - fp16 const_274_promoted_to_fp16 = const()[name = string("const_274_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_6038_cast_fp16 = mul(x = x2_25_cast_fp16, y = const_274_promoted_to_fp16)[name = string("op_6038_cast_fp16")]; - int32 var_6040 = const()[name = string("op_6040"), val = int32(-1)]; - bool var_6041_interleave_0 = const()[name = string("op_6041_interleave_0"), val = bool(false)]; - tensor var_6041_cast_fp16 = concat(axis = var_6040, interleave = var_6041_interleave_0, values = (var_6038_cast_fp16, x1_25_cast_fp16))[name = string("op_6041_cast_fp16")]; - tensor var_6042_cast_fp16 = mul(x = var_6041_cast_fp16, y = sin_5)[name = string("op_6042_cast_fp16")]; - tensor query_states_51_cast_fp16 = add(x = var_6017_cast_fp16, y = var_6042_cast_fp16)[name = string("query_states_51_cast_fp16")]; - tensor var_6045_cast_fp16 = mul(x = k_13_cast_fp16, y = cos_5)[name = string("op_6045_cast_fp16")]; - tensor x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_27_cast_fp16 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = k_13_cast_fp16)[name = string("x1_27_cast_fp16")]; - tensor x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_27_cast_fp16 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = k_13_cast_fp16)[name = string("x2_27_cast_fp16")]; - fp16 const_277_promoted_to_fp16 = const()[name = string("const_277_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_6066_cast_fp16 = mul(x = x2_27_cast_fp16, y = const_277_promoted_to_fp16)[name = string("op_6066_cast_fp16")]; - int32 var_6068 = const()[name = string("op_6068"), val = int32(-1)]; - bool var_6069_interleave_0 = const()[name = string("op_6069_interleave_0"), val = bool(false)]; - tensor var_6069_cast_fp16 = concat(axis = var_6068, interleave = var_6069_interleave_0, values = (var_6066_cast_fp16, x1_27_cast_fp16))[name = string("op_6069_cast_fp16")]; - tensor var_6070_cast_fp16 = mul(x = var_6069_cast_fp16, y = sin_5)[name = string("op_6070_cast_fp16")]; - tensor key_states_63_cast_fp16 = add(x = var_6045_cast_fp16, y = var_6070_cast_fp16)[name = string("key_states_63_cast_fp16")]; - tensor expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor([5])]; - tensor expand_dims_73 = const()[name = string("expand_dims_73"), val = tensor([0])]; - tensor expand_dims_75 = const()[name = string("expand_dims_75"), val = tensor([0])]; - tensor expand_dims_76 = const()[name = string("expand_dims_76"), val = tensor([6])]; - int32 concat_110_axis_0 = const()[name = string("concat_110_axis_0"), val = int32(0)]; - bool concat_110_interleave_0 = const()[name = string("concat_110_interleave_0"), val = bool(false)]; - tensor concat_110 = concat(axis = concat_110_axis_0, interleave = concat_110_interleave_0, values = (expand_dims_72, expand_dims_73, current_pos, expand_dims_75))[name = string("concat_110")]; - tensor concat_111_values1_0 = const()[name = string("concat_111_values1_0"), val = tensor([0])]; - tensor concat_111_values3_0 = const()[name = string("concat_111_values3_0"), val = tensor([0])]; - int32 concat_111_axis_0 = const()[name = string("concat_111_axis_0"), val = int32(0)]; - bool concat_111_interleave_0 = const()[name = string("concat_111_interleave_0"), val = bool(false)]; - tensor concat_111 = concat(axis = concat_111_axis_0, interleave = concat_111_interleave_0, values = (expand_dims_76, concat_111_values1_0, end_pos_1, concat_111_values3_0))[name = string("concat_111")]; - tensor model_model_kv_cache_local_internal_tensor_assign_11_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_11_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_11_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_110, begin_mask = model_model_kv_cache_local_internal_tensor_assign_11_begin_mask_0, end = concat_111, end_mask = model_model_kv_cache_local_internal_tensor_assign_11_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_11_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_11_stride_0, update = key_states_63_cast_fp16, x = coreml_update_state_61)[name = string("model_model_kv_cache_local_internal_tensor_assign_11_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_11_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_116_write_state")]; - tensor coreml_update_state_64 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_116")]; - tensor expand_dims_78 = const()[name = string("expand_dims_78"), val = tensor([27])]; - tensor expand_dims_79 = const()[name = string("expand_dims_79"), val = tensor([0])]; - tensor expand_dims_81 = const()[name = string("expand_dims_81"), val = tensor([0])]; - tensor expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor([28])]; - int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)]; - bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)]; - tensor concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (expand_dims_78, expand_dims_79, current_pos, expand_dims_81))[name = string("concat_114")]; - tensor concat_115_values1_0 = const()[name = string("concat_115_values1_0"), val = tensor([0])]; - tensor concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor([0])]; - int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)]; - bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)]; - tensor concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (expand_dims_82, concat_115_values1_0, end_pos_1, concat_115_values3_0))[name = string("concat_115")]; - tensor model_model_kv_cache_local_internal_tensor_assign_12_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_12_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_12_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor value_states_51 = transpose(perm = var_5949, x = var_5944)[name = string("transpose_176")]; - tensor model_model_kv_cache_local_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_114, begin_mask = model_model_kv_cache_local_internal_tensor_assign_12_begin_mask_0, end = concat_115, end_mask = model_model_kv_cache_local_internal_tensor_assign_12_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_12_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_12_stride_0, update = value_states_51, x = coreml_update_state_64)[name = string("model_model_kv_cache_local_internal_tensor_assign_12_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_12_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_117_write_state")]; - tensor coreml_update_state_65 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_117")]; - tensor var_6169_begin_0 = const()[name = string("op_6169_begin_0"), val = tensor([5, 0, 0, 0])]; - tensor var_6169_end_0 = const()[name = string("op_6169_end_0"), val = tensor([6, 1, 512, 256])]; - tensor var_6169_end_mask_0 = const()[name = string("op_6169_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_6169_cast_fp16 = slice_by_index(begin = var_6169_begin_0, end = var_6169_end_0, end_mask = var_6169_end_mask_0, x = coreml_update_state_65)[name = string("op_6169_cast_fp16")]; - tensor var_6176_begin_0 = const()[name = string("op_6176_begin_0"), val = tensor([27, 0, 0, 0])]; - tensor var_6176_end_0 = const()[name = string("op_6176_end_0"), val = tensor([28, 1, 512, 256])]; - tensor var_6176_end_mask_0 = const()[name = string("op_6176_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_6176_cast_fp16 = slice_by_index(begin = var_6176_begin_0, end = var_6176_end_0, end_mask = var_6176_end_mask_0, x = coreml_update_state_65)[name = string("op_6176_cast_fp16")]; - tensor var_6215 = const()[name = string("op_6215"), val = tensor([1, 4, 1, 1])]; - tensor x_101_cast_fp16 = tile(reps = var_6215, x = var_6169_cast_fp16)[name = string("x_101_cast_fp16")]; - tensor var_6235 = const()[name = string("op_6235"), val = tensor([1, 4, 1, 1])]; - tensor x_107_cast_fp16 = tile(reps = var_6235, x = var_6176_cast_fp16)[name = string("x_107_cast_fp16")]; - bool var_6262_transpose_x_0 = const()[name = string("op_6262_transpose_x_0"), val = bool(false)]; - bool var_6262_transpose_y_0 = const()[name = string("op_6262_transpose_y_0"), val = bool(true)]; - tensor var_6262 = matmul(transpose_x = var_6262_transpose_x_0, transpose_y = var_6262_transpose_y_0, x = query_states_51_cast_fp16, y = x_101_cast_fp16)[name = string("op_6262")]; - fp16 var_6263_to_fp16 = const()[name = string("op_6263_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_25_cast_fp16 = mul(x = var_6262, y = var_6263_to_fp16)[name = string("attn_weights_25_cast_fp16")]; - tensor attn_weights_27_cast_fp16 = add(x = attn_weights_25_cast_fp16, y = mask_slice_1)[name = string("attn_weights_27_cast_fp16")]; - int32 var_6298 = const()[name = string("op_6298"), val = int32(-1)]; - tensor var_6300_cast_fp16 = softmax(axis = var_6298, x = attn_weights_27_cast_fp16)[name = string("op_6300_cast_fp16")]; - tensor concat_120 = const()[name = string("concat_120"), val = tensor([4, 64, 512])]; - tensor reshape_18_cast_fp16 = reshape(shape = concat_120, x = var_6300_cast_fp16)[name = string("reshape_18_cast_fp16")]; - tensor concat_121 = const()[name = string("concat_121"), val = tensor([4, 512, 256])]; - tensor reshape_19_cast_fp16 = reshape(shape = concat_121, x = x_107_cast_fp16)[name = string("reshape_19_cast_fp16")]; - bool matmul_6_transpose_x_0 = const()[name = string("matmul_6_transpose_x_0"), val = bool(false)]; - bool matmul_6_transpose_y_0 = const()[name = string("matmul_6_transpose_y_0"), val = bool(false)]; - tensor matmul_6_cast_fp16 = matmul(transpose_x = matmul_6_transpose_x_0, transpose_y = matmul_6_transpose_y_0, x = reshape_18_cast_fp16, y = reshape_19_cast_fp16)[name = string("matmul_6_cast_fp16")]; - tensor concat_125 = const()[name = string("concat_125"), val = tensor([1, 4, 64, 256])]; - tensor reshape_20_cast_fp16 = reshape(shape = concat_125, x = matmul_6_cast_fp16)[name = string("reshape_20_cast_fp16")]; - tensor var_6312_perm_0 = const()[name = string("op_6312_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_6331 = const()[name = string("op_6331"), val = tensor([1, 64, 1024])]; - tensor var_6312_cast_fp16 = transpose(perm = var_6312_perm_0, x = reshape_20_cast_fp16)[name = string("transpose_175")]; - tensor attn_output_65_cast_fp16 = reshape(shape = var_6331, x = var_6312_cast_fp16)[name = string("attn_output_65_cast_fp16")]; - tensor var_6336 = const()[name = string("op_6336"), val = tensor([0, 2, 1])]; - string var_6352_pad_type_0 = const()[name = string("op_6352_pad_type_0"), val = string("valid")]; - int32 var_6352_groups_0 = const()[name = string("op_6352_groups_0"), val = int32(1)]; - tensor var_6352_strides_0 = const()[name = string("op_6352_strides_0"), val = tensor([1])]; - tensor var_6352_pad_0 = const()[name = string("op_6352_pad_0"), val = tensor([0, 0])]; - tensor var_6352_dilations_0 = const()[name = string("op_6352_dilations_0"), val = tensor([1])]; - tensor squeeze_6_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166353664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167238464))))[name = string("squeeze_6_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_6337_cast_fp16 = transpose(perm = var_6336, x = attn_output_65_cast_fp16)[name = string("transpose_174")]; - tensor var_6352_cast_fp16 = conv(dilations = var_6352_dilations_0, groups = var_6352_groups_0, pad = var_6352_pad_0, pad_type = var_6352_pad_type_0, strides = var_6352_strides_0, weight = squeeze_6_cast_fp16_to_fp32_to_fp16_palettized, x = var_6337_cast_fp16)[name = string("op_6352_cast_fp16")]; - tensor var_6356 = const()[name = string("op_6356"), val = tensor([0, 2, 1])]; - int32 var_6367 = const()[name = string("op_6367"), val = int32(-1)]; - fp16 const_289_promoted_to_fp16 = const()[name = string("const_289_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_105_cast_fp16 = transpose(perm = var_6356, x = var_6352_cast_fp16)[name = string("transpose_173")]; - tensor var_6369_cast_fp16 = mul(x = hidden_states_105_cast_fp16, y = const_289_promoted_to_fp16)[name = string("op_6369_cast_fp16")]; - bool input_131_interleave_0 = const()[name = string("input_131_interleave_0"), val = bool(false)]; - tensor input_131_cast_fp16 = concat(axis = var_6367, interleave = input_131_interleave_0, values = (hidden_states_105_cast_fp16, var_6369_cast_fp16))[name = string("input_131_cast_fp16")]; - tensor normed_157_axes_0 = const()[name = string("normed_157_axes_0"), val = tensor([-1])]; - fp16 var_6364_to_fp16 = const()[name = string("op_6364_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_157_cast_fp16 = layer_norm(axes = normed_157_axes_0, epsilon = var_6364_to_fp16, x = input_131_cast_fp16)[name = string("normed_157_cast_fp16")]; - tensor normed_159_begin_0 = const()[name = string("normed_159_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_159_end_0 = const()[name = string("normed_159_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_159_end_mask_0 = const()[name = string("normed_159_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_159_cast_fp16 = slice_by_index(begin = normed_159_begin_0, end = normed_159_end_0, end_mask = normed_159_end_mask_0, x = normed_157_cast_fp16)[name = string("normed_159_cast_fp16")]; - tensor var_6383_to_fp16 = const()[name = string("op_6383_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167256960)))]; - tensor attn_output_69_cast_fp16 = mul(x = normed_159_cast_fp16, y = var_6383_to_fp16)[name = string("attn_output_69_cast_fp16")]; - tensor hidden_states_107_cast_fp16 = add(x = hidden_states_97_cast_fp16, y = attn_output_69_cast_fp16)[name = string("hidden_states_107_cast_fp16")]; - int32 var_6396 = const()[name = string("op_6396"), val = int32(-1)]; - fp16 const_293_promoted_to_fp16 = const()[name = string("const_293_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_6398_cast_fp16 = mul(x = hidden_states_107_cast_fp16, y = const_293_promoted_to_fp16)[name = string("op_6398_cast_fp16")]; - bool input_133_interleave_0 = const()[name = string("input_133_interleave_0"), val = bool(false)]; - tensor input_133_cast_fp16 = concat(axis = var_6396, interleave = input_133_interleave_0, values = (hidden_states_107_cast_fp16, var_6398_cast_fp16))[name = string("input_133_cast_fp16")]; - tensor normed_161_axes_0 = const()[name = string("normed_161_axes_0"), val = tensor([-1])]; - fp16 var_6393_to_fp16 = const()[name = string("op_6393_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_161_cast_fp16 = layer_norm(axes = normed_161_axes_0, epsilon = var_6393_to_fp16, x = input_133_cast_fp16)[name = string("normed_161_cast_fp16")]; - tensor normed_163_begin_0 = const()[name = string("normed_163_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_163_end_0 = const()[name = string("normed_163_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_163_end_mask_0 = const()[name = string("normed_163_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_163_cast_fp16 = slice_by_index(begin = normed_163_begin_0, end = normed_163_end_0, end_mask = normed_163_end_mask_0, x = normed_161_cast_fp16)[name = string("normed_163_cast_fp16")]; - tensor var_6412_to_fp16 = const()[name = string("op_6412_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167259328)))]; - tensor x_109_cast_fp16 = mul(x = normed_163_cast_fp16, y = var_6412_to_fp16)[name = string("x_109_cast_fp16")]; - tensor var_6424 = const()[name = string("op_6424"), val = tensor([0, 2, 1])]; - tensor input_135_axes_0 = const()[name = string("input_135_axes_0"), val = tensor([2])]; - tensor var_6425_cast_fp16 = transpose(perm = var_6424, x = x_109_cast_fp16)[name = string("transpose_172")]; - tensor input_135_cast_fp16 = expand_dims(axes = input_135_axes_0, x = var_6425_cast_fp16)[name = string("input_135_cast_fp16")]; - string x_111_pad_type_0 = const()[name = string("x_111_pad_type_0"), val = string("valid")]; - tensor x_111_strides_0 = const()[name = string("x_111_strides_0"), val = tensor([1, 1])]; - tensor x_111_pad_0 = const()[name = string("x_111_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_111_dilations_0 = const()[name = string("x_111_dilations_0"), val = tensor([1, 1])]; - int32 x_111_groups_0 = const()[name = string("x_111_groups_0"), val = int32(1)]; - tensor model_model_layers_6_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(951752192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(957724224))))[name = string("model_model_layers_6_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_111_cast_fp16 = conv(dilations = x_111_dilations_0, groups = x_111_groups_0, pad = x_111_pad_0, pad_type = x_111_pad_type_0, strides = x_111_strides_0, weight = model_model_layers_6_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_135_cast_fp16)[name = string("x_111_cast_fp16")]; - string b_13_pad_type_0 = const()[name = string("b_13_pad_type_0"), val = string("valid")]; - tensor b_13_strides_0 = const()[name = string("b_13_strides_0"), val = tensor([1, 1])]; - tensor b_13_pad_0 = const()[name = string("b_13_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_13_dilations_0 = const()[name = string("b_13_dilations_0"), val = tensor([1, 1])]; - int32 b_13_groups_0 = const()[name = string("b_13_groups_0"), val = int32(1)]; - tensor model_model_layers_6_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(957834880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(963806912))))[name = string("model_model_layers_6_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_13_cast_fp16 = conv(dilations = b_13_dilations_0, groups = b_13_groups_0, pad = b_13_pad_0, pad_type = b_13_pad_type_0, strides = b_13_strides_0, weight = model_model_layers_6_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_135_cast_fp16)[name = string("b_13_cast_fp16")]; - string var_6450_mode_0 = const()[name = string("op_6450_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_6450_cast_fp16 = gelu(mode = var_6450_mode_0, x = x_111_cast_fp16)[name = string("op_6450_cast_fp16")]; - tensor input_137_cast_fp16 = mul(x = var_6450_cast_fp16, y = b_13_cast_fp16)[name = string("input_137_cast_fp16")]; - string e_13_pad_type_0 = const()[name = string("e_13_pad_type_0"), val = string("valid")]; - tensor e_13_strides_0 = const()[name = string("e_13_strides_0"), val = tensor([1, 1])]; - tensor e_13_pad_0 = const()[name = string("e_13_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_13_dilations_0 = const()[name = string("e_13_dilations_0"), val = tensor([1, 1])]; - int32 e_13_groups_0 = const()[name = string("e_13_groups_0"), val = int32(1)]; - tensor model_model_layers_6_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179427072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185399104))))[name = string("model_model_layers_6_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_13_cast_fp16 = conv(dilations = e_13_dilations_0, groups = e_13_groups_0, pad = e_13_pad_0, pad_type = e_13_pad_type_0, strides = e_13_strides_0, weight = model_model_layers_6_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_137_cast_fp16)[name = string("e_13_cast_fp16")]; - tensor var_6458_axes_0 = const()[name = string("op_6458_axes_0"), val = tensor([2])]; - tensor var_6458_cast_fp16 = squeeze(axes = var_6458_axes_0, x = e_13_cast_fp16)[name = string("op_6458_cast_fp16")]; - tensor var_6459 = const()[name = string("op_6459"), val = tensor([0, 2, 1])]; - int32 var_6470 = const()[name = string("op_6470"), val = int32(-1)]; - fp16 const_297_promoted_to_fp16 = const()[name = string("const_297_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_109_cast_fp16 = transpose(perm = var_6459, x = var_6458_cast_fp16)[name = string("transpose_171")]; - tensor var_6472_cast_fp16 = mul(x = hidden_states_109_cast_fp16, y = const_297_promoted_to_fp16)[name = string("op_6472_cast_fp16")]; - bool input_139_interleave_0 = const()[name = string("input_139_interleave_0"), val = bool(false)]; - tensor input_139_cast_fp16 = concat(axis = var_6470, interleave = input_139_interleave_0, values = (hidden_states_109_cast_fp16, var_6472_cast_fp16))[name = string("input_139_cast_fp16")]; - tensor normed_165_axes_0 = const()[name = string("normed_165_axes_0"), val = tensor([-1])]; - fp16 var_6467_to_fp16 = const()[name = string("op_6467_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_165_cast_fp16 = layer_norm(axes = normed_165_axes_0, epsilon = var_6467_to_fp16, x = input_139_cast_fp16)[name = string("normed_165_cast_fp16")]; - tensor normed_167_begin_0 = const()[name = string("normed_167_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_167_end_0 = const()[name = string("normed_167_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_167_end_mask_0 = const()[name = string("normed_167_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_167_cast_fp16 = slice_by_index(begin = normed_167_begin_0, end = normed_167_end_0, end_mask = normed_167_end_mask_0, x = normed_165_cast_fp16)[name = string("normed_167_cast_fp16")]; - tensor var_6486_to_fp16 = const()[name = string("op_6486_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185417600)))]; - tensor hidden_states_111_cast_fp16 = mul(x = normed_167_cast_fp16, y = var_6486_to_fp16)[name = string("hidden_states_111_cast_fp16")]; - tensor hidden_states_113_cast_fp16 = add(x = hidden_states_107_cast_fp16, y = hidden_states_111_cast_fp16)[name = string("hidden_states_113_cast_fp16")]; - int32 var_6540 = const()[name = string("op_6540"), val = int32(-1)]; - fp16 const_302_promoted_to_fp16 = const()[name = string("const_302_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_6542_cast_fp16 = mul(x = hidden_states_113_cast_fp16, y = const_302_promoted_to_fp16)[name = string("op_6542_cast_fp16")]; - bool input_141_interleave_0 = const()[name = string("input_141_interleave_0"), val = bool(false)]; - tensor input_141_cast_fp16 = concat(axis = var_6540, interleave = input_141_interleave_0, values = (hidden_states_113_cast_fp16, var_6542_cast_fp16))[name = string("input_141_cast_fp16")]; - tensor normed_169_axes_0 = const()[name = string("normed_169_axes_0"), val = tensor([-1])]; - fp16 var_6537_to_fp16 = const()[name = string("op_6537_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_169_cast_fp16 = layer_norm(axes = normed_169_axes_0, epsilon = var_6537_to_fp16, x = input_141_cast_fp16)[name = string("normed_169_cast_fp16")]; - tensor normed_171_begin_0 = const()[name = string("normed_171_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_171_end_0 = const()[name = string("normed_171_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_171_end_mask_0 = const()[name = string("normed_171_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_171_cast_fp16 = slice_by_index(begin = normed_171_begin_0, end = normed_171_end_0, end_mask = normed_171_end_mask_0, x = normed_169_cast_fp16)[name = string("normed_171_cast_fp16")]; - tensor var_6556_to_fp16 = const()[name = string("op_6556_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185419968)))]; - tensor hidden_states_115_cast_fp16 = mul(x = normed_171_cast_fp16, y = var_6556_to_fp16)[name = string("hidden_states_115_cast_fp16")]; - tensor var_6567 = const()[name = string("op_6567"), val = tensor([0, 2, 1])]; - tensor var_6570_axes_0 = const()[name = string("op_6570_axes_0"), val = tensor([2])]; - tensor var_6568_cast_fp16 = transpose(perm = var_6567, x = hidden_states_115_cast_fp16)[name = string("transpose_170")]; - tensor var_6570_cast_fp16 = expand_dims(axes = var_6570_axes_0, x = var_6568_cast_fp16)[name = string("op_6570_cast_fp16")]; - string query_states_57_pad_type_0 = const()[name = string("query_states_57_pad_type_0"), val = string("valid")]; - tensor query_states_57_strides_0 = const()[name = string("query_states_57_strides_0"), val = tensor([1, 1])]; - tensor query_states_57_pad_0 = const()[name = string("query_states_57_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_57_dilations_0 = const()[name = string("query_states_57_dilations_0"), val = tensor([1, 1])]; - int32 query_states_57_groups_0 = const()[name = string("query_states_57_groups_0"), val = int32(1)]; - tensor query_states_57 = conv(dilations = query_states_57_dilations_0, groups = query_states_57_groups_0, pad = query_states_57_pad_0, pad_type = query_states_57_pad_type_0, strides = query_states_57_strides_0, weight = model_model_layers_7_self_attn_q_proj_weight_palettized, x = var_6570_cast_fp16)[name = string("query_states_57")]; - string key_states_71_pad_type_0 = const()[name = string("key_states_71_pad_type_0"), val = string("valid")]; - tensor key_states_71_strides_0 = const()[name = string("key_states_71_strides_0"), val = tensor([1, 1])]; - tensor key_states_71_pad_0 = const()[name = string("key_states_71_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_71_dilations_0 = const()[name = string("key_states_71_dilations_0"), val = tensor([1, 1])]; - int32 key_states_71_groups_0 = const()[name = string("key_states_71_groups_0"), val = int32(1)]; - tensor key_states_71 = conv(dilations = key_states_71_dilations_0, groups = key_states_71_groups_0, pad = key_states_71_pad_0, pad_type = key_states_71_pad_type_0, strides = key_states_71_strides_0, weight = model_model_layers_7_self_attn_k_proj_weight_palettized, x = var_6570_cast_fp16)[name = string("key_states_71")]; - string value_states_57_pad_type_0 = const()[name = string("value_states_57_pad_type_0"), val = string("valid")]; - tensor value_states_57_strides_0 = const()[name = string("value_states_57_strides_0"), val = tensor([1, 1])]; - tensor value_states_57_pad_0 = const()[name = string("value_states_57_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_57_dilations_0 = const()[name = string("value_states_57_dilations_0"), val = tensor([1, 1])]; - int32 value_states_57_groups_0 = const()[name = string("value_states_57_groups_0"), val = int32(1)]; - tensor value_states_57 = conv(dilations = value_states_57_dilations_0, groups = value_states_57_groups_0, pad = value_states_57_pad_0, pad_type = value_states_57_pad_type_0, strides = value_states_57_strides_0, weight = model_model_layers_7_self_attn_v_proj_weight_palettized, x = var_6570_cast_fp16)[name = string("value_states_57")]; - tensor var_6612 = const()[name = string("op_6612"), val = tensor([1, 4, 256, 64])]; - tensor var_6613 = reshape(shape = var_6612, x = query_states_57)[name = string("op_6613")]; - tensor var_6618 = const()[name = string("op_6618"), val = tensor([0, 1, 3, 2])]; - tensor var_6623 = const()[name = string("op_6623"), val = tensor([1, 1, 256, 64])]; - tensor var_6624 = reshape(shape = var_6623, x = key_states_71)[name = string("op_6624")]; - tensor var_6629 = const()[name = string("op_6629"), val = tensor([0, 1, 3, 2])]; - tensor var_6634 = const()[name = string("op_6634"), val = tensor([1, 1, 256, 64])]; - tensor var_6635 = reshape(shape = var_6634, x = value_states_57)[name = string("op_6635")]; - tensor var_6640 = const()[name = string("op_6640"), val = tensor([0, 1, 3, 2])]; - int32 var_6651 = const()[name = string("op_6651"), val = int32(-1)]; - fp16 const_307_promoted = const()[name = string("const_307_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_117 = transpose(perm = var_6618, x = var_6613)[name = string("transpose_169")]; - tensor var_6653 = mul(x = hidden_states_117, y = const_307_promoted)[name = string("op_6653")]; - bool input_145_interleave_0 = const()[name = string("input_145_interleave_0"), val = bool(false)]; - tensor input_145 = concat(axis = var_6651, interleave = input_145_interleave_0, values = (hidden_states_117, var_6653))[name = string("input_145")]; - tensor normed_173_axes_0 = const()[name = string("normed_173_axes_0"), val = tensor([-1])]; - fp16 var_6648_to_fp16 = const()[name = string("op_6648_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_173_cast_fp16 = layer_norm(axes = normed_173_axes_0, epsilon = var_6648_to_fp16, x = input_145)[name = string("normed_173_cast_fp16")]; - tensor normed_175_begin_0 = const()[name = string("normed_175_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_175_end_0 = const()[name = string("normed_175_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_175_end_mask_0 = const()[name = string("normed_175_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_175 = slice_by_index(begin = normed_175_begin_0, end = normed_175_end_0, end_mask = normed_175_end_mask_0, x = normed_173_cast_fp16)[name = string("normed_175")]; - tensor var_6667_to_fp16 = const()[name = string("op_6667_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185422336)))]; - tensor q_15_cast_fp16 = mul(x = normed_175, y = var_6667_to_fp16)[name = string("q_15_cast_fp16")]; - int32 var_6678 = const()[name = string("op_6678"), val = int32(-1)]; - fp16 const_311_promoted = const()[name = string("const_311_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_119 = transpose(perm = var_6629, x = var_6624)[name = string("transpose_168")]; - tensor var_6680 = mul(x = hidden_states_119, y = const_311_promoted)[name = string("op_6680")]; - bool input_147_interleave_0 = const()[name = string("input_147_interleave_0"), val = bool(false)]; - tensor input_147 = concat(axis = var_6678, interleave = input_147_interleave_0, values = (hidden_states_119, var_6680))[name = string("input_147")]; - tensor normed_177_axes_0 = const()[name = string("normed_177_axes_0"), val = tensor([-1])]; - fp16 var_6675_to_fp16 = const()[name = string("op_6675_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_177_cast_fp16 = layer_norm(axes = normed_177_axes_0, epsilon = var_6675_to_fp16, x = input_147)[name = string("normed_177_cast_fp16")]; - tensor normed_179_begin_0 = const()[name = string("normed_179_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_179_end_0 = const()[name = string("normed_179_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_179_end_mask_0 = const()[name = string("normed_179_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_179 = slice_by_index(begin = normed_179_begin_0, end = normed_179_end_0, end_mask = normed_179_end_mask_0, x = normed_177_cast_fp16)[name = string("normed_179")]; - tensor var_6694_to_fp16 = const()[name = string("op_6694_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185422912)))]; - tensor k_15_cast_fp16 = mul(x = normed_179, y = var_6694_to_fp16)[name = string("k_15_cast_fp16")]; - tensor var_6708_cast_fp16 = mul(x = q_15_cast_fp16, y = cos_5)[name = string("op_6708_cast_fp16")]; - tensor x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_29_cast_fp16 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = q_15_cast_fp16)[name = string("x1_29_cast_fp16")]; - tensor x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_29_cast_fp16 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = q_15_cast_fp16)[name = string("x2_29_cast_fp16")]; - fp16 const_317_promoted_to_fp16 = const()[name = string("const_317_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_6729_cast_fp16 = mul(x = x2_29_cast_fp16, y = const_317_promoted_to_fp16)[name = string("op_6729_cast_fp16")]; - int32 var_6731 = const()[name = string("op_6731"), val = int32(-1)]; - bool var_6732_interleave_0 = const()[name = string("op_6732_interleave_0"), val = bool(false)]; - tensor var_6732_cast_fp16 = concat(axis = var_6731, interleave = var_6732_interleave_0, values = (var_6729_cast_fp16, x1_29_cast_fp16))[name = string("op_6732_cast_fp16")]; - tensor var_6733_cast_fp16 = mul(x = var_6732_cast_fp16, y = sin_5)[name = string("op_6733_cast_fp16")]; - tensor query_states_59_cast_fp16 = add(x = var_6708_cast_fp16, y = var_6733_cast_fp16)[name = string("query_states_59_cast_fp16")]; - tensor var_6736_cast_fp16 = mul(x = k_15_cast_fp16, y = cos_5)[name = string("op_6736_cast_fp16")]; - tensor x1_31_begin_0 = const()[name = string("x1_31_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_31_end_0 = const()[name = string("x1_31_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_31_end_mask_0 = const()[name = string("x1_31_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_31_cast_fp16 = slice_by_index(begin = x1_31_begin_0, end = x1_31_end_0, end_mask = x1_31_end_mask_0, x = k_15_cast_fp16)[name = string("x1_31_cast_fp16")]; - tensor x2_31_begin_0 = const()[name = string("x2_31_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_31_end_0 = const()[name = string("x2_31_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_31_end_mask_0 = const()[name = string("x2_31_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_31_cast_fp16 = slice_by_index(begin = x2_31_begin_0, end = x2_31_end_0, end_mask = x2_31_end_mask_0, x = k_15_cast_fp16)[name = string("x2_31_cast_fp16")]; - fp16 const_320_promoted_to_fp16 = const()[name = string("const_320_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_6757_cast_fp16 = mul(x = x2_31_cast_fp16, y = const_320_promoted_to_fp16)[name = string("op_6757_cast_fp16")]; - int32 var_6759 = const()[name = string("op_6759"), val = int32(-1)]; - bool var_6760_interleave_0 = const()[name = string("op_6760_interleave_0"), val = bool(false)]; - tensor var_6760_cast_fp16 = concat(axis = var_6759, interleave = var_6760_interleave_0, values = (var_6757_cast_fp16, x1_31_cast_fp16))[name = string("op_6760_cast_fp16")]; - tensor var_6761_cast_fp16 = mul(x = var_6760_cast_fp16, y = sin_5)[name = string("op_6761_cast_fp16")]; - tensor key_states_73_cast_fp16 = add(x = var_6736_cast_fp16, y = var_6761_cast_fp16)[name = string("key_states_73_cast_fp16")]; - tensor expand_dims_84 = const()[name = string("expand_dims_84"), val = tensor([6])]; - tensor expand_dims_85 = const()[name = string("expand_dims_85"), val = tensor([0])]; - tensor expand_dims_87 = const()[name = string("expand_dims_87"), val = tensor([0])]; - tensor expand_dims_88 = const()[name = string("expand_dims_88"), val = tensor([7])]; - int32 concat_128_axis_0 = const()[name = string("concat_128_axis_0"), val = int32(0)]; - bool concat_128_interleave_0 = const()[name = string("concat_128_interleave_0"), val = bool(false)]; - tensor concat_128 = concat(axis = concat_128_axis_0, interleave = concat_128_interleave_0, values = (expand_dims_84, expand_dims_85, current_pos, expand_dims_87))[name = string("concat_128")]; - tensor concat_129_values1_0 = const()[name = string("concat_129_values1_0"), val = tensor([0])]; - tensor concat_129_values3_0 = const()[name = string("concat_129_values3_0"), val = tensor([0])]; - int32 concat_129_axis_0 = const()[name = string("concat_129_axis_0"), val = int32(0)]; - bool concat_129_interleave_0 = const()[name = string("concat_129_interleave_0"), val = bool(false)]; - tensor concat_129 = concat(axis = concat_129_axis_0, interleave = concat_129_interleave_0, values = (expand_dims_88, concat_129_values1_0, end_pos_1, concat_129_values3_0))[name = string("concat_129")]; - tensor model_model_kv_cache_local_internal_tensor_assign_13_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_13_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_13_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_128, begin_mask = model_model_kv_cache_local_internal_tensor_assign_13_begin_mask_0, end = concat_129, end_mask = model_model_kv_cache_local_internal_tensor_assign_13_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_13_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_13_stride_0, update = key_states_73_cast_fp16, x = coreml_update_state_65)[name = string("model_model_kv_cache_local_internal_tensor_assign_13_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_13_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_118_write_state")]; - tensor coreml_update_state_66 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_118")]; - tensor expand_dims_90 = const()[name = string("expand_dims_90"), val = tensor([28])]; - tensor expand_dims_91 = const()[name = string("expand_dims_91"), val = tensor([0])]; - tensor expand_dims_93 = const()[name = string("expand_dims_93"), val = tensor([0])]; - tensor expand_dims_94 = const()[name = string("expand_dims_94"), val = tensor([29])]; - int32 concat_132_axis_0 = const()[name = string("concat_132_axis_0"), val = int32(0)]; - bool concat_132_interleave_0 = const()[name = string("concat_132_interleave_0"), val = bool(false)]; - tensor concat_132 = concat(axis = concat_132_axis_0, interleave = concat_132_interleave_0, values = (expand_dims_90, expand_dims_91, current_pos, expand_dims_93))[name = string("concat_132")]; - tensor concat_133_values1_0 = const()[name = string("concat_133_values1_0"), val = tensor([0])]; - tensor concat_133_values3_0 = const()[name = string("concat_133_values3_0"), val = tensor([0])]; - int32 concat_133_axis_0 = const()[name = string("concat_133_axis_0"), val = int32(0)]; - bool concat_133_interleave_0 = const()[name = string("concat_133_interleave_0"), val = bool(false)]; - tensor concat_133 = concat(axis = concat_133_axis_0, interleave = concat_133_interleave_0, values = (expand_dims_94, concat_133_values1_0, end_pos_1, concat_133_values3_0))[name = string("concat_133")]; - tensor model_model_kv_cache_local_internal_tensor_assign_14_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_14_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_14_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor value_states_59 = transpose(perm = var_6640, x = var_6635)[name = string("transpose_167")]; - tensor model_model_kv_cache_local_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_132, begin_mask = model_model_kv_cache_local_internal_tensor_assign_14_begin_mask_0, end = concat_133, end_mask = model_model_kv_cache_local_internal_tensor_assign_14_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_14_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_14_stride_0, update = value_states_59, x = coreml_update_state_66)[name = string("model_model_kv_cache_local_internal_tensor_assign_14_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_14_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_119_write_state")]; - tensor coreml_update_state_67 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_119")]; - tensor var_6860_begin_0 = const()[name = string("op_6860_begin_0"), val = tensor([6, 0, 0, 0])]; - tensor var_6860_end_0 = const()[name = string("op_6860_end_0"), val = tensor([7, 1, 512, 256])]; - tensor var_6860_end_mask_0 = const()[name = string("op_6860_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_6860_cast_fp16 = slice_by_index(begin = var_6860_begin_0, end = var_6860_end_0, end_mask = var_6860_end_mask_0, x = coreml_update_state_67)[name = string("op_6860_cast_fp16")]; - tensor var_6867_begin_0 = const()[name = string("op_6867_begin_0"), val = tensor([28, 0, 0, 0])]; - tensor var_6867_end_0 = const()[name = string("op_6867_end_0"), val = tensor([29, 1, 512, 256])]; - tensor var_6867_end_mask_0 = const()[name = string("op_6867_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_6867_cast_fp16 = slice_by_index(begin = var_6867_begin_0, end = var_6867_end_0, end_mask = var_6867_end_mask_0, x = coreml_update_state_67)[name = string("op_6867_cast_fp16")]; - tensor var_6906 = const()[name = string("op_6906"), val = tensor([1, 4, 1, 1])]; - tensor x_117_cast_fp16 = tile(reps = var_6906, x = var_6860_cast_fp16)[name = string("x_117_cast_fp16")]; - tensor var_6926 = const()[name = string("op_6926"), val = tensor([1, 4, 1, 1])]; - tensor x_123_cast_fp16 = tile(reps = var_6926, x = var_6867_cast_fp16)[name = string("x_123_cast_fp16")]; - bool var_6953_transpose_x_0 = const()[name = string("op_6953_transpose_x_0"), val = bool(false)]; - bool var_6953_transpose_y_0 = const()[name = string("op_6953_transpose_y_0"), val = bool(true)]; - tensor var_6953 = matmul(transpose_x = var_6953_transpose_x_0, transpose_y = var_6953_transpose_y_0, x = query_states_59_cast_fp16, y = x_117_cast_fp16)[name = string("op_6953")]; - fp16 var_6954_to_fp16 = const()[name = string("op_6954_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_29_cast_fp16 = mul(x = var_6953, y = var_6954_to_fp16)[name = string("attn_weights_29_cast_fp16")]; - tensor attn_weights_31_cast_fp16 = add(x = attn_weights_29_cast_fp16, y = mask_slice_1)[name = string("attn_weights_31_cast_fp16")]; - int32 var_6989 = const()[name = string("op_6989"), val = int32(-1)]; - tensor var_6991_cast_fp16 = softmax(axis = var_6989, x = attn_weights_31_cast_fp16)[name = string("op_6991_cast_fp16")]; - tensor concat_138 = const()[name = string("concat_138"), val = tensor([4, 64, 512])]; - tensor reshape_21_cast_fp16 = reshape(shape = concat_138, x = var_6991_cast_fp16)[name = string("reshape_21_cast_fp16")]; - tensor concat_139 = const()[name = string("concat_139"), val = tensor([4, 512, 256])]; - tensor reshape_22_cast_fp16 = reshape(shape = concat_139, x = x_123_cast_fp16)[name = string("reshape_22_cast_fp16")]; - bool matmul_7_transpose_x_0 = const()[name = string("matmul_7_transpose_x_0"), val = bool(false)]; - bool matmul_7_transpose_y_0 = const()[name = string("matmul_7_transpose_y_0"), val = bool(false)]; - tensor matmul_7_cast_fp16 = matmul(transpose_x = matmul_7_transpose_x_0, transpose_y = matmul_7_transpose_y_0, x = reshape_21_cast_fp16, y = reshape_22_cast_fp16)[name = string("matmul_7_cast_fp16")]; - tensor concat_143 = const()[name = string("concat_143"), val = tensor([1, 4, 64, 256])]; - tensor reshape_23_cast_fp16 = reshape(shape = concat_143, x = matmul_7_cast_fp16)[name = string("reshape_23_cast_fp16")]; - tensor var_7003_perm_0 = const()[name = string("op_7003_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_7022 = const()[name = string("op_7022"), val = tensor([1, 64, 1024])]; - tensor var_7003_cast_fp16 = transpose(perm = var_7003_perm_0, x = reshape_23_cast_fp16)[name = string("transpose_166")]; - tensor attn_output_75_cast_fp16 = reshape(shape = var_7022, x = var_7003_cast_fp16)[name = string("attn_output_75_cast_fp16")]; - tensor var_7027 = const()[name = string("op_7027"), val = tensor([0, 2, 1])]; - string var_7043_pad_type_0 = const()[name = string("op_7043_pad_type_0"), val = string("valid")]; - int32 var_7043_groups_0 = const()[name = string("op_7043_groups_0"), val = int32(1)]; - tensor var_7043_strides_0 = const()[name = string("op_7043_strides_0"), val = tensor([1])]; - tensor var_7043_pad_0 = const()[name = string("op_7043_pad_0"), val = tensor([0, 0])]; - tensor var_7043_dilations_0 = const()[name = string("op_7043_dilations_0"), val = tensor([1])]; - tensor squeeze_7_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185423488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186308288))))[name = string("squeeze_7_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_7028_cast_fp16 = transpose(perm = var_7027, x = attn_output_75_cast_fp16)[name = string("transpose_165")]; - tensor var_7043_cast_fp16 = conv(dilations = var_7043_dilations_0, groups = var_7043_groups_0, pad = var_7043_pad_0, pad_type = var_7043_pad_type_0, strides = var_7043_strides_0, weight = squeeze_7_cast_fp16_to_fp32_to_fp16_palettized, x = var_7028_cast_fp16)[name = string("op_7043_cast_fp16")]; - tensor var_7047 = const()[name = string("op_7047"), val = tensor([0, 2, 1])]; - int32 var_7058 = const()[name = string("op_7058"), val = int32(-1)]; - fp16 const_332_promoted_to_fp16 = const()[name = string("const_332_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_121_cast_fp16 = transpose(perm = var_7047, x = var_7043_cast_fp16)[name = string("transpose_164")]; - tensor var_7060_cast_fp16 = mul(x = hidden_states_121_cast_fp16, y = const_332_promoted_to_fp16)[name = string("op_7060_cast_fp16")]; - bool input_151_interleave_0 = const()[name = string("input_151_interleave_0"), val = bool(false)]; - tensor input_151_cast_fp16 = concat(axis = var_7058, interleave = input_151_interleave_0, values = (hidden_states_121_cast_fp16, var_7060_cast_fp16))[name = string("input_151_cast_fp16")]; - tensor normed_181_axes_0 = const()[name = string("normed_181_axes_0"), val = tensor([-1])]; - fp16 var_7055_to_fp16 = const()[name = string("op_7055_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_181_cast_fp16 = layer_norm(axes = normed_181_axes_0, epsilon = var_7055_to_fp16, x = input_151_cast_fp16)[name = string("normed_181_cast_fp16")]; - tensor normed_183_begin_0 = const()[name = string("normed_183_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_183_end_0 = const()[name = string("normed_183_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_183_end_mask_0 = const()[name = string("normed_183_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_183_cast_fp16 = slice_by_index(begin = normed_183_begin_0, end = normed_183_end_0, end_mask = normed_183_end_mask_0, x = normed_181_cast_fp16)[name = string("normed_183_cast_fp16")]; - tensor var_7074_to_fp16 = const()[name = string("op_7074_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186326784)))]; - tensor attn_output_79_cast_fp16 = mul(x = normed_183_cast_fp16, y = var_7074_to_fp16)[name = string("attn_output_79_cast_fp16")]; - tensor hidden_states_123_cast_fp16 = add(x = hidden_states_113_cast_fp16, y = attn_output_79_cast_fp16)[name = string("hidden_states_123_cast_fp16")]; - int32 var_7087 = const()[name = string("op_7087"), val = int32(-1)]; - fp16 const_336_promoted_to_fp16 = const()[name = string("const_336_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_7089_cast_fp16 = mul(x = hidden_states_123_cast_fp16, y = const_336_promoted_to_fp16)[name = string("op_7089_cast_fp16")]; - bool input_153_interleave_0 = const()[name = string("input_153_interleave_0"), val = bool(false)]; - tensor input_153_cast_fp16 = concat(axis = var_7087, interleave = input_153_interleave_0, values = (hidden_states_123_cast_fp16, var_7089_cast_fp16))[name = string("input_153_cast_fp16")]; - tensor normed_185_axes_0 = const()[name = string("normed_185_axes_0"), val = tensor([-1])]; - fp16 var_7084_to_fp16 = const()[name = string("op_7084_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_185_cast_fp16 = layer_norm(axes = normed_185_axes_0, epsilon = var_7084_to_fp16, x = input_153_cast_fp16)[name = string("normed_185_cast_fp16")]; - tensor normed_187_begin_0 = const()[name = string("normed_187_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_187_end_0 = const()[name = string("normed_187_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_187_end_mask_0 = const()[name = string("normed_187_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_187_cast_fp16 = slice_by_index(begin = normed_187_begin_0, end = normed_187_end_0, end_mask = normed_187_end_mask_0, x = normed_185_cast_fp16)[name = string("normed_187_cast_fp16")]; - tensor var_7103_to_fp16 = const()[name = string("op_7103_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186329152)))]; - tensor x_125_cast_fp16 = mul(x = normed_187_cast_fp16, y = var_7103_to_fp16)[name = string("x_125_cast_fp16")]; - tensor var_7115 = const()[name = string("op_7115"), val = tensor([0, 2, 1])]; - tensor input_155_axes_0 = const()[name = string("input_155_axes_0"), val = tensor([2])]; - tensor var_7116_cast_fp16 = transpose(perm = var_7115, x = x_125_cast_fp16)[name = string("transpose_163")]; - tensor input_155_cast_fp16 = expand_dims(axes = input_155_axes_0, x = var_7116_cast_fp16)[name = string("input_155_cast_fp16")]; - string x_127_pad_type_0 = const()[name = string("x_127_pad_type_0"), val = string("valid")]; - tensor x_127_strides_0 = const()[name = string("x_127_strides_0"), val = tensor([1, 1])]; - tensor x_127_pad_0 = const()[name = string("x_127_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_127_dilations_0 = const()[name = string("x_127_dilations_0"), val = tensor([1, 1])]; - int32 x_127_groups_0 = const()[name = string("x_127_groups_0"), val = int32(1)]; - tensor model_model_layers_7_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(963917568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(969889600))))[name = string("model_model_layers_7_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_127_cast_fp16 = conv(dilations = x_127_dilations_0, groups = x_127_groups_0, pad = x_127_pad_0, pad_type = x_127_pad_type_0, strides = x_127_strides_0, weight = model_model_layers_7_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_155_cast_fp16)[name = string("x_127_cast_fp16")]; - string b_15_pad_type_0 = const()[name = string("b_15_pad_type_0"), val = string("valid")]; - tensor b_15_strides_0 = const()[name = string("b_15_strides_0"), val = tensor([1, 1])]; - tensor b_15_pad_0 = const()[name = string("b_15_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_15_dilations_0 = const()[name = string("b_15_dilations_0"), val = tensor([1, 1])]; - int32 b_15_groups_0 = const()[name = string("b_15_groups_0"), val = int32(1)]; - tensor model_model_layers_7_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(970000256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(975972288))))[name = string("model_model_layers_7_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_15_cast_fp16 = conv(dilations = b_15_dilations_0, groups = b_15_groups_0, pad = b_15_pad_0, pad_type = b_15_pad_type_0, strides = b_15_strides_0, weight = model_model_layers_7_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_155_cast_fp16)[name = string("b_15_cast_fp16")]; - string var_7141_mode_0 = const()[name = string("op_7141_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_7141_cast_fp16 = gelu(mode = var_7141_mode_0, x = x_127_cast_fp16)[name = string("op_7141_cast_fp16")]; - tensor input_157_cast_fp16 = mul(x = var_7141_cast_fp16, y = b_15_cast_fp16)[name = string("input_157_cast_fp16")]; - string e_15_pad_type_0 = const()[name = string("e_15_pad_type_0"), val = string("valid")]; - tensor e_15_strides_0 = const()[name = string("e_15_strides_0"), val = tensor([1, 1])]; - tensor e_15_pad_0 = const()[name = string("e_15_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_15_dilations_0 = const()[name = string("e_15_dilations_0"), val = tensor([1, 1])]; - int32 e_15_groups_0 = const()[name = string("e_15_groups_0"), val = int32(1)]; - tensor model_model_layers_7_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198496896))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204468928))))[name = string("model_model_layers_7_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_15_cast_fp16 = conv(dilations = e_15_dilations_0, groups = e_15_groups_0, pad = e_15_pad_0, pad_type = e_15_pad_type_0, strides = e_15_strides_0, weight = model_model_layers_7_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_157_cast_fp16)[name = string("e_15_cast_fp16")]; - tensor var_7149_axes_0 = const()[name = string("op_7149_axes_0"), val = tensor([2])]; - tensor var_7149_cast_fp16 = squeeze(axes = var_7149_axes_0, x = e_15_cast_fp16)[name = string("op_7149_cast_fp16")]; - tensor var_7150 = const()[name = string("op_7150"), val = tensor([0, 2, 1])]; - int32 var_7161 = const()[name = string("op_7161"), val = int32(-1)]; - fp16 const_340_promoted_to_fp16 = const()[name = string("const_340_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_125_cast_fp16 = transpose(perm = var_7150, x = var_7149_cast_fp16)[name = string("transpose_162")]; - tensor var_7163_cast_fp16 = mul(x = hidden_states_125_cast_fp16, y = const_340_promoted_to_fp16)[name = string("op_7163_cast_fp16")]; - bool input_159_interleave_0 = const()[name = string("input_159_interleave_0"), val = bool(false)]; - tensor input_159_cast_fp16 = concat(axis = var_7161, interleave = input_159_interleave_0, values = (hidden_states_125_cast_fp16, var_7163_cast_fp16))[name = string("input_159_cast_fp16")]; - tensor normed_189_axes_0 = const()[name = string("normed_189_axes_0"), val = tensor([-1])]; - fp16 var_7158_to_fp16 = const()[name = string("op_7158_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_189_cast_fp16 = layer_norm(axes = normed_189_axes_0, epsilon = var_7158_to_fp16, x = input_159_cast_fp16)[name = string("normed_189_cast_fp16")]; - tensor normed_191_begin_0 = const()[name = string("normed_191_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_191_end_0 = const()[name = string("normed_191_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_191_end_mask_0 = const()[name = string("normed_191_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_191_cast_fp16 = slice_by_index(begin = normed_191_begin_0, end = normed_191_end_0, end_mask = normed_191_end_mask_0, x = normed_189_cast_fp16)[name = string("normed_191_cast_fp16")]; - tensor var_7177_to_fp16 = const()[name = string("op_7177_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204487424)))]; - tensor hidden_states_127_cast_fp16 = mul(x = normed_191_cast_fp16, y = var_7177_to_fp16)[name = string("hidden_states_127_cast_fp16")]; - tensor hidden_states_129_cast_fp16 = add(x = hidden_states_123_cast_fp16, y = hidden_states_127_cast_fp16)[name = string("hidden_states_129_cast_fp16")]; - int32 var_7231 = const()[name = string("op_7231"), val = int32(-1)]; - fp16 const_345_promoted_to_fp16 = const()[name = string("const_345_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_7233_cast_fp16 = mul(x = hidden_states_129_cast_fp16, y = const_345_promoted_to_fp16)[name = string("op_7233_cast_fp16")]; - bool input_161_interleave_0 = const()[name = string("input_161_interleave_0"), val = bool(false)]; - tensor input_161_cast_fp16 = concat(axis = var_7231, interleave = input_161_interleave_0, values = (hidden_states_129_cast_fp16, var_7233_cast_fp16))[name = string("input_161_cast_fp16")]; - tensor normed_193_axes_0 = const()[name = string("normed_193_axes_0"), val = tensor([-1])]; - fp16 var_7228_to_fp16 = const()[name = string("op_7228_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_193_cast_fp16 = layer_norm(axes = normed_193_axes_0, epsilon = var_7228_to_fp16, x = input_161_cast_fp16)[name = string("normed_193_cast_fp16")]; - tensor normed_195_begin_0 = const()[name = string("normed_195_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_195_end_0 = const()[name = string("normed_195_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_195_end_mask_0 = const()[name = string("normed_195_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_195_cast_fp16 = slice_by_index(begin = normed_195_begin_0, end = normed_195_end_0, end_mask = normed_195_end_mask_0, x = normed_193_cast_fp16)[name = string("normed_195_cast_fp16")]; - tensor var_7247_to_fp16 = const()[name = string("op_7247_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204489792)))]; - tensor hidden_states_131_cast_fp16 = mul(x = normed_195_cast_fp16, y = var_7247_to_fp16)[name = string("hidden_states_131_cast_fp16")]; - tensor var_7258 = const()[name = string("op_7258"), val = tensor([0, 2, 1])]; - tensor var_7261_axes_0 = const()[name = string("op_7261_axes_0"), val = tensor([2])]; - tensor var_7259_cast_fp16 = transpose(perm = var_7258, x = hidden_states_131_cast_fp16)[name = string("transpose_161")]; - tensor var_7261_cast_fp16 = expand_dims(axes = var_7261_axes_0, x = var_7259_cast_fp16)[name = string("op_7261_cast_fp16")]; - string query_states_65_pad_type_0 = const()[name = string("query_states_65_pad_type_0"), val = string("valid")]; - tensor query_states_65_strides_0 = const()[name = string("query_states_65_strides_0"), val = tensor([1, 1])]; - tensor query_states_65_pad_0 = const()[name = string("query_states_65_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_65_dilations_0 = const()[name = string("query_states_65_dilations_0"), val = tensor([1, 1])]; - int32 query_states_65_groups_0 = const()[name = string("query_states_65_groups_0"), val = int32(1)]; - tensor query_states_65 = conv(dilations = query_states_65_dilations_0, groups = query_states_65_groups_0, pad = query_states_65_pad_0, pad_type = query_states_65_pad_type_0, strides = query_states_65_strides_0, weight = model_model_layers_8_self_attn_q_proj_weight_palettized, x = var_7261_cast_fp16)[name = string("query_states_65")]; - string key_states_81_pad_type_0 = const()[name = string("key_states_81_pad_type_0"), val = string("valid")]; - tensor key_states_81_strides_0 = const()[name = string("key_states_81_strides_0"), val = tensor([1, 1])]; - tensor key_states_81_pad_0 = const()[name = string("key_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_81_dilations_0 = const()[name = string("key_states_81_dilations_0"), val = tensor([1, 1])]; - int32 key_states_81_groups_0 = const()[name = string("key_states_81_groups_0"), val = int32(1)]; - tensor key_states_81 = conv(dilations = key_states_81_dilations_0, groups = key_states_81_groups_0, pad = key_states_81_pad_0, pad_type = key_states_81_pad_type_0, strides = key_states_81_strides_0, weight = model_model_layers_8_self_attn_k_proj_weight_palettized, x = var_7261_cast_fp16)[name = string("key_states_81")]; - string value_states_65_pad_type_0 = const()[name = string("value_states_65_pad_type_0"), val = string("valid")]; - tensor value_states_65_strides_0 = const()[name = string("value_states_65_strides_0"), val = tensor([1, 1])]; - tensor value_states_65_pad_0 = const()[name = string("value_states_65_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_65_dilations_0 = const()[name = string("value_states_65_dilations_0"), val = tensor([1, 1])]; - int32 value_states_65_groups_0 = const()[name = string("value_states_65_groups_0"), val = int32(1)]; - tensor value_states_65 = conv(dilations = value_states_65_dilations_0, groups = value_states_65_groups_0, pad = value_states_65_pad_0, pad_type = value_states_65_pad_type_0, strides = value_states_65_strides_0, weight = model_model_layers_8_self_attn_v_proj_weight_palettized, x = var_7261_cast_fp16)[name = string("value_states_65")]; - tensor var_7303 = const()[name = string("op_7303"), val = tensor([1, 4, 256, 64])]; - tensor var_7304 = reshape(shape = var_7303, x = query_states_65)[name = string("op_7304")]; - tensor var_7309 = const()[name = string("op_7309"), val = tensor([0, 1, 3, 2])]; - tensor var_7314 = const()[name = string("op_7314"), val = tensor([1, 1, 256, 64])]; - tensor var_7315 = reshape(shape = var_7314, x = key_states_81)[name = string("op_7315")]; - tensor var_7320 = const()[name = string("op_7320"), val = tensor([0, 1, 3, 2])]; - tensor var_7325 = const()[name = string("op_7325"), val = tensor([1, 1, 256, 64])]; - tensor var_7326 = reshape(shape = var_7325, x = value_states_65)[name = string("op_7326")]; - tensor var_7331 = const()[name = string("op_7331"), val = tensor([0, 1, 3, 2])]; - int32 var_7342 = const()[name = string("op_7342"), val = int32(-1)]; - fp16 const_350_promoted = const()[name = string("const_350_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_133 = transpose(perm = var_7309, x = var_7304)[name = string("transpose_160")]; - tensor var_7344 = mul(x = hidden_states_133, y = const_350_promoted)[name = string("op_7344")]; - bool input_165_interleave_0 = const()[name = string("input_165_interleave_0"), val = bool(false)]; - tensor input_165 = concat(axis = var_7342, interleave = input_165_interleave_0, values = (hidden_states_133, var_7344))[name = string("input_165")]; - tensor normed_197_axes_0 = const()[name = string("normed_197_axes_0"), val = tensor([-1])]; - fp16 var_7339_to_fp16 = const()[name = string("op_7339_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_197_cast_fp16 = layer_norm(axes = normed_197_axes_0, epsilon = var_7339_to_fp16, x = input_165)[name = string("normed_197_cast_fp16")]; - tensor normed_199_begin_0 = const()[name = string("normed_199_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_199_end_0 = const()[name = string("normed_199_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_199_end_mask_0 = const()[name = string("normed_199_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_199 = slice_by_index(begin = normed_199_begin_0, end = normed_199_end_0, end_mask = normed_199_end_mask_0, x = normed_197_cast_fp16)[name = string("normed_199")]; - tensor var_7358_to_fp16 = const()[name = string("op_7358_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204492160)))]; - tensor q_17_cast_fp16 = mul(x = normed_199, y = var_7358_to_fp16)[name = string("q_17_cast_fp16")]; - int32 var_7369 = const()[name = string("op_7369"), val = int32(-1)]; - fp16 const_354_promoted = const()[name = string("const_354_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_135 = transpose(perm = var_7320, x = var_7315)[name = string("transpose_159")]; - tensor var_7371 = mul(x = hidden_states_135, y = const_354_promoted)[name = string("op_7371")]; - bool input_167_interleave_0 = const()[name = string("input_167_interleave_0"), val = bool(false)]; - tensor input_167 = concat(axis = var_7369, interleave = input_167_interleave_0, values = (hidden_states_135, var_7371))[name = string("input_167")]; - tensor normed_201_axes_0 = const()[name = string("normed_201_axes_0"), val = tensor([-1])]; - fp16 var_7366_to_fp16 = const()[name = string("op_7366_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_201_cast_fp16 = layer_norm(axes = normed_201_axes_0, epsilon = var_7366_to_fp16, x = input_167)[name = string("normed_201_cast_fp16")]; - tensor normed_203_begin_0 = const()[name = string("normed_203_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_203_end_0 = const()[name = string("normed_203_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_203_end_mask_0 = const()[name = string("normed_203_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_203 = slice_by_index(begin = normed_203_begin_0, end = normed_203_end_0, end_mask = normed_203_end_mask_0, x = normed_201_cast_fp16)[name = string("normed_203")]; - tensor var_7385_to_fp16 = const()[name = string("op_7385_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204492736)))]; - tensor k_17_cast_fp16 = mul(x = normed_203, y = var_7385_to_fp16)[name = string("k_17_cast_fp16")]; - tensor var_7399_cast_fp16 = mul(x = q_17_cast_fp16, y = cos_5)[name = string("op_7399_cast_fp16")]; - tensor x1_33_begin_0 = const()[name = string("x1_33_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_33_end_0 = const()[name = string("x1_33_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_33_end_mask_0 = const()[name = string("x1_33_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_33_cast_fp16 = slice_by_index(begin = x1_33_begin_0, end = x1_33_end_0, end_mask = x1_33_end_mask_0, x = q_17_cast_fp16)[name = string("x1_33_cast_fp16")]; - tensor x2_33_begin_0 = const()[name = string("x2_33_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_33_end_0 = const()[name = string("x2_33_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_33_end_mask_0 = const()[name = string("x2_33_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_33_cast_fp16 = slice_by_index(begin = x2_33_begin_0, end = x2_33_end_0, end_mask = x2_33_end_mask_0, x = q_17_cast_fp16)[name = string("x2_33_cast_fp16")]; - fp16 const_360_promoted_to_fp16 = const()[name = string("const_360_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_7420_cast_fp16 = mul(x = x2_33_cast_fp16, y = const_360_promoted_to_fp16)[name = string("op_7420_cast_fp16")]; - int32 var_7422 = const()[name = string("op_7422"), val = int32(-1)]; - bool var_7423_interleave_0 = const()[name = string("op_7423_interleave_0"), val = bool(false)]; - tensor var_7423_cast_fp16 = concat(axis = var_7422, interleave = var_7423_interleave_0, values = (var_7420_cast_fp16, x1_33_cast_fp16))[name = string("op_7423_cast_fp16")]; - tensor var_7424_cast_fp16 = mul(x = var_7423_cast_fp16, y = sin_5)[name = string("op_7424_cast_fp16")]; - tensor query_states_67_cast_fp16 = add(x = var_7399_cast_fp16, y = var_7424_cast_fp16)[name = string("query_states_67_cast_fp16")]; - tensor var_7427_cast_fp16 = mul(x = k_17_cast_fp16, y = cos_5)[name = string("op_7427_cast_fp16")]; - tensor x1_35_begin_0 = const()[name = string("x1_35_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_35_end_0 = const()[name = string("x1_35_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_35_end_mask_0 = const()[name = string("x1_35_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_35_cast_fp16 = slice_by_index(begin = x1_35_begin_0, end = x1_35_end_0, end_mask = x1_35_end_mask_0, x = k_17_cast_fp16)[name = string("x1_35_cast_fp16")]; - tensor x2_35_begin_0 = const()[name = string("x2_35_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_35_end_0 = const()[name = string("x2_35_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_35_end_mask_0 = const()[name = string("x2_35_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_35_cast_fp16 = slice_by_index(begin = x2_35_begin_0, end = x2_35_end_0, end_mask = x2_35_end_mask_0, x = k_17_cast_fp16)[name = string("x2_35_cast_fp16")]; - fp16 const_363_promoted_to_fp16 = const()[name = string("const_363_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_7448_cast_fp16 = mul(x = x2_35_cast_fp16, y = const_363_promoted_to_fp16)[name = string("op_7448_cast_fp16")]; - int32 var_7450 = const()[name = string("op_7450"), val = int32(-1)]; - bool var_7451_interleave_0 = const()[name = string("op_7451_interleave_0"), val = bool(false)]; - tensor var_7451_cast_fp16 = concat(axis = var_7450, interleave = var_7451_interleave_0, values = (var_7448_cast_fp16, x1_35_cast_fp16))[name = string("op_7451_cast_fp16")]; - tensor var_7452_cast_fp16 = mul(x = var_7451_cast_fp16, y = sin_5)[name = string("op_7452_cast_fp16")]; - tensor key_states_83_cast_fp16 = add(x = var_7427_cast_fp16, y = var_7452_cast_fp16)[name = string("key_states_83_cast_fp16")]; - tensor expand_dims_96 = const()[name = string("expand_dims_96"), val = tensor([7])]; - tensor expand_dims_97 = const()[name = string("expand_dims_97"), val = tensor([0])]; - tensor expand_dims_99 = const()[name = string("expand_dims_99"), val = tensor([0])]; - tensor expand_dims_100 = const()[name = string("expand_dims_100"), val = tensor([8])]; - int32 concat_146_axis_0 = const()[name = string("concat_146_axis_0"), val = int32(0)]; - bool concat_146_interleave_0 = const()[name = string("concat_146_interleave_0"), val = bool(false)]; - tensor concat_146 = concat(axis = concat_146_axis_0, interleave = concat_146_interleave_0, values = (expand_dims_96, expand_dims_97, current_pos, expand_dims_99))[name = string("concat_146")]; - tensor concat_147_values1_0 = const()[name = string("concat_147_values1_0"), val = tensor([0])]; - tensor concat_147_values3_0 = const()[name = string("concat_147_values3_0"), val = tensor([0])]; - int32 concat_147_axis_0 = const()[name = string("concat_147_axis_0"), val = int32(0)]; - bool concat_147_interleave_0 = const()[name = string("concat_147_interleave_0"), val = bool(false)]; - tensor concat_147 = concat(axis = concat_147_axis_0, interleave = concat_147_interleave_0, values = (expand_dims_100, concat_147_values1_0, end_pos_1, concat_147_values3_0))[name = string("concat_147")]; - tensor model_model_kv_cache_local_internal_tensor_assign_15_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_15_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_15_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_146, begin_mask = model_model_kv_cache_local_internal_tensor_assign_15_begin_mask_0, end = concat_147, end_mask = model_model_kv_cache_local_internal_tensor_assign_15_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_15_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_15_stride_0, update = key_states_83_cast_fp16, x = coreml_update_state_67)[name = string("model_model_kv_cache_local_internal_tensor_assign_15_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_15_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_120_write_state")]; - tensor coreml_update_state_68 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_120")]; - tensor expand_dims_102 = const()[name = string("expand_dims_102"), val = tensor([29])]; - tensor expand_dims_103 = const()[name = string("expand_dims_103"), val = tensor([0])]; - tensor expand_dims_105 = const()[name = string("expand_dims_105"), val = tensor([0])]; - tensor expand_dims_106 = const()[name = string("expand_dims_106"), val = tensor([30])]; - int32 concat_150_axis_0 = const()[name = string("concat_150_axis_0"), val = int32(0)]; - bool concat_150_interleave_0 = const()[name = string("concat_150_interleave_0"), val = bool(false)]; - tensor concat_150 = concat(axis = concat_150_axis_0, interleave = concat_150_interleave_0, values = (expand_dims_102, expand_dims_103, current_pos, expand_dims_105))[name = string("concat_150")]; - tensor concat_151_values1_0 = const()[name = string("concat_151_values1_0"), val = tensor([0])]; - tensor concat_151_values3_0 = const()[name = string("concat_151_values3_0"), val = tensor([0])]; - int32 concat_151_axis_0 = const()[name = string("concat_151_axis_0"), val = int32(0)]; - bool concat_151_interleave_0 = const()[name = string("concat_151_interleave_0"), val = bool(false)]; - tensor concat_151 = concat(axis = concat_151_axis_0, interleave = concat_151_interleave_0, values = (expand_dims_106, concat_151_values1_0, end_pos_1, concat_151_values3_0))[name = string("concat_151")]; - tensor model_model_kv_cache_local_internal_tensor_assign_16_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_16_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_16_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor value_states_67 = transpose(perm = var_7331, x = var_7326)[name = string("transpose_158")]; - tensor model_model_kv_cache_local_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_150, begin_mask = model_model_kv_cache_local_internal_tensor_assign_16_begin_mask_0, end = concat_151, end_mask = model_model_kv_cache_local_internal_tensor_assign_16_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_16_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_16_stride_0, update = value_states_67, x = coreml_update_state_68)[name = string("model_model_kv_cache_local_internal_tensor_assign_16_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_16_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_121_write_state")]; - tensor coreml_update_state_69 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_121")]; - tensor var_7551_begin_0 = const()[name = string("op_7551_begin_0"), val = tensor([7, 0, 0, 0])]; - tensor var_7551_end_0 = const()[name = string("op_7551_end_0"), val = tensor([8, 1, 512, 256])]; - tensor var_7551_end_mask_0 = const()[name = string("op_7551_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_7551_cast_fp16 = slice_by_index(begin = var_7551_begin_0, end = var_7551_end_0, end_mask = var_7551_end_mask_0, x = coreml_update_state_69)[name = string("op_7551_cast_fp16")]; - tensor var_7558_begin_0 = const()[name = string("op_7558_begin_0"), val = tensor([29, 0, 0, 0])]; - tensor var_7558_end_0 = const()[name = string("op_7558_end_0"), val = tensor([30, 1, 512, 256])]; - tensor var_7558_end_mask_0 = const()[name = string("op_7558_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_7558_cast_fp16 = slice_by_index(begin = var_7558_begin_0, end = var_7558_end_0, end_mask = var_7558_end_mask_0, x = coreml_update_state_69)[name = string("op_7558_cast_fp16")]; - tensor var_7597 = const()[name = string("op_7597"), val = tensor([1, 4, 1, 1])]; - tensor x_133_cast_fp16 = tile(reps = var_7597, x = var_7551_cast_fp16)[name = string("x_133_cast_fp16")]; - tensor var_7617 = const()[name = string("op_7617"), val = tensor([1, 4, 1, 1])]; - tensor x_139_cast_fp16 = tile(reps = var_7617, x = var_7558_cast_fp16)[name = string("x_139_cast_fp16")]; - bool var_7644_transpose_x_0 = const()[name = string("op_7644_transpose_x_0"), val = bool(false)]; - bool var_7644_transpose_y_0 = const()[name = string("op_7644_transpose_y_0"), val = bool(true)]; - tensor var_7644 = matmul(transpose_x = var_7644_transpose_x_0, transpose_y = var_7644_transpose_y_0, x = query_states_67_cast_fp16, y = x_133_cast_fp16)[name = string("op_7644")]; - fp16 var_7645_to_fp16 = const()[name = string("op_7645_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_33_cast_fp16 = mul(x = var_7644, y = var_7645_to_fp16)[name = string("attn_weights_33_cast_fp16")]; - tensor attn_weights_35_cast_fp16 = add(x = attn_weights_33_cast_fp16, y = mask_slice_1)[name = string("attn_weights_35_cast_fp16")]; - int32 var_7680 = const()[name = string("op_7680"), val = int32(-1)]; - tensor var_7682_cast_fp16 = softmax(axis = var_7680, x = attn_weights_35_cast_fp16)[name = string("op_7682_cast_fp16")]; - tensor concat_156 = const()[name = string("concat_156"), val = tensor([4, 64, 512])]; - tensor reshape_24_cast_fp16 = reshape(shape = concat_156, x = var_7682_cast_fp16)[name = string("reshape_24_cast_fp16")]; - tensor concat_157 = const()[name = string("concat_157"), val = tensor([4, 512, 256])]; - tensor reshape_25_cast_fp16 = reshape(shape = concat_157, x = x_139_cast_fp16)[name = string("reshape_25_cast_fp16")]; - bool matmul_8_transpose_x_0 = const()[name = string("matmul_8_transpose_x_0"), val = bool(false)]; - bool matmul_8_transpose_y_0 = const()[name = string("matmul_8_transpose_y_0"), val = bool(false)]; - tensor matmul_8_cast_fp16 = matmul(transpose_x = matmul_8_transpose_x_0, transpose_y = matmul_8_transpose_y_0, x = reshape_24_cast_fp16, y = reshape_25_cast_fp16)[name = string("matmul_8_cast_fp16")]; - tensor concat_161 = const()[name = string("concat_161"), val = tensor([1, 4, 64, 256])]; - tensor reshape_26_cast_fp16 = reshape(shape = concat_161, x = matmul_8_cast_fp16)[name = string("reshape_26_cast_fp16")]; - tensor var_7694_perm_0 = const()[name = string("op_7694_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_7713 = const()[name = string("op_7713"), val = tensor([1, 64, 1024])]; - tensor var_7694_cast_fp16 = transpose(perm = var_7694_perm_0, x = reshape_26_cast_fp16)[name = string("transpose_157")]; - tensor attn_output_85_cast_fp16 = reshape(shape = var_7713, x = var_7694_cast_fp16)[name = string("attn_output_85_cast_fp16")]; - tensor var_7718 = const()[name = string("op_7718"), val = tensor([0, 2, 1])]; - string var_7734_pad_type_0 = const()[name = string("op_7734_pad_type_0"), val = string("valid")]; - int32 var_7734_groups_0 = const()[name = string("op_7734_groups_0"), val = int32(1)]; - tensor var_7734_strides_0 = const()[name = string("op_7734_strides_0"), val = tensor([1])]; - tensor var_7734_pad_0 = const()[name = string("op_7734_pad_0"), val = tensor([0, 0])]; - tensor var_7734_dilations_0 = const()[name = string("op_7734_dilations_0"), val = tensor([1])]; - tensor squeeze_8_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204493312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205378112))))[name = string("squeeze_8_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_7719_cast_fp16 = transpose(perm = var_7718, x = attn_output_85_cast_fp16)[name = string("transpose_156")]; - tensor var_7734_cast_fp16 = conv(dilations = var_7734_dilations_0, groups = var_7734_groups_0, pad = var_7734_pad_0, pad_type = var_7734_pad_type_0, strides = var_7734_strides_0, weight = squeeze_8_cast_fp16_to_fp32_to_fp16_palettized, x = var_7719_cast_fp16)[name = string("op_7734_cast_fp16")]; - tensor var_7738 = const()[name = string("op_7738"), val = tensor([0, 2, 1])]; - int32 var_7749 = const()[name = string("op_7749"), val = int32(-1)]; - fp16 const_375_promoted_to_fp16 = const()[name = string("const_375_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_137_cast_fp16 = transpose(perm = var_7738, x = var_7734_cast_fp16)[name = string("transpose_155")]; - tensor var_7751_cast_fp16 = mul(x = hidden_states_137_cast_fp16, y = const_375_promoted_to_fp16)[name = string("op_7751_cast_fp16")]; - bool input_171_interleave_0 = const()[name = string("input_171_interleave_0"), val = bool(false)]; - tensor input_171_cast_fp16 = concat(axis = var_7749, interleave = input_171_interleave_0, values = (hidden_states_137_cast_fp16, var_7751_cast_fp16))[name = string("input_171_cast_fp16")]; - tensor normed_205_axes_0 = const()[name = string("normed_205_axes_0"), val = tensor([-1])]; - fp16 var_7746_to_fp16 = const()[name = string("op_7746_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_205_cast_fp16 = layer_norm(axes = normed_205_axes_0, epsilon = var_7746_to_fp16, x = input_171_cast_fp16)[name = string("normed_205_cast_fp16")]; - tensor normed_207_begin_0 = const()[name = string("normed_207_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_207_end_0 = const()[name = string("normed_207_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_207_end_mask_0 = const()[name = string("normed_207_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_207_cast_fp16 = slice_by_index(begin = normed_207_begin_0, end = normed_207_end_0, end_mask = normed_207_end_mask_0, x = normed_205_cast_fp16)[name = string("normed_207_cast_fp16")]; - tensor var_7765_to_fp16 = const()[name = string("op_7765_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205396608)))]; - tensor attn_output_89_cast_fp16 = mul(x = normed_207_cast_fp16, y = var_7765_to_fp16)[name = string("attn_output_89_cast_fp16")]; - tensor hidden_states_139_cast_fp16 = add(x = hidden_states_129_cast_fp16, y = attn_output_89_cast_fp16)[name = string("hidden_states_139_cast_fp16")]; - int32 var_7778 = const()[name = string("op_7778"), val = int32(-1)]; - fp16 const_379_promoted_to_fp16 = const()[name = string("const_379_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_7780_cast_fp16 = mul(x = hidden_states_139_cast_fp16, y = const_379_promoted_to_fp16)[name = string("op_7780_cast_fp16")]; - bool input_173_interleave_0 = const()[name = string("input_173_interleave_0"), val = bool(false)]; - tensor input_173_cast_fp16 = concat(axis = var_7778, interleave = input_173_interleave_0, values = (hidden_states_139_cast_fp16, var_7780_cast_fp16))[name = string("input_173_cast_fp16")]; - tensor normed_209_axes_0 = const()[name = string("normed_209_axes_0"), val = tensor([-1])]; - fp16 var_7775_to_fp16 = const()[name = string("op_7775_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_209_cast_fp16 = layer_norm(axes = normed_209_axes_0, epsilon = var_7775_to_fp16, x = input_173_cast_fp16)[name = string("normed_209_cast_fp16")]; - tensor normed_211_begin_0 = const()[name = string("normed_211_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_211_end_0 = const()[name = string("normed_211_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_211_end_mask_0 = const()[name = string("normed_211_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_211_cast_fp16 = slice_by_index(begin = normed_211_begin_0, end = normed_211_end_0, end_mask = normed_211_end_mask_0, x = normed_209_cast_fp16)[name = string("normed_211_cast_fp16")]; - tensor var_7794_to_fp16 = const()[name = string("op_7794_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205398976)))]; - tensor x_141_cast_fp16 = mul(x = normed_211_cast_fp16, y = var_7794_to_fp16)[name = string("x_141_cast_fp16")]; - tensor var_7806 = const()[name = string("op_7806"), val = tensor([0, 2, 1])]; - tensor input_175_axes_0 = const()[name = string("input_175_axes_0"), val = tensor([2])]; - tensor var_7807_cast_fp16 = transpose(perm = var_7806, x = x_141_cast_fp16)[name = string("transpose_154")]; - tensor input_175_cast_fp16 = expand_dims(axes = input_175_axes_0, x = var_7807_cast_fp16)[name = string("input_175_cast_fp16")]; - string x_143_pad_type_0 = const()[name = string("x_143_pad_type_0"), val = string("valid")]; - tensor x_143_strides_0 = const()[name = string("x_143_strides_0"), val = tensor([1, 1])]; - tensor x_143_pad_0 = const()[name = string("x_143_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_143_dilations_0 = const()[name = string("x_143_dilations_0"), val = tensor([1, 1])]; - int32 x_143_groups_0 = const()[name = string("x_143_groups_0"), val = int32(1)]; - tensor model_model_layers_8_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(976082944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(982054976))))[name = string("model_model_layers_8_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_143_cast_fp16 = conv(dilations = x_143_dilations_0, groups = x_143_groups_0, pad = x_143_pad_0, pad_type = x_143_pad_type_0, strides = x_143_strides_0, weight = model_model_layers_8_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_175_cast_fp16)[name = string("x_143_cast_fp16")]; - string b_17_pad_type_0 = const()[name = string("b_17_pad_type_0"), val = string("valid")]; - tensor b_17_strides_0 = const()[name = string("b_17_strides_0"), val = tensor([1, 1])]; - tensor b_17_pad_0 = const()[name = string("b_17_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_17_dilations_0 = const()[name = string("b_17_dilations_0"), val = tensor([1, 1])]; - int32 b_17_groups_0 = const()[name = string("b_17_groups_0"), val = int32(1)]; - tensor model_model_layers_8_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(982165632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(988137664))))[name = string("model_model_layers_8_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_17_cast_fp16 = conv(dilations = b_17_dilations_0, groups = b_17_groups_0, pad = b_17_pad_0, pad_type = b_17_pad_type_0, strides = b_17_strides_0, weight = model_model_layers_8_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_175_cast_fp16)[name = string("b_17_cast_fp16")]; - string var_7832_mode_0 = const()[name = string("op_7832_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_7832_cast_fp16 = gelu(mode = var_7832_mode_0, x = x_143_cast_fp16)[name = string("op_7832_cast_fp16")]; - tensor input_177_cast_fp16 = mul(x = var_7832_cast_fp16, y = b_17_cast_fp16)[name = string("input_177_cast_fp16")]; - string e_17_pad_type_0 = const()[name = string("e_17_pad_type_0"), val = string("valid")]; - tensor e_17_strides_0 = const()[name = string("e_17_strides_0"), val = tensor([1, 1])]; - tensor e_17_pad_0 = const()[name = string("e_17_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_17_dilations_0 = const()[name = string("e_17_dilations_0"), val = tensor([1, 1])]; - int32 e_17_groups_0 = const()[name = string("e_17_groups_0"), val = int32(1)]; - tensor model_model_layers_8_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217566720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223538752))))[name = string("model_model_layers_8_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_17_cast_fp16 = conv(dilations = e_17_dilations_0, groups = e_17_groups_0, pad = e_17_pad_0, pad_type = e_17_pad_type_0, strides = e_17_strides_0, weight = model_model_layers_8_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_177_cast_fp16)[name = string("e_17_cast_fp16")]; - tensor var_7840_axes_0 = const()[name = string("op_7840_axes_0"), val = tensor([2])]; - tensor var_7840_cast_fp16 = squeeze(axes = var_7840_axes_0, x = e_17_cast_fp16)[name = string("op_7840_cast_fp16")]; - tensor var_7841 = const()[name = string("op_7841"), val = tensor([0, 2, 1])]; - int32 var_7852 = const()[name = string("op_7852"), val = int32(-1)]; - fp16 const_383_promoted_to_fp16 = const()[name = string("const_383_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_141_cast_fp16 = transpose(perm = var_7841, x = var_7840_cast_fp16)[name = string("transpose_153")]; - tensor var_7854_cast_fp16 = mul(x = hidden_states_141_cast_fp16, y = const_383_promoted_to_fp16)[name = string("op_7854_cast_fp16")]; - bool input_179_interleave_0 = const()[name = string("input_179_interleave_0"), val = bool(false)]; - tensor input_179_cast_fp16 = concat(axis = var_7852, interleave = input_179_interleave_0, values = (hidden_states_141_cast_fp16, var_7854_cast_fp16))[name = string("input_179_cast_fp16")]; - tensor normed_213_axes_0 = const()[name = string("normed_213_axes_0"), val = tensor([-1])]; - fp16 var_7849_to_fp16 = const()[name = string("op_7849_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_213_cast_fp16 = layer_norm(axes = normed_213_axes_0, epsilon = var_7849_to_fp16, x = input_179_cast_fp16)[name = string("normed_213_cast_fp16")]; - tensor normed_215_begin_0 = const()[name = string("normed_215_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_215_end_0 = const()[name = string("normed_215_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_215_end_mask_0 = const()[name = string("normed_215_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_215_cast_fp16 = slice_by_index(begin = normed_215_begin_0, end = normed_215_end_0, end_mask = normed_215_end_mask_0, x = normed_213_cast_fp16)[name = string("normed_215_cast_fp16")]; - tensor var_7868_to_fp16 = const()[name = string("op_7868_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223557248)))]; - tensor hidden_states_143_cast_fp16 = mul(x = normed_215_cast_fp16, y = var_7868_to_fp16)[name = string("hidden_states_143_cast_fp16")]; - tensor hidden_states_145_cast_fp16 = add(x = hidden_states_139_cast_fp16, y = hidden_states_143_cast_fp16)[name = string("hidden_states_145_cast_fp16")]; - int32 var_7922 = const()[name = string("op_7922"), val = int32(-1)]; - fp16 const_388_promoted_to_fp16 = const()[name = string("const_388_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_7924_cast_fp16 = mul(x = hidden_states_145_cast_fp16, y = const_388_promoted_to_fp16)[name = string("op_7924_cast_fp16")]; - bool input_181_interleave_0 = const()[name = string("input_181_interleave_0"), val = bool(false)]; - tensor input_181_cast_fp16 = concat(axis = var_7922, interleave = input_181_interleave_0, values = (hidden_states_145_cast_fp16, var_7924_cast_fp16))[name = string("input_181_cast_fp16")]; - tensor normed_217_axes_0 = const()[name = string("normed_217_axes_0"), val = tensor([-1])]; - fp16 var_7919_to_fp16 = const()[name = string("op_7919_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_217_cast_fp16 = layer_norm(axes = normed_217_axes_0, epsilon = var_7919_to_fp16, x = input_181_cast_fp16)[name = string("normed_217_cast_fp16")]; - tensor normed_219_begin_0 = const()[name = string("normed_219_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_219_end_0 = const()[name = string("normed_219_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_219_end_mask_0 = const()[name = string("normed_219_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_219_cast_fp16 = slice_by_index(begin = normed_219_begin_0, end = normed_219_end_0, end_mask = normed_219_end_mask_0, x = normed_217_cast_fp16)[name = string("normed_219_cast_fp16")]; - tensor var_7938_to_fp16 = const()[name = string("op_7938_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223559616)))]; - tensor hidden_states_147_cast_fp16 = mul(x = normed_219_cast_fp16, y = var_7938_to_fp16)[name = string("hidden_states_147_cast_fp16")]; - tensor var_7949 = const()[name = string("op_7949"), val = tensor([0, 2, 1])]; - tensor var_7952_axes_0 = const()[name = string("op_7952_axes_0"), val = tensor([2])]; - tensor var_7950_cast_fp16 = transpose(perm = var_7949, x = hidden_states_147_cast_fp16)[name = string("transpose_152")]; - tensor var_7952_cast_fp16 = expand_dims(axes = var_7952_axes_0, x = var_7950_cast_fp16)[name = string("op_7952_cast_fp16")]; - string query_states_73_pad_type_0 = const()[name = string("query_states_73_pad_type_0"), val = string("valid")]; - tensor query_states_73_strides_0 = const()[name = string("query_states_73_strides_0"), val = tensor([1, 1])]; - tensor query_states_73_pad_0 = const()[name = string("query_states_73_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_73_dilations_0 = const()[name = string("query_states_73_dilations_0"), val = tensor([1, 1])]; - int32 query_states_73_groups_0 = const()[name = string("query_states_73_groups_0"), val = int32(1)]; - tensor query_states_73 = conv(dilations = query_states_73_dilations_0, groups = query_states_73_groups_0, pad = query_states_73_pad_0, pad_type = query_states_73_pad_type_0, strides = query_states_73_strides_0, weight = model_model_layers_9_self_attn_q_proj_weight_palettized, x = var_7952_cast_fp16)[name = string("query_states_73")]; - string key_states_91_pad_type_0 = const()[name = string("key_states_91_pad_type_0"), val = string("valid")]; - tensor key_states_91_strides_0 = const()[name = string("key_states_91_strides_0"), val = tensor([1, 1])]; - tensor key_states_91_pad_0 = const()[name = string("key_states_91_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_91_dilations_0 = const()[name = string("key_states_91_dilations_0"), val = tensor([1, 1])]; - int32 key_states_91_groups_0 = const()[name = string("key_states_91_groups_0"), val = int32(1)]; - tensor key_states_91 = conv(dilations = key_states_91_dilations_0, groups = key_states_91_groups_0, pad = key_states_91_pad_0, pad_type = key_states_91_pad_type_0, strides = key_states_91_strides_0, weight = model_model_layers_9_self_attn_k_proj_weight_palettized, x = var_7952_cast_fp16)[name = string("key_states_91")]; - string value_states_73_pad_type_0 = const()[name = string("value_states_73_pad_type_0"), val = string("valid")]; - tensor value_states_73_strides_0 = const()[name = string("value_states_73_strides_0"), val = tensor([1, 1])]; - tensor value_states_73_pad_0 = const()[name = string("value_states_73_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_73_dilations_0 = const()[name = string("value_states_73_dilations_0"), val = tensor([1, 1])]; - int32 value_states_73_groups_0 = const()[name = string("value_states_73_groups_0"), val = int32(1)]; - tensor value_states_73 = conv(dilations = value_states_73_dilations_0, groups = value_states_73_groups_0, pad = value_states_73_pad_0, pad_type = value_states_73_pad_type_0, strides = value_states_73_strides_0, weight = model_model_layers_9_self_attn_v_proj_weight_palettized, x = var_7952_cast_fp16)[name = string("value_states_73")]; - tensor var_7994 = const()[name = string("op_7994"), val = tensor([1, 4, 256, 64])]; - tensor var_7995 = reshape(shape = var_7994, x = query_states_73)[name = string("op_7995")]; - tensor var_8000 = const()[name = string("op_8000"), val = tensor([0, 1, 3, 2])]; - tensor var_8005 = const()[name = string("op_8005"), val = tensor([1, 1, 256, 64])]; - tensor var_8006 = reshape(shape = var_8005, x = key_states_91)[name = string("op_8006")]; - tensor var_8011 = const()[name = string("op_8011"), val = tensor([0, 1, 3, 2])]; - tensor var_8016 = const()[name = string("op_8016"), val = tensor([1, 1, 256, 64])]; - tensor var_8017 = reshape(shape = var_8016, x = value_states_73)[name = string("op_8017")]; - tensor var_8022 = const()[name = string("op_8022"), val = tensor([0, 1, 3, 2])]; - int32 var_8033 = const()[name = string("op_8033"), val = int32(-1)]; - fp16 const_393_promoted = const()[name = string("const_393_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_149 = transpose(perm = var_8000, x = var_7995)[name = string("transpose_151")]; - tensor var_8035 = mul(x = hidden_states_149, y = const_393_promoted)[name = string("op_8035")]; - bool input_185_interleave_0 = const()[name = string("input_185_interleave_0"), val = bool(false)]; - tensor input_185 = concat(axis = var_8033, interleave = input_185_interleave_0, values = (hidden_states_149, var_8035))[name = string("input_185")]; - tensor normed_221_axes_0 = const()[name = string("normed_221_axes_0"), val = tensor([-1])]; - fp16 var_8030_to_fp16 = const()[name = string("op_8030_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_221_cast_fp16 = layer_norm(axes = normed_221_axes_0, epsilon = var_8030_to_fp16, x = input_185)[name = string("normed_221_cast_fp16")]; - tensor normed_223_begin_0 = const()[name = string("normed_223_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_223_end_0 = const()[name = string("normed_223_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_223_end_mask_0 = const()[name = string("normed_223_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_223 = slice_by_index(begin = normed_223_begin_0, end = normed_223_end_0, end_mask = normed_223_end_mask_0, x = normed_221_cast_fp16)[name = string("normed_223")]; - tensor var_8049_to_fp16 = const()[name = string("op_8049_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223561984)))]; - tensor q_19_cast_fp16 = mul(x = normed_223, y = var_8049_to_fp16)[name = string("q_19_cast_fp16")]; - int32 var_8060 = const()[name = string("op_8060"), val = int32(-1)]; - fp16 const_397_promoted = const()[name = string("const_397_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_151 = transpose(perm = var_8011, x = var_8006)[name = string("transpose_150")]; - tensor var_8062 = mul(x = hidden_states_151, y = const_397_promoted)[name = string("op_8062")]; - bool input_187_interleave_0 = const()[name = string("input_187_interleave_0"), val = bool(false)]; - tensor input_187 = concat(axis = var_8060, interleave = input_187_interleave_0, values = (hidden_states_151, var_8062))[name = string("input_187")]; - tensor normed_225_axes_0 = const()[name = string("normed_225_axes_0"), val = tensor([-1])]; - fp16 var_8057_to_fp16 = const()[name = string("op_8057_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_225_cast_fp16 = layer_norm(axes = normed_225_axes_0, epsilon = var_8057_to_fp16, x = input_187)[name = string("normed_225_cast_fp16")]; - tensor normed_227_begin_0 = const()[name = string("normed_227_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_227_end_0 = const()[name = string("normed_227_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_227_end_mask_0 = const()[name = string("normed_227_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_227 = slice_by_index(begin = normed_227_begin_0, end = normed_227_end_0, end_mask = normed_227_end_mask_0, x = normed_225_cast_fp16)[name = string("normed_227")]; - tensor var_8076_to_fp16 = const()[name = string("op_8076_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223562560)))]; - tensor k_19_cast_fp16 = mul(x = normed_227, y = var_8076_to_fp16)[name = string("k_19_cast_fp16")]; - tensor var_8090_cast_fp16 = mul(x = q_19_cast_fp16, y = cos_5)[name = string("op_8090_cast_fp16")]; - tensor x1_37_begin_0 = const()[name = string("x1_37_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_37_end_0 = const()[name = string("x1_37_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_37_end_mask_0 = const()[name = string("x1_37_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_37_cast_fp16 = slice_by_index(begin = x1_37_begin_0, end = x1_37_end_0, end_mask = x1_37_end_mask_0, x = q_19_cast_fp16)[name = string("x1_37_cast_fp16")]; - tensor x2_37_begin_0 = const()[name = string("x2_37_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_37_end_0 = const()[name = string("x2_37_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_37_end_mask_0 = const()[name = string("x2_37_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_37_cast_fp16 = slice_by_index(begin = x2_37_begin_0, end = x2_37_end_0, end_mask = x2_37_end_mask_0, x = q_19_cast_fp16)[name = string("x2_37_cast_fp16")]; - fp16 const_403_promoted_to_fp16 = const()[name = string("const_403_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_8111_cast_fp16 = mul(x = x2_37_cast_fp16, y = const_403_promoted_to_fp16)[name = string("op_8111_cast_fp16")]; - int32 var_8113 = const()[name = string("op_8113"), val = int32(-1)]; - bool var_8114_interleave_0 = const()[name = string("op_8114_interleave_0"), val = bool(false)]; - tensor var_8114_cast_fp16 = concat(axis = var_8113, interleave = var_8114_interleave_0, values = (var_8111_cast_fp16, x1_37_cast_fp16))[name = string("op_8114_cast_fp16")]; - tensor var_8115_cast_fp16 = mul(x = var_8114_cast_fp16, y = sin_5)[name = string("op_8115_cast_fp16")]; - tensor query_states_75_cast_fp16 = add(x = var_8090_cast_fp16, y = var_8115_cast_fp16)[name = string("query_states_75_cast_fp16")]; - tensor var_8118_cast_fp16 = mul(x = k_19_cast_fp16, y = cos_5)[name = string("op_8118_cast_fp16")]; - tensor x1_39_begin_0 = const()[name = string("x1_39_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_39_end_0 = const()[name = string("x1_39_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_39_end_mask_0 = const()[name = string("x1_39_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_39_cast_fp16 = slice_by_index(begin = x1_39_begin_0, end = x1_39_end_0, end_mask = x1_39_end_mask_0, x = k_19_cast_fp16)[name = string("x1_39_cast_fp16")]; - tensor x2_39_begin_0 = const()[name = string("x2_39_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_39_end_0 = const()[name = string("x2_39_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_39_end_mask_0 = const()[name = string("x2_39_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_39_cast_fp16 = slice_by_index(begin = x2_39_begin_0, end = x2_39_end_0, end_mask = x2_39_end_mask_0, x = k_19_cast_fp16)[name = string("x2_39_cast_fp16")]; - fp16 const_406_promoted_to_fp16 = const()[name = string("const_406_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_8139_cast_fp16 = mul(x = x2_39_cast_fp16, y = const_406_promoted_to_fp16)[name = string("op_8139_cast_fp16")]; - int32 var_8141 = const()[name = string("op_8141"), val = int32(-1)]; - bool var_8142_interleave_0 = const()[name = string("op_8142_interleave_0"), val = bool(false)]; - tensor var_8142_cast_fp16 = concat(axis = var_8141, interleave = var_8142_interleave_0, values = (var_8139_cast_fp16, x1_39_cast_fp16))[name = string("op_8142_cast_fp16")]; - tensor var_8143_cast_fp16 = mul(x = var_8142_cast_fp16, y = sin_5)[name = string("op_8143_cast_fp16")]; - tensor key_states_93_cast_fp16 = add(x = var_8118_cast_fp16, y = var_8143_cast_fp16)[name = string("key_states_93_cast_fp16")]; - tensor expand_dims_108 = const()[name = string("expand_dims_108"), val = tensor([8])]; - tensor expand_dims_109 = const()[name = string("expand_dims_109"), val = tensor([0])]; - tensor expand_dims_111 = const()[name = string("expand_dims_111"), val = tensor([0])]; - tensor expand_dims_112 = const()[name = string("expand_dims_112"), val = tensor([9])]; - int32 concat_164_axis_0 = const()[name = string("concat_164_axis_0"), val = int32(0)]; - bool concat_164_interleave_0 = const()[name = string("concat_164_interleave_0"), val = bool(false)]; - tensor concat_164 = concat(axis = concat_164_axis_0, interleave = concat_164_interleave_0, values = (expand_dims_108, expand_dims_109, current_pos, expand_dims_111))[name = string("concat_164")]; - tensor concat_165_values1_0 = const()[name = string("concat_165_values1_0"), val = tensor([0])]; - tensor concat_165_values3_0 = const()[name = string("concat_165_values3_0"), val = tensor([0])]; - int32 concat_165_axis_0 = const()[name = string("concat_165_axis_0"), val = int32(0)]; - bool concat_165_interleave_0 = const()[name = string("concat_165_interleave_0"), val = bool(false)]; - tensor concat_165 = concat(axis = concat_165_axis_0, interleave = concat_165_interleave_0, values = (expand_dims_112, concat_165_values1_0, end_pos_1, concat_165_values3_0))[name = string("concat_165")]; - tensor model_model_kv_cache_local_internal_tensor_assign_17_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_17_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_17_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_164, begin_mask = model_model_kv_cache_local_internal_tensor_assign_17_begin_mask_0, end = concat_165, end_mask = model_model_kv_cache_local_internal_tensor_assign_17_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_17_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_17_stride_0, update = key_states_93_cast_fp16, x = coreml_update_state_69)[name = string("model_model_kv_cache_local_internal_tensor_assign_17_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_17_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_122_write_state")]; - tensor coreml_update_state_70 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_122")]; - tensor expand_dims_114 = const()[name = string("expand_dims_114"), val = tensor([30])]; - tensor expand_dims_115 = const()[name = string("expand_dims_115"), val = tensor([0])]; - tensor expand_dims_117 = const()[name = string("expand_dims_117"), val = tensor([0])]; - tensor expand_dims_118 = const()[name = string("expand_dims_118"), val = tensor([31])]; - int32 concat_168_axis_0 = const()[name = string("concat_168_axis_0"), val = int32(0)]; - bool concat_168_interleave_0 = const()[name = string("concat_168_interleave_0"), val = bool(false)]; - tensor concat_168 = concat(axis = concat_168_axis_0, interleave = concat_168_interleave_0, values = (expand_dims_114, expand_dims_115, current_pos, expand_dims_117))[name = string("concat_168")]; - tensor concat_169_values1_0 = const()[name = string("concat_169_values1_0"), val = tensor([0])]; - tensor concat_169_values3_0 = const()[name = string("concat_169_values3_0"), val = tensor([0])]; - int32 concat_169_axis_0 = const()[name = string("concat_169_axis_0"), val = int32(0)]; - bool concat_169_interleave_0 = const()[name = string("concat_169_interleave_0"), val = bool(false)]; - tensor concat_169 = concat(axis = concat_169_axis_0, interleave = concat_169_interleave_0, values = (expand_dims_118, concat_169_values1_0, end_pos_1, concat_169_values3_0))[name = string("concat_169")]; - tensor model_model_kv_cache_local_internal_tensor_assign_18_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_18_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_18_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor value_states_75 = transpose(perm = var_8022, x = var_8017)[name = string("transpose_149")]; - tensor model_model_kv_cache_local_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_168, begin_mask = model_model_kv_cache_local_internal_tensor_assign_18_begin_mask_0, end = concat_169, end_mask = model_model_kv_cache_local_internal_tensor_assign_18_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_18_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_18_stride_0, update = value_states_75, x = coreml_update_state_70)[name = string("model_model_kv_cache_local_internal_tensor_assign_18_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_18_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_123_write_state")]; - tensor coreml_update_state_71 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_123")]; - tensor var_8242_begin_0 = const()[name = string("op_8242_begin_0"), val = tensor([8, 0, 0, 0])]; - tensor var_8242_end_0 = const()[name = string("op_8242_end_0"), val = tensor([9, 1, 512, 256])]; - tensor var_8242_end_mask_0 = const()[name = string("op_8242_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_8242_cast_fp16 = slice_by_index(begin = var_8242_begin_0, end = var_8242_end_0, end_mask = var_8242_end_mask_0, x = coreml_update_state_71)[name = string("op_8242_cast_fp16")]; - tensor var_8249_begin_0 = const()[name = string("op_8249_begin_0"), val = tensor([30, 0, 0, 0])]; - tensor var_8249_end_0 = const()[name = string("op_8249_end_0"), val = tensor([31, 1, 512, 256])]; - tensor var_8249_end_mask_0 = const()[name = string("op_8249_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_8249_cast_fp16 = slice_by_index(begin = var_8249_begin_0, end = var_8249_end_0, end_mask = var_8249_end_mask_0, x = coreml_update_state_71)[name = string("op_8249_cast_fp16")]; - tensor var_8288 = const()[name = string("op_8288"), val = tensor([1, 4, 1, 1])]; - tensor x_149_cast_fp16 = tile(reps = var_8288, x = var_8242_cast_fp16)[name = string("x_149_cast_fp16")]; - tensor var_8308 = const()[name = string("op_8308"), val = tensor([1, 4, 1, 1])]; - tensor x_155_cast_fp16 = tile(reps = var_8308, x = var_8249_cast_fp16)[name = string("x_155_cast_fp16")]; - bool var_8335_transpose_x_0 = const()[name = string("op_8335_transpose_x_0"), val = bool(false)]; - bool var_8335_transpose_y_0 = const()[name = string("op_8335_transpose_y_0"), val = bool(true)]; - tensor var_8335 = matmul(transpose_x = var_8335_transpose_x_0, transpose_y = var_8335_transpose_y_0, x = query_states_75_cast_fp16, y = x_149_cast_fp16)[name = string("op_8335")]; - fp16 var_8336_to_fp16 = const()[name = string("op_8336_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_37_cast_fp16 = mul(x = var_8335, y = var_8336_to_fp16)[name = string("attn_weights_37_cast_fp16")]; - tensor attn_weights_39_cast_fp16 = add(x = attn_weights_37_cast_fp16, y = mask_slice_1)[name = string("attn_weights_39_cast_fp16")]; - int32 var_8371 = const()[name = string("op_8371"), val = int32(-1)]; - tensor var_8373_cast_fp16 = softmax(axis = var_8371, x = attn_weights_39_cast_fp16)[name = string("op_8373_cast_fp16")]; - tensor concat_174 = const()[name = string("concat_174"), val = tensor([4, 64, 512])]; - tensor reshape_27_cast_fp16 = reshape(shape = concat_174, x = var_8373_cast_fp16)[name = string("reshape_27_cast_fp16")]; - tensor concat_175 = const()[name = string("concat_175"), val = tensor([4, 512, 256])]; - tensor reshape_28_cast_fp16 = reshape(shape = concat_175, x = x_155_cast_fp16)[name = string("reshape_28_cast_fp16")]; - bool matmul_9_transpose_x_0 = const()[name = string("matmul_9_transpose_x_0"), val = bool(false)]; - bool matmul_9_transpose_y_0 = const()[name = string("matmul_9_transpose_y_0"), val = bool(false)]; - tensor matmul_9_cast_fp16 = matmul(transpose_x = matmul_9_transpose_x_0, transpose_y = matmul_9_transpose_y_0, x = reshape_27_cast_fp16, y = reshape_28_cast_fp16)[name = string("matmul_9_cast_fp16")]; - tensor concat_179 = const()[name = string("concat_179"), val = tensor([1, 4, 64, 256])]; - tensor reshape_29_cast_fp16 = reshape(shape = concat_179, x = matmul_9_cast_fp16)[name = string("reshape_29_cast_fp16")]; - tensor var_8385_perm_0 = const()[name = string("op_8385_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_8404 = const()[name = string("op_8404"), val = tensor([1, 64, 1024])]; - tensor var_8385_cast_fp16 = transpose(perm = var_8385_perm_0, x = reshape_29_cast_fp16)[name = string("transpose_148")]; - tensor attn_output_95_cast_fp16 = reshape(shape = var_8404, x = var_8385_cast_fp16)[name = string("attn_output_95_cast_fp16")]; - tensor var_8409 = const()[name = string("op_8409"), val = tensor([0, 2, 1])]; - string var_8425_pad_type_0 = const()[name = string("op_8425_pad_type_0"), val = string("valid")]; - int32 var_8425_groups_0 = const()[name = string("op_8425_groups_0"), val = int32(1)]; - tensor var_8425_strides_0 = const()[name = string("op_8425_strides_0"), val = tensor([1])]; - tensor var_8425_pad_0 = const()[name = string("op_8425_pad_0"), val = tensor([0, 0])]; - tensor var_8425_dilations_0 = const()[name = string("op_8425_dilations_0"), val = tensor([1])]; - tensor squeeze_9_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223563136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224447936))))[name = string("squeeze_9_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_8410_cast_fp16 = transpose(perm = var_8409, x = attn_output_95_cast_fp16)[name = string("transpose_147")]; - tensor var_8425_cast_fp16 = conv(dilations = var_8425_dilations_0, groups = var_8425_groups_0, pad = var_8425_pad_0, pad_type = var_8425_pad_type_0, strides = var_8425_strides_0, weight = squeeze_9_cast_fp16_to_fp32_to_fp16_palettized, x = var_8410_cast_fp16)[name = string("op_8425_cast_fp16")]; - tensor var_8429 = const()[name = string("op_8429"), val = tensor([0, 2, 1])]; - int32 var_8440 = const()[name = string("op_8440"), val = int32(-1)]; - fp16 const_418_promoted_to_fp16 = const()[name = string("const_418_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_153_cast_fp16 = transpose(perm = var_8429, x = var_8425_cast_fp16)[name = string("transpose_146")]; - tensor var_8442_cast_fp16 = mul(x = hidden_states_153_cast_fp16, y = const_418_promoted_to_fp16)[name = string("op_8442_cast_fp16")]; - bool input_191_interleave_0 = const()[name = string("input_191_interleave_0"), val = bool(false)]; - tensor input_191_cast_fp16 = concat(axis = var_8440, interleave = input_191_interleave_0, values = (hidden_states_153_cast_fp16, var_8442_cast_fp16))[name = string("input_191_cast_fp16")]; - tensor normed_229_axes_0 = const()[name = string("normed_229_axes_0"), val = tensor([-1])]; - fp16 var_8437_to_fp16 = const()[name = string("op_8437_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_229_cast_fp16 = layer_norm(axes = normed_229_axes_0, epsilon = var_8437_to_fp16, x = input_191_cast_fp16)[name = string("normed_229_cast_fp16")]; - tensor normed_231_begin_0 = const()[name = string("normed_231_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_231_end_0 = const()[name = string("normed_231_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_231_end_mask_0 = const()[name = string("normed_231_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_231_cast_fp16 = slice_by_index(begin = normed_231_begin_0, end = normed_231_end_0, end_mask = normed_231_end_mask_0, x = normed_229_cast_fp16)[name = string("normed_231_cast_fp16")]; - tensor var_8456_to_fp16 = const()[name = string("op_8456_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224466432)))]; - tensor attn_output_99_cast_fp16 = mul(x = normed_231_cast_fp16, y = var_8456_to_fp16)[name = string("attn_output_99_cast_fp16")]; - tensor hidden_states_155_cast_fp16 = add(x = hidden_states_145_cast_fp16, y = attn_output_99_cast_fp16)[name = string("hidden_states_155_cast_fp16")]; - int32 var_8469 = const()[name = string("op_8469"), val = int32(-1)]; - fp16 const_422_promoted_to_fp16 = const()[name = string("const_422_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_8471_cast_fp16 = mul(x = hidden_states_155_cast_fp16, y = const_422_promoted_to_fp16)[name = string("op_8471_cast_fp16")]; - bool input_193_interleave_0 = const()[name = string("input_193_interleave_0"), val = bool(false)]; - tensor input_193_cast_fp16 = concat(axis = var_8469, interleave = input_193_interleave_0, values = (hidden_states_155_cast_fp16, var_8471_cast_fp16))[name = string("input_193_cast_fp16")]; - tensor normed_233_axes_0 = const()[name = string("normed_233_axes_0"), val = tensor([-1])]; - fp16 var_8466_to_fp16 = const()[name = string("op_8466_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_233_cast_fp16 = layer_norm(axes = normed_233_axes_0, epsilon = var_8466_to_fp16, x = input_193_cast_fp16)[name = string("normed_233_cast_fp16")]; - tensor normed_235_begin_0 = const()[name = string("normed_235_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_235_end_0 = const()[name = string("normed_235_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_235_end_mask_0 = const()[name = string("normed_235_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_235_cast_fp16 = slice_by_index(begin = normed_235_begin_0, end = normed_235_end_0, end_mask = normed_235_end_mask_0, x = normed_233_cast_fp16)[name = string("normed_235_cast_fp16")]; - tensor var_8485_to_fp16 = const()[name = string("op_8485_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224468800)))]; - tensor x_157_cast_fp16 = mul(x = normed_235_cast_fp16, y = var_8485_to_fp16)[name = string("x_157_cast_fp16")]; - tensor var_8497 = const()[name = string("op_8497"), val = tensor([0, 2, 1])]; - tensor input_195_axes_0 = const()[name = string("input_195_axes_0"), val = tensor([2])]; - tensor var_8498_cast_fp16 = transpose(perm = var_8497, x = x_157_cast_fp16)[name = string("transpose_145")]; - tensor input_195_cast_fp16 = expand_dims(axes = input_195_axes_0, x = var_8498_cast_fp16)[name = string("input_195_cast_fp16")]; - string x_159_pad_type_0 = const()[name = string("x_159_pad_type_0"), val = string("valid")]; - tensor x_159_strides_0 = const()[name = string("x_159_strides_0"), val = tensor([1, 1])]; - tensor x_159_pad_0 = const()[name = string("x_159_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_159_dilations_0 = const()[name = string("x_159_dilations_0"), val = tensor([1, 1])]; - int32 x_159_groups_0 = const()[name = string("x_159_groups_0"), val = int32(1)]; - tensor model_model_layers_9_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(988248320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(994220352))))[name = string("model_model_layers_9_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_159_cast_fp16 = conv(dilations = x_159_dilations_0, groups = x_159_groups_0, pad = x_159_pad_0, pad_type = x_159_pad_type_0, strides = x_159_strides_0, weight = model_model_layers_9_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_195_cast_fp16)[name = string("x_159_cast_fp16")]; - string b_19_pad_type_0 = const()[name = string("b_19_pad_type_0"), val = string("valid")]; - tensor b_19_strides_0 = const()[name = string("b_19_strides_0"), val = tensor([1, 1])]; - tensor b_19_pad_0 = const()[name = string("b_19_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_19_dilations_0 = const()[name = string("b_19_dilations_0"), val = tensor([1, 1])]; - int32 b_19_groups_0 = const()[name = string("b_19_groups_0"), val = int32(1)]; - tensor model_model_layers_9_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(994331008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1000303040))))[name = string("model_model_layers_9_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_19_cast_fp16 = conv(dilations = b_19_dilations_0, groups = b_19_groups_0, pad = b_19_pad_0, pad_type = b_19_pad_type_0, strides = b_19_strides_0, weight = model_model_layers_9_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_195_cast_fp16)[name = string("b_19_cast_fp16")]; - string var_8523_mode_0 = const()[name = string("op_8523_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_8523_cast_fp16 = gelu(mode = var_8523_mode_0, x = x_159_cast_fp16)[name = string("op_8523_cast_fp16")]; - tensor input_197_cast_fp16 = mul(x = var_8523_cast_fp16, y = b_19_cast_fp16)[name = string("input_197_cast_fp16")]; - string e_19_pad_type_0 = const()[name = string("e_19_pad_type_0"), val = string("valid")]; - tensor e_19_strides_0 = const()[name = string("e_19_strides_0"), val = tensor([1, 1])]; - tensor e_19_pad_0 = const()[name = string("e_19_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_19_dilations_0 = const()[name = string("e_19_dilations_0"), val = tensor([1, 1])]; - int32 e_19_groups_0 = const()[name = string("e_19_groups_0"), val = int32(1)]; - tensor model_model_layers_9_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236636544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242608576))))[name = string("model_model_layers_9_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_19_cast_fp16 = conv(dilations = e_19_dilations_0, groups = e_19_groups_0, pad = e_19_pad_0, pad_type = e_19_pad_type_0, strides = e_19_strides_0, weight = model_model_layers_9_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_197_cast_fp16)[name = string("e_19_cast_fp16")]; - tensor var_8531_axes_0 = const()[name = string("op_8531_axes_0"), val = tensor([2])]; - tensor var_8531_cast_fp16 = squeeze(axes = var_8531_axes_0, x = e_19_cast_fp16)[name = string("op_8531_cast_fp16")]; - tensor var_8532 = const()[name = string("op_8532"), val = tensor([0, 2, 1])]; - int32 var_8543 = const()[name = string("op_8543"), val = int32(-1)]; - fp16 const_426_promoted_to_fp16 = const()[name = string("const_426_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_157_cast_fp16 = transpose(perm = var_8532, x = var_8531_cast_fp16)[name = string("transpose_144")]; - tensor var_8545_cast_fp16 = mul(x = hidden_states_157_cast_fp16, y = const_426_promoted_to_fp16)[name = string("op_8545_cast_fp16")]; - bool input_199_interleave_0 = const()[name = string("input_199_interleave_0"), val = bool(false)]; - tensor input_199_cast_fp16 = concat(axis = var_8543, interleave = input_199_interleave_0, values = (hidden_states_157_cast_fp16, var_8545_cast_fp16))[name = string("input_199_cast_fp16")]; - tensor normed_237_axes_0 = const()[name = string("normed_237_axes_0"), val = tensor([-1])]; - fp16 var_8540_to_fp16 = const()[name = string("op_8540_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_237_cast_fp16 = layer_norm(axes = normed_237_axes_0, epsilon = var_8540_to_fp16, x = input_199_cast_fp16)[name = string("normed_237_cast_fp16")]; - tensor normed_239_begin_0 = const()[name = string("normed_239_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_239_end_0 = const()[name = string("normed_239_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_239_end_mask_0 = const()[name = string("normed_239_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_239_cast_fp16 = slice_by_index(begin = normed_239_begin_0, end = normed_239_end_0, end_mask = normed_239_end_mask_0, x = normed_237_cast_fp16)[name = string("normed_239_cast_fp16")]; - tensor var_8559_to_fp16 = const()[name = string("op_8559_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242627072)))]; - tensor hidden_states_159_cast_fp16 = mul(x = normed_239_cast_fp16, y = var_8559_to_fp16)[name = string("hidden_states_159_cast_fp16")]; - tensor hidden_states_161_cast_fp16 = add(x = hidden_states_155_cast_fp16, y = hidden_states_159_cast_fp16)[name = string("hidden_states_161_cast_fp16")]; - int32 var_8613 = const()[name = string("op_8613"), val = int32(-1)]; - fp16 const_431_promoted_to_fp16 = const()[name = string("const_431_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_8615_cast_fp16 = mul(x = hidden_states_161_cast_fp16, y = const_431_promoted_to_fp16)[name = string("op_8615_cast_fp16")]; - bool input_201_interleave_0 = const()[name = string("input_201_interleave_0"), val = bool(false)]; - tensor input_201_cast_fp16 = concat(axis = var_8613, interleave = input_201_interleave_0, values = (hidden_states_161_cast_fp16, var_8615_cast_fp16))[name = string("input_201_cast_fp16")]; - tensor normed_241_axes_0 = const()[name = string("normed_241_axes_0"), val = tensor([-1])]; - fp16 var_8610_to_fp16 = const()[name = string("op_8610_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_241_cast_fp16 = layer_norm(axes = normed_241_axes_0, epsilon = var_8610_to_fp16, x = input_201_cast_fp16)[name = string("normed_241_cast_fp16")]; - tensor normed_243_begin_0 = const()[name = string("normed_243_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_243_end_0 = const()[name = string("normed_243_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_243_end_mask_0 = const()[name = string("normed_243_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_243_cast_fp16 = slice_by_index(begin = normed_243_begin_0, end = normed_243_end_0, end_mask = normed_243_end_mask_0, x = normed_241_cast_fp16)[name = string("normed_243_cast_fp16")]; - tensor var_8629_to_fp16 = const()[name = string("op_8629_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242629440)))]; - tensor hidden_states_163_cast_fp16 = mul(x = normed_243_cast_fp16, y = var_8629_to_fp16)[name = string("hidden_states_163_cast_fp16")]; - tensor var_8640 = const()[name = string("op_8640"), val = tensor([0, 2, 1])]; - tensor var_8643_axes_0 = const()[name = string("op_8643_axes_0"), val = tensor([2])]; - tensor var_8641_cast_fp16 = transpose(perm = var_8640, x = hidden_states_163_cast_fp16)[name = string("transpose_143")]; - tensor var_8643_cast_fp16 = expand_dims(axes = var_8643_axes_0, x = var_8641_cast_fp16)[name = string("op_8643_cast_fp16")]; - string query_states_81_pad_type_0 = const()[name = string("query_states_81_pad_type_0"), val = string("valid")]; - tensor query_states_81_strides_0 = const()[name = string("query_states_81_strides_0"), val = tensor([1, 1])]; - tensor query_states_81_pad_0 = const()[name = string("query_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_81_dilations_0 = const()[name = string("query_states_81_dilations_0"), val = tensor([1, 1])]; - int32 query_states_81_groups_0 = const()[name = string("query_states_81_groups_0"), val = int32(1)]; - tensor query_states_81 = conv(dilations = query_states_81_dilations_0, groups = query_states_81_groups_0, pad = query_states_81_pad_0, pad_type = query_states_81_pad_type_0, strides = query_states_81_strides_0, weight = model_model_layers_10_self_attn_q_proj_weight_palettized, x = var_8643_cast_fp16)[name = string("query_states_81")]; - string key_states_101_pad_type_0 = const()[name = string("key_states_101_pad_type_0"), val = string("valid")]; - tensor key_states_101_strides_0 = const()[name = string("key_states_101_strides_0"), val = tensor([1, 1])]; - tensor key_states_101_pad_0 = const()[name = string("key_states_101_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_101_dilations_0 = const()[name = string("key_states_101_dilations_0"), val = tensor([1, 1])]; - int32 key_states_101_groups_0 = const()[name = string("key_states_101_groups_0"), val = int32(1)]; - tensor key_states_101 = conv(dilations = key_states_101_dilations_0, groups = key_states_101_groups_0, pad = key_states_101_pad_0, pad_type = key_states_101_pad_type_0, strides = key_states_101_strides_0, weight = model_model_layers_10_self_attn_k_proj_weight_palettized, x = var_8643_cast_fp16)[name = string("key_states_101")]; - string value_states_81_pad_type_0 = const()[name = string("value_states_81_pad_type_0"), val = string("valid")]; - tensor value_states_81_strides_0 = const()[name = string("value_states_81_strides_0"), val = tensor([1, 1])]; - tensor value_states_81_pad_0 = const()[name = string("value_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_81_dilations_0 = const()[name = string("value_states_81_dilations_0"), val = tensor([1, 1])]; - int32 value_states_81_groups_0 = const()[name = string("value_states_81_groups_0"), val = int32(1)]; - tensor value_states_81 = conv(dilations = value_states_81_dilations_0, groups = value_states_81_groups_0, pad = value_states_81_pad_0, pad_type = value_states_81_pad_type_0, strides = value_states_81_strides_0, weight = model_model_layers_10_self_attn_v_proj_weight_palettized, x = var_8643_cast_fp16)[name = string("value_states_81")]; - tensor var_8685 = const()[name = string("op_8685"), val = tensor([1, 4, 256, 64])]; - tensor var_8686 = reshape(shape = var_8685, x = query_states_81)[name = string("op_8686")]; - tensor var_8691 = const()[name = string("op_8691"), val = tensor([0, 1, 3, 2])]; - tensor var_8696 = const()[name = string("op_8696"), val = tensor([1, 1, 256, 64])]; - tensor var_8697 = reshape(shape = var_8696, x = key_states_101)[name = string("op_8697")]; - tensor var_8702 = const()[name = string("op_8702"), val = tensor([0, 1, 3, 2])]; - tensor var_8707 = const()[name = string("op_8707"), val = tensor([1, 1, 256, 64])]; - tensor var_8708 = reshape(shape = var_8707, x = value_states_81)[name = string("op_8708")]; - tensor var_8713 = const()[name = string("op_8713"), val = tensor([0, 1, 3, 2])]; - int32 var_8724 = const()[name = string("op_8724"), val = int32(-1)]; - fp16 const_436_promoted = const()[name = string("const_436_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_165 = transpose(perm = var_8691, x = var_8686)[name = string("transpose_142")]; - tensor var_8726 = mul(x = hidden_states_165, y = const_436_promoted)[name = string("op_8726")]; - bool input_205_interleave_0 = const()[name = string("input_205_interleave_0"), val = bool(false)]; - tensor input_205 = concat(axis = var_8724, interleave = input_205_interleave_0, values = (hidden_states_165, var_8726))[name = string("input_205")]; - tensor normed_245_axes_0 = const()[name = string("normed_245_axes_0"), val = tensor([-1])]; - fp16 var_8721_to_fp16 = const()[name = string("op_8721_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_245_cast_fp16 = layer_norm(axes = normed_245_axes_0, epsilon = var_8721_to_fp16, x = input_205)[name = string("normed_245_cast_fp16")]; - tensor normed_247_begin_0 = const()[name = string("normed_247_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_247_end_0 = const()[name = string("normed_247_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_247_end_mask_0 = const()[name = string("normed_247_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_247 = slice_by_index(begin = normed_247_begin_0, end = normed_247_end_0, end_mask = normed_247_end_mask_0, x = normed_245_cast_fp16)[name = string("normed_247")]; - tensor var_8740_to_fp16 = const()[name = string("op_8740_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242631808)))]; - tensor q_21_cast_fp16 = mul(x = normed_247, y = var_8740_to_fp16)[name = string("q_21_cast_fp16")]; - int32 var_8751 = const()[name = string("op_8751"), val = int32(-1)]; - fp16 const_440_promoted = const()[name = string("const_440_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_167 = transpose(perm = var_8702, x = var_8697)[name = string("transpose_141")]; - tensor var_8753 = mul(x = hidden_states_167, y = const_440_promoted)[name = string("op_8753")]; - bool input_207_interleave_0 = const()[name = string("input_207_interleave_0"), val = bool(false)]; - tensor input_207 = concat(axis = var_8751, interleave = input_207_interleave_0, values = (hidden_states_167, var_8753))[name = string("input_207")]; - tensor normed_249_axes_0 = const()[name = string("normed_249_axes_0"), val = tensor([-1])]; - fp16 var_8748_to_fp16 = const()[name = string("op_8748_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_249_cast_fp16 = layer_norm(axes = normed_249_axes_0, epsilon = var_8748_to_fp16, x = input_207)[name = string("normed_249_cast_fp16")]; - tensor normed_251_begin_0 = const()[name = string("normed_251_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_251_end_0 = const()[name = string("normed_251_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_251_end_mask_0 = const()[name = string("normed_251_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_251 = slice_by_index(begin = normed_251_begin_0, end = normed_251_end_0, end_mask = normed_251_end_mask_0, x = normed_249_cast_fp16)[name = string("normed_251")]; - tensor var_8767_to_fp16 = const()[name = string("op_8767_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242632384)))]; - tensor k_21_cast_fp16 = mul(x = normed_251, y = var_8767_to_fp16)[name = string("k_21_cast_fp16")]; - tensor var_8781_cast_fp16 = mul(x = q_21_cast_fp16, y = cos_5)[name = string("op_8781_cast_fp16")]; - tensor x1_41_begin_0 = const()[name = string("x1_41_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_41_end_0 = const()[name = string("x1_41_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_41_end_mask_0 = const()[name = string("x1_41_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_41_cast_fp16 = slice_by_index(begin = x1_41_begin_0, end = x1_41_end_0, end_mask = x1_41_end_mask_0, x = q_21_cast_fp16)[name = string("x1_41_cast_fp16")]; - tensor x2_41_begin_0 = const()[name = string("x2_41_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_41_end_0 = const()[name = string("x2_41_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_41_end_mask_0 = const()[name = string("x2_41_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_41_cast_fp16 = slice_by_index(begin = x2_41_begin_0, end = x2_41_end_0, end_mask = x2_41_end_mask_0, x = q_21_cast_fp16)[name = string("x2_41_cast_fp16")]; - fp16 const_446_promoted_to_fp16 = const()[name = string("const_446_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_8802_cast_fp16 = mul(x = x2_41_cast_fp16, y = const_446_promoted_to_fp16)[name = string("op_8802_cast_fp16")]; - int32 var_8804 = const()[name = string("op_8804"), val = int32(-1)]; - bool var_8805_interleave_0 = const()[name = string("op_8805_interleave_0"), val = bool(false)]; - tensor var_8805_cast_fp16 = concat(axis = var_8804, interleave = var_8805_interleave_0, values = (var_8802_cast_fp16, x1_41_cast_fp16))[name = string("op_8805_cast_fp16")]; - tensor var_8806_cast_fp16 = mul(x = var_8805_cast_fp16, y = sin_5)[name = string("op_8806_cast_fp16")]; - tensor query_states_83_cast_fp16 = add(x = var_8781_cast_fp16, y = var_8806_cast_fp16)[name = string("query_states_83_cast_fp16")]; - tensor var_8809_cast_fp16 = mul(x = k_21_cast_fp16, y = cos_5)[name = string("op_8809_cast_fp16")]; - tensor x1_43_begin_0 = const()[name = string("x1_43_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_43_end_0 = const()[name = string("x1_43_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_43_end_mask_0 = const()[name = string("x1_43_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_43_cast_fp16 = slice_by_index(begin = x1_43_begin_0, end = x1_43_end_0, end_mask = x1_43_end_mask_0, x = k_21_cast_fp16)[name = string("x1_43_cast_fp16")]; - tensor x2_43_begin_0 = const()[name = string("x2_43_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_43_end_0 = const()[name = string("x2_43_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_43_end_mask_0 = const()[name = string("x2_43_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_43_cast_fp16 = slice_by_index(begin = x2_43_begin_0, end = x2_43_end_0, end_mask = x2_43_end_mask_0, x = k_21_cast_fp16)[name = string("x2_43_cast_fp16")]; - fp16 const_449_promoted_to_fp16 = const()[name = string("const_449_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_8830_cast_fp16 = mul(x = x2_43_cast_fp16, y = const_449_promoted_to_fp16)[name = string("op_8830_cast_fp16")]; - int32 var_8832 = const()[name = string("op_8832"), val = int32(-1)]; - bool var_8833_interleave_0 = const()[name = string("op_8833_interleave_0"), val = bool(false)]; - tensor var_8833_cast_fp16 = concat(axis = var_8832, interleave = var_8833_interleave_0, values = (var_8830_cast_fp16, x1_43_cast_fp16))[name = string("op_8833_cast_fp16")]; - tensor var_8834_cast_fp16 = mul(x = var_8833_cast_fp16, y = sin_5)[name = string("op_8834_cast_fp16")]; - tensor key_states_103_cast_fp16 = add(x = var_8809_cast_fp16, y = var_8834_cast_fp16)[name = string("key_states_103_cast_fp16")]; - tensor expand_dims_120 = const()[name = string("expand_dims_120"), val = tensor([9])]; - tensor expand_dims_121 = const()[name = string("expand_dims_121"), val = tensor([0])]; - tensor expand_dims_123 = const()[name = string("expand_dims_123"), val = tensor([0])]; - tensor expand_dims_124 = const()[name = string("expand_dims_124"), val = tensor([10])]; - int32 concat_182_axis_0 = const()[name = string("concat_182_axis_0"), val = int32(0)]; - bool concat_182_interleave_0 = const()[name = string("concat_182_interleave_0"), val = bool(false)]; - tensor concat_182 = concat(axis = concat_182_axis_0, interleave = concat_182_interleave_0, values = (expand_dims_120, expand_dims_121, current_pos, expand_dims_123))[name = string("concat_182")]; - tensor concat_183_values1_0 = const()[name = string("concat_183_values1_0"), val = tensor([0])]; - tensor concat_183_values3_0 = const()[name = string("concat_183_values3_0"), val = tensor([0])]; - int32 concat_183_axis_0 = const()[name = string("concat_183_axis_0"), val = int32(0)]; - bool concat_183_interleave_0 = const()[name = string("concat_183_interleave_0"), val = bool(false)]; - tensor concat_183 = concat(axis = concat_183_axis_0, interleave = concat_183_interleave_0, values = (expand_dims_124, concat_183_values1_0, end_pos_1, concat_183_values3_0))[name = string("concat_183")]; - tensor model_model_kv_cache_local_internal_tensor_assign_19_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_19_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_19_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_182, begin_mask = model_model_kv_cache_local_internal_tensor_assign_19_begin_mask_0, end = concat_183, end_mask = model_model_kv_cache_local_internal_tensor_assign_19_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_19_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_19_stride_0, update = key_states_103_cast_fp16, x = coreml_update_state_71)[name = string("model_model_kv_cache_local_internal_tensor_assign_19_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_19_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_124_write_state")]; - tensor coreml_update_state_72 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_124")]; - tensor expand_dims_126 = const()[name = string("expand_dims_126"), val = tensor([31])]; - tensor expand_dims_127 = const()[name = string("expand_dims_127"), val = tensor([0])]; - tensor expand_dims_129 = const()[name = string("expand_dims_129"), val = tensor([0])]; - tensor expand_dims_130 = const()[name = string("expand_dims_130"), val = tensor([32])]; - int32 concat_186_axis_0 = const()[name = string("concat_186_axis_0"), val = int32(0)]; - bool concat_186_interleave_0 = const()[name = string("concat_186_interleave_0"), val = bool(false)]; - tensor concat_186 = concat(axis = concat_186_axis_0, interleave = concat_186_interleave_0, values = (expand_dims_126, expand_dims_127, current_pos, expand_dims_129))[name = string("concat_186")]; - tensor concat_187_values1_0 = const()[name = string("concat_187_values1_0"), val = tensor([0])]; - tensor concat_187_values3_0 = const()[name = string("concat_187_values3_0"), val = tensor([0])]; - int32 concat_187_axis_0 = const()[name = string("concat_187_axis_0"), val = int32(0)]; - bool concat_187_interleave_0 = const()[name = string("concat_187_interleave_0"), val = bool(false)]; - tensor concat_187 = concat(axis = concat_187_axis_0, interleave = concat_187_interleave_0, values = (expand_dims_130, concat_187_values1_0, end_pos_1, concat_187_values3_0))[name = string("concat_187")]; - tensor model_model_kv_cache_local_internal_tensor_assign_20_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_20_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_20_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor value_states_83 = transpose(perm = var_8713, x = var_8708)[name = string("transpose_140")]; - tensor model_model_kv_cache_local_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_186, begin_mask = model_model_kv_cache_local_internal_tensor_assign_20_begin_mask_0, end = concat_187, end_mask = model_model_kv_cache_local_internal_tensor_assign_20_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_20_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_20_stride_0, update = value_states_83, x = coreml_update_state_72)[name = string("model_model_kv_cache_local_internal_tensor_assign_20_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_20_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_125_write_state")]; - tensor coreml_update_state_73 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_125")]; - tensor var_8933_begin_0 = const()[name = string("op_8933_begin_0"), val = tensor([9, 0, 0, 0])]; - tensor var_8933_end_0 = const()[name = string("op_8933_end_0"), val = tensor([10, 1, 512, 256])]; - tensor var_8933_end_mask_0 = const()[name = string("op_8933_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_8933_cast_fp16 = slice_by_index(begin = var_8933_begin_0, end = var_8933_end_0, end_mask = var_8933_end_mask_0, x = coreml_update_state_73)[name = string("op_8933_cast_fp16")]; - tensor var_8940_begin_0 = const()[name = string("op_8940_begin_0"), val = tensor([31, 0, 0, 0])]; - tensor var_8940_end_0 = const()[name = string("op_8940_end_0"), val = tensor([32, 1, 512, 256])]; - tensor var_8940_end_mask_0 = const()[name = string("op_8940_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_8940_cast_fp16 = slice_by_index(begin = var_8940_begin_0, end = var_8940_end_0, end_mask = var_8940_end_mask_0, x = coreml_update_state_73)[name = string("op_8940_cast_fp16")]; - tensor var_8979 = const()[name = string("op_8979"), val = tensor([1, 4, 1, 1])]; - tensor x_165_cast_fp16 = tile(reps = var_8979, x = var_8933_cast_fp16)[name = string("x_165_cast_fp16")]; - tensor var_8999 = const()[name = string("op_8999"), val = tensor([1, 4, 1, 1])]; - tensor x_171_cast_fp16 = tile(reps = var_8999, x = var_8940_cast_fp16)[name = string("x_171_cast_fp16")]; - bool var_9026_transpose_x_0 = const()[name = string("op_9026_transpose_x_0"), val = bool(false)]; - bool var_9026_transpose_y_0 = const()[name = string("op_9026_transpose_y_0"), val = bool(true)]; - tensor var_9026 = matmul(transpose_x = var_9026_transpose_x_0, transpose_y = var_9026_transpose_y_0, x = query_states_83_cast_fp16, y = x_165_cast_fp16)[name = string("op_9026")]; - fp16 var_9027_to_fp16 = const()[name = string("op_9027_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_41_cast_fp16 = mul(x = var_9026, y = var_9027_to_fp16)[name = string("attn_weights_41_cast_fp16")]; - tensor attn_weights_43_cast_fp16 = add(x = attn_weights_41_cast_fp16, y = mask_slice_1)[name = string("attn_weights_43_cast_fp16")]; - int32 var_9062 = const()[name = string("op_9062"), val = int32(-1)]; - tensor var_9064_cast_fp16 = softmax(axis = var_9062, x = attn_weights_43_cast_fp16)[name = string("op_9064_cast_fp16")]; - tensor concat_192 = const()[name = string("concat_192"), val = tensor([4, 64, 512])]; - tensor reshape_30_cast_fp16 = reshape(shape = concat_192, x = var_9064_cast_fp16)[name = string("reshape_30_cast_fp16")]; - tensor concat_193 = const()[name = string("concat_193"), val = tensor([4, 512, 256])]; - tensor reshape_31_cast_fp16 = reshape(shape = concat_193, x = x_171_cast_fp16)[name = string("reshape_31_cast_fp16")]; - bool matmul_10_transpose_x_0 = const()[name = string("matmul_10_transpose_x_0"), val = bool(false)]; - bool matmul_10_transpose_y_0 = const()[name = string("matmul_10_transpose_y_0"), val = bool(false)]; - tensor matmul_10_cast_fp16 = matmul(transpose_x = matmul_10_transpose_x_0, transpose_y = matmul_10_transpose_y_0, x = reshape_30_cast_fp16, y = reshape_31_cast_fp16)[name = string("matmul_10_cast_fp16")]; - tensor concat_197 = const()[name = string("concat_197"), val = tensor([1, 4, 64, 256])]; - tensor reshape_32_cast_fp16 = reshape(shape = concat_197, x = matmul_10_cast_fp16)[name = string("reshape_32_cast_fp16")]; - tensor var_9076_perm_0 = const()[name = string("op_9076_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_9095 = const()[name = string("op_9095"), val = tensor([1, 64, 1024])]; - tensor var_9076_cast_fp16 = transpose(perm = var_9076_perm_0, x = reshape_32_cast_fp16)[name = string("transpose_139")]; - tensor attn_output_105_cast_fp16 = reshape(shape = var_9095, x = var_9076_cast_fp16)[name = string("attn_output_105_cast_fp16")]; - tensor var_9100 = const()[name = string("op_9100"), val = tensor([0, 2, 1])]; - string var_9116_pad_type_0 = const()[name = string("op_9116_pad_type_0"), val = string("valid")]; - int32 var_9116_groups_0 = const()[name = string("op_9116_groups_0"), val = int32(1)]; - tensor var_9116_strides_0 = const()[name = string("op_9116_strides_0"), val = tensor([1])]; - tensor var_9116_pad_0 = const()[name = string("op_9116_pad_0"), val = tensor([0, 0])]; - tensor var_9116_dilations_0 = const()[name = string("op_9116_dilations_0"), val = tensor([1])]; - tensor squeeze_10_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242632960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243517760))))[name = string("squeeze_10_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_9101_cast_fp16 = transpose(perm = var_9100, x = attn_output_105_cast_fp16)[name = string("transpose_138")]; - tensor var_9116_cast_fp16 = conv(dilations = var_9116_dilations_0, groups = var_9116_groups_0, pad = var_9116_pad_0, pad_type = var_9116_pad_type_0, strides = var_9116_strides_0, weight = squeeze_10_cast_fp16_to_fp32_to_fp16_palettized, x = var_9101_cast_fp16)[name = string("op_9116_cast_fp16")]; - tensor var_9120 = const()[name = string("op_9120"), val = tensor([0, 2, 1])]; - int32 var_9131 = const()[name = string("op_9131"), val = int32(-1)]; - fp16 const_461_promoted_to_fp16 = const()[name = string("const_461_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_169_cast_fp16 = transpose(perm = var_9120, x = var_9116_cast_fp16)[name = string("transpose_137")]; - tensor var_9133_cast_fp16 = mul(x = hidden_states_169_cast_fp16, y = const_461_promoted_to_fp16)[name = string("op_9133_cast_fp16")]; - bool input_211_interleave_0 = const()[name = string("input_211_interleave_0"), val = bool(false)]; - tensor input_211_cast_fp16 = concat(axis = var_9131, interleave = input_211_interleave_0, values = (hidden_states_169_cast_fp16, var_9133_cast_fp16))[name = string("input_211_cast_fp16")]; - tensor normed_253_axes_0 = const()[name = string("normed_253_axes_0"), val = tensor([-1])]; - fp16 var_9128_to_fp16 = const()[name = string("op_9128_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_253_cast_fp16 = layer_norm(axes = normed_253_axes_0, epsilon = var_9128_to_fp16, x = input_211_cast_fp16)[name = string("normed_253_cast_fp16")]; - tensor normed_255_begin_0 = const()[name = string("normed_255_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_255_end_0 = const()[name = string("normed_255_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_255_end_mask_0 = const()[name = string("normed_255_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_255_cast_fp16 = slice_by_index(begin = normed_255_begin_0, end = normed_255_end_0, end_mask = normed_255_end_mask_0, x = normed_253_cast_fp16)[name = string("normed_255_cast_fp16")]; - tensor var_9147_to_fp16 = const()[name = string("op_9147_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243536256)))]; - tensor attn_output_109_cast_fp16 = mul(x = normed_255_cast_fp16, y = var_9147_to_fp16)[name = string("attn_output_109_cast_fp16")]; - tensor hidden_states_171_cast_fp16 = add(x = hidden_states_161_cast_fp16, y = attn_output_109_cast_fp16)[name = string("hidden_states_171_cast_fp16")]; - int32 var_9160 = const()[name = string("op_9160"), val = int32(-1)]; - fp16 const_465_promoted_to_fp16 = const()[name = string("const_465_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_9162_cast_fp16 = mul(x = hidden_states_171_cast_fp16, y = const_465_promoted_to_fp16)[name = string("op_9162_cast_fp16")]; - bool input_213_interleave_0 = const()[name = string("input_213_interleave_0"), val = bool(false)]; - tensor input_213_cast_fp16 = concat(axis = var_9160, interleave = input_213_interleave_0, values = (hidden_states_171_cast_fp16, var_9162_cast_fp16))[name = string("input_213_cast_fp16")]; - tensor normed_257_axes_0 = const()[name = string("normed_257_axes_0"), val = tensor([-1])]; - fp16 var_9157_to_fp16 = const()[name = string("op_9157_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_257_cast_fp16 = layer_norm(axes = normed_257_axes_0, epsilon = var_9157_to_fp16, x = input_213_cast_fp16)[name = string("normed_257_cast_fp16")]; - tensor normed_259_begin_0 = const()[name = string("normed_259_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_259_end_0 = const()[name = string("normed_259_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_259_end_mask_0 = const()[name = string("normed_259_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_259_cast_fp16 = slice_by_index(begin = normed_259_begin_0, end = normed_259_end_0, end_mask = normed_259_end_mask_0, x = normed_257_cast_fp16)[name = string("normed_259_cast_fp16")]; - tensor var_9176_to_fp16 = const()[name = string("op_9176_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243538624)))]; - tensor x_173_cast_fp16 = mul(x = normed_259_cast_fp16, y = var_9176_to_fp16)[name = string("x_173_cast_fp16")]; - tensor var_9188 = const()[name = string("op_9188"), val = tensor([0, 2, 1])]; - tensor input_215_axes_0 = const()[name = string("input_215_axes_0"), val = tensor([2])]; - tensor var_9189_cast_fp16 = transpose(perm = var_9188, x = x_173_cast_fp16)[name = string("transpose_136")]; - tensor input_215_cast_fp16 = expand_dims(axes = input_215_axes_0, x = var_9189_cast_fp16)[name = string("input_215_cast_fp16")]; - string x_175_pad_type_0 = const()[name = string("x_175_pad_type_0"), val = string("valid")]; - tensor x_175_strides_0 = const()[name = string("x_175_strides_0"), val = tensor([1, 1])]; - tensor x_175_pad_0 = const()[name = string("x_175_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_175_dilations_0 = const()[name = string("x_175_dilations_0"), val = tensor([1, 1])]; - int32 x_175_groups_0 = const()[name = string("x_175_groups_0"), val = int32(1)]; - tensor model_model_layers_10_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1000413696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1006385728))))[name = string("model_model_layers_10_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_175_cast_fp16 = conv(dilations = x_175_dilations_0, groups = x_175_groups_0, pad = x_175_pad_0, pad_type = x_175_pad_type_0, strides = x_175_strides_0, weight = model_model_layers_10_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_215_cast_fp16)[name = string("x_175_cast_fp16")]; - string b_21_pad_type_0 = const()[name = string("b_21_pad_type_0"), val = string("valid")]; - tensor b_21_strides_0 = const()[name = string("b_21_strides_0"), val = tensor([1, 1])]; - tensor b_21_pad_0 = const()[name = string("b_21_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_21_dilations_0 = const()[name = string("b_21_dilations_0"), val = tensor([1, 1])]; - int32 b_21_groups_0 = const()[name = string("b_21_groups_0"), val = int32(1)]; - tensor model_model_layers_10_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1006496384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1012468416))))[name = string("model_model_layers_10_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_21_cast_fp16 = conv(dilations = b_21_dilations_0, groups = b_21_groups_0, pad = b_21_pad_0, pad_type = b_21_pad_type_0, strides = b_21_strides_0, weight = model_model_layers_10_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_215_cast_fp16)[name = string("b_21_cast_fp16")]; - string var_9214_mode_0 = const()[name = string("op_9214_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_9214_cast_fp16 = gelu(mode = var_9214_mode_0, x = x_175_cast_fp16)[name = string("op_9214_cast_fp16")]; - tensor input_217_cast_fp16 = mul(x = var_9214_cast_fp16, y = b_21_cast_fp16)[name = string("input_217_cast_fp16")]; - string e_21_pad_type_0 = const()[name = string("e_21_pad_type_0"), val = string("valid")]; - tensor e_21_strides_0 = const()[name = string("e_21_strides_0"), val = tensor([1, 1])]; - tensor e_21_pad_0 = const()[name = string("e_21_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_21_dilations_0 = const()[name = string("e_21_dilations_0"), val = tensor([1, 1])]; - int32 e_21_groups_0 = const()[name = string("e_21_groups_0"), val = int32(1)]; - tensor model_model_layers_10_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(255706368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261678400))))[name = string("model_model_layers_10_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_21_cast_fp16 = conv(dilations = e_21_dilations_0, groups = e_21_groups_0, pad = e_21_pad_0, pad_type = e_21_pad_type_0, strides = e_21_strides_0, weight = model_model_layers_10_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_217_cast_fp16)[name = string("e_21_cast_fp16")]; - tensor var_9222_axes_0 = const()[name = string("op_9222_axes_0"), val = tensor([2])]; - tensor var_9222_cast_fp16 = squeeze(axes = var_9222_axes_0, x = e_21_cast_fp16)[name = string("op_9222_cast_fp16")]; - tensor var_9223 = const()[name = string("op_9223"), val = tensor([0, 2, 1])]; - int32 var_9234 = const()[name = string("op_9234"), val = int32(-1)]; - fp16 const_469_promoted_to_fp16 = const()[name = string("const_469_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_173_cast_fp16 = transpose(perm = var_9223, x = var_9222_cast_fp16)[name = string("transpose_135")]; - tensor var_9236_cast_fp16 = mul(x = hidden_states_173_cast_fp16, y = const_469_promoted_to_fp16)[name = string("op_9236_cast_fp16")]; - bool input_219_interleave_0 = const()[name = string("input_219_interleave_0"), val = bool(false)]; - tensor input_219_cast_fp16 = concat(axis = var_9234, interleave = input_219_interleave_0, values = (hidden_states_173_cast_fp16, var_9236_cast_fp16))[name = string("input_219_cast_fp16")]; - tensor normed_261_axes_0 = const()[name = string("normed_261_axes_0"), val = tensor([-1])]; - fp16 var_9231_to_fp16 = const()[name = string("op_9231_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_261_cast_fp16 = layer_norm(axes = normed_261_axes_0, epsilon = var_9231_to_fp16, x = input_219_cast_fp16)[name = string("normed_261_cast_fp16")]; - tensor normed_263_begin_0 = const()[name = string("normed_263_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_263_end_0 = const()[name = string("normed_263_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_263_end_mask_0 = const()[name = string("normed_263_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_263_cast_fp16 = slice_by_index(begin = normed_263_begin_0, end = normed_263_end_0, end_mask = normed_263_end_mask_0, x = normed_261_cast_fp16)[name = string("normed_263_cast_fp16")]; - tensor var_9250_to_fp16 = const()[name = string("op_9250_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261696896)))]; - tensor hidden_states_175_cast_fp16 = mul(x = normed_263_cast_fp16, y = var_9250_to_fp16)[name = string("hidden_states_175_cast_fp16")]; - tensor hidden_states_177_cast_fp16 = add(x = hidden_states_171_cast_fp16, y = hidden_states_175_cast_fp16)[name = string("hidden_states_177_cast_fp16")]; - int32 var_9304 = const()[name = string("op_9304"), val = int32(-1)]; - fp16 const_474_promoted_to_fp16 = const()[name = string("const_474_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_9306_cast_fp16 = mul(x = hidden_states_177_cast_fp16, y = const_474_promoted_to_fp16)[name = string("op_9306_cast_fp16")]; - bool input_221_interleave_0 = const()[name = string("input_221_interleave_0"), val = bool(false)]; - tensor input_221_cast_fp16 = concat(axis = var_9304, interleave = input_221_interleave_0, values = (hidden_states_177_cast_fp16, var_9306_cast_fp16))[name = string("input_221_cast_fp16")]; - tensor normed_265_axes_0 = const()[name = string("normed_265_axes_0"), val = tensor([-1])]; - fp16 var_9301_to_fp16 = const()[name = string("op_9301_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_265_cast_fp16 = layer_norm(axes = normed_265_axes_0, epsilon = var_9301_to_fp16, x = input_221_cast_fp16)[name = string("normed_265_cast_fp16")]; - tensor normed_267_begin_0 = const()[name = string("normed_267_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_267_end_0 = const()[name = string("normed_267_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_267_end_mask_0 = const()[name = string("normed_267_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_267_cast_fp16 = slice_by_index(begin = normed_267_begin_0, end = normed_267_end_0, end_mask = normed_267_end_mask_0, x = normed_265_cast_fp16)[name = string("normed_267_cast_fp16")]; - tensor var_9320_to_fp16 = const()[name = string("op_9320_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261699264)))]; - tensor hidden_states_179_cast_fp16 = mul(x = normed_267_cast_fp16, y = var_9320_to_fp16)[name = string("hidden_states_179_cast_fp16")]; - tensor var_9331 = const()[name = string("op_9331"), val = tensor([0, 2, 1])]; - tensor var_9334_axes_0 = const()[name = string("op_9334_axes_0"), val = tensor([2])]; - tensor var_9332_cast_fp16 = transpose(perm = var_9331, x = hidden_states_179_cast_fp16)[name = string("transpose_134")]; - tensor var_9334_cast_fp16 = expand_dims(axes = var_9334_axes_0, x = var_9332_cast_fp16)[name = string("op_9334_cast_fp16")]; - string query_states_89_pad_type_0 = const()[name = string("query_states_89_pad_type_0"), val = string("valid")]; - tensor query_states_89_strides_0 = const()[name = string("query_states_89_strides_0"), val = tensor([1, 1])]; - tensor query_states_89_pad_0 = const()[name = string("query_states_89_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_89_dilations_0 = const()[name = string("query_states_89_dilations_0"), val = tensor([1, 1])]; - int32 query_states_89_groups_0 = const()[name = string("query_states_89_groups_0"), val = int32(1)]; - tensor query_states_89 = conv(dilations = query_states_89_dilations_0, groups = query_states_89_groups_0, pad = query_states_89_pad_0, pad_type = query_states_89_pad_type_0, strides = query_states_89_strides_0, weight = model_model_layers_11_self_attn_q_proj_weight_palettized, x = var_9334_cast_fp16)[name = string("query_states_89")]; - string key_states_111_pad_type_0 = const()[name = string("key_states_111_pad_type_0"), val = string("valid")]; - tensor key_states_111_strides_0 = const()[name = string("key_states_111_strides_0"), val = tensor([1, 1])]; - tensor key_states_111_pad_0 = const()[name = string("key_states_111_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_111_dilations_0 = const()[name = string("key_states_111_dilations_0"), val = tensor([1, 1])]; - int32 key_states_111_groups_0 = const()[name = string("key_states_111_groups_0"), val = int32(1)]; - tensor key_states_111 = conv(dilations = key_states_111_dilations_0, groups = key_states_111_groups_0, pad = key_states_111_pad_0, pad_type = key_states_111_pad_type_0, strides = key_states_111_strides_0, weight = model_model_layers_11_self_attn_k_proj_weight_palettized, x = var_9334_cast_fp16)[name = string("key_states_111")]; - string value_states_89_pad_type_0 = const()[name = string("value_states_89_pad_type_0"), val = string("valid")]; - tensor value_states_89_strides_0 = const()[name = string("value_states_89_strides_0"), val = tensor([1, 1])]; - tensor value_states_89_pad_0 = const()[name = string("value_states_89_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_89_dilations_0 = const()[name = string("value_states_89_dilations_0"), val = tensor([1, 1])]; - int32 value_states_89_groups_0 = const()[name = string("value_states_89_groups_0"), val = int32(1)]; - tensor value_states_89 = conv(dilations = value_states_89_dilations_0, groups = value_states_89_groups_0, pad = value_states_89_pad_0, pad_type = value_states_89_pad_type_0, strides = value_states_89_strides_0, weight = model_model_layers_11_self_attn_v_proj_weight_palettized, x = var_9334_cast_fp16)[name = string("value_states_89")]; - tensor var_9376 = const()[name = string("op_9376"), val = tensor([1, 4, 256, 64])]; - tensor var_9377 = reshape(shape = var_9376, x = query_states_89)[name = string("op_9377")]; - tensor var_9382 = const()[name = string("op_9382"), val = tensor([0, 1, 3, 2])]; - tensor var_9387 = const()[name = string("op_9387"), val = tensor([1, 1, 256, 64])]; - tensor var_9388 = reshape(shape = var_9387, x = key_states_111)[name = string("op_9388")]; - tensor var_9393 = const()[name = string("op_9393"), val = tensor([0, 1, 3, 2])]; - tensor var_9398 = const()[name = string("op_9398"), val = tensor([1, 1, 256, 64])]; - tensor var_9399 = reshape(shape = var_9398, x = value_states_89)[name = string("op_9399")]; - tensor var_9404 = const()[name = string("op_9404"), val = tensor([0, 1, 3, 2])]; - int32 var_9415 = const()[name = string("op_9415"), val = int32(-1)]; - fp16 const_479_promoted = const()[name = string("const_479_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_181 = transpose(perm = var_9382, x = var_9377)[name = string("transpose_133")]; - tensor var_9417 = mul(x = hidden_states_181, y = const_479_promoted)[name = string("op_9417")]; - bool input_225_interleave_0 = const()[name = string("input_225_interleave_0"), val = bool(false)]; - tensor input_225 = concat(axis = var_9415, interleave = input_225_interleave_0, values = (hidden_states_181, var_9417))[name = string("input_225")]; - tensor normed_269_axes_0 = const()[name = string("normed_269_axes_0"), val = tensor([-1])]; - fp16 var_9412_to_fp16 = const()[name = string("op_9412_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_269_cast_fp16 = layer_norm(axes = normed_269_axes_0, epsilon = var_9412_to_fp16, x = input_225)[name = string("normed_269_cast_fp16")]; - tensor normed_271_begin_0 = const()[name = string("normed_271_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_271_end_0 = const()[name = string("normed_271_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_271_end_mask_0 = const()[name = string("normed_271_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_271 = slice_by_index(begin = normed_271_begin_0, end = normed_271_end_0, end_mask = normed_271_end_mask_0, x = normed_269_cast_fp16)[name = string("normed_271")]; - tensor var_9431_to_fp16 = const()[name = string("op_9431_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261701632)))]; - tensor q_23_cast_fp16 = mul(x = normed_271, y = var_9431_to_fp16)[name = string("q_23_cast_fp16")]; - int32 var_9442 = const()[name = string("op_9442"), val = int32(-1)]; - fp16 const_483_promoted = const()[name = string("const_483_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_183 = transpose(perm = var_9393, x = var_9388)[name = string("transpose_132")]; - tensor var_9444 = mul(x = hidden_states_183, y = const_483_promoted)[name = string("op_9444")]; - bool input_227_interleave_0 = const()[name = string("input_227_interleave_0"), val = bool(false)]; - tensor input_227 = concat(axis = var_9442, interleave = input_227_interleave_0, values = (hidden_states_183, var_9444))[name = string("input_227")]; - tensor normed_273_axes_0 = const()[name = string("normed_273_axes_0"), val = tensor([-1])]; - fp16 var_9439_to_fp16 = const()[name = string("op_9439_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_273_cast_fp16 = layer_norm(axes = normed_273_axes_0, epsilon = var_9439_to_fp16, x = input_227)[name = string("normed_273_cast_fp16")]; - tensor normed_275_begin_0 = const()[name = string("normed_275_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_275_end_0 = const()[name = string("normed_275_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_275_end_mask_0 = const()[name = string("normed_275_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_275 = slice_by_index(begin = normed_275_begin_0, end = normed_275_end_0, end_mask = normed_275_end_mask_0, x = normed_273_cast_fp16)[name = string("normed_275")]; - tensor var_9458_to_fp16 = const()[name = string("op_9458_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261702208)))]; - tensor k_23_cast_fp16 = mul(x = normed_275, y = var_9458_to_fp16)[name = string("k_23_cast_fp16")]; - tensor var_9472_cast_fp16 = mul(x = q_23_cast_fp16, y = cos_35)[name = string("op_9472_cast_fp16")]; - tensor x1_45_begin_0 = const()[name = string("x1_45_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_45_end_0 = const()[name = string("x1_45_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_45_end_mask_0 = const()[name = string("x1_45_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_45_cast_fp16 = slice_by_index(begin = x1_45_begin_0, end = x1_45_end_0, end_mask = x1_45_end_mask_0, x = q_23_cast_fp16)[name = string("x1_45_cast_fp16")]; - tensor x2_45_begin_0 = const()[name = string("x2_45_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_45_end_0 = const()[name = string("x2_45_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_45_end_mask_0 = const()[name = string("x2_45_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_45_cast_fp16 = slice_by_index(begin = x2_45_begin_0, end = x2_45_end_0, end_mask = x2_45_end_mask_0, x = q_23_cast_fp16)[name = string("x2_45_cast_fp16")]; - fp16 const_489_promoted_to_fp16 = const()[name = string("const_489_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_9493_cast_fp16 = mul(x = x2_45_cast_fp16, y = const_489_promoted_to_fp16)[name = string("op_9493_cast_fp16")]; - int32 var_9495 = const()[name = string("op_9495"), val = int32(-1)]; - bool var_9496_interleave_0 = const()[name = string("op_9496_interleave_0"), val = bool(false)]; - tensor var_9496_cast_fp16 = concat(axis = var_9495, interleave = var_9496_interleave_0, values = (var_9493_cast_fp16, x1_45_cast_fp16))[name = string("op_9496_cast_fp16")]; - tensor var_9497_cast_fp16 = mul(x = var_9496_cast_fp16, y = sin_35)[name = string("op_9497_cast_fp16")]; - tensor query_states_91_cast_fp16 = add(x = var_9472_cast_fp16, y = var_9497_cast_fp16)[name = string("query_states_91_cast_fp16")]; - tensor var_9500_cast_fp16 = mul(x = k_23_cast_fp16, y = cos_35)[name = string("op_9500_cast_fp16")]; - tensor x1_47_begin_0 = const()[name = string("x1_47_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_47_end_0 = const()[name = string("x1_47_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_47_end_mask_0 = const()[name = string("x1_47_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_47_cast_fp16 = slice_by_index(begin = x1_47_begin_0, end = x1_47_end_0, end_mask = x1_47_end_mask_0, x = k_23_cast_fp16)[name = string("x1_47_cast_fp16")]; - tensor x2_47_begin_0 = const()[name = string("x2_47_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_47_end_0 = const()[name = string("x2_47_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_47_end_mask_0 = const()[name = string("x2_47_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_47_cast_fp16 = slice_by_index(begin = x2_47_begin_0, end = x2_47_end_0, end_mask = x2_47_end_mask_0, x = k_23_cast_fp16)[name = string("x2_47_cast_fp16")]; - fp16 const_492_promoted_to_fp16 = const()[name = string("const_492_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_9521_cast_fp16 = mul(x = x2_47_cast_fp16, y = const_492_promoted_to_fp16)[name = string("op_9521_cast_fp16")]; - int32 var_9523 = const()[name = string("op_9523"), val = int32(-1)]; - bool var_9524_interleave_0 = const()[name = string("op_9524_interleave_0"), val = bool(false)]; - tensor var_9524_cast_fp16 = concat(axis = var_9523, interleave = var_9524_interleave_0, values = (var_9521_cast_fp16, x1_47_cast_fp16))[name = string("op_9524_cast_fp16")]; - tensor var_9525_cast_fp16 = mul(x = var_9524_cast_fp16, y = sin_35)[name = string("op_9525_cast_fp16")]; - tensor key_states_113_cast_fp16 = add(x = var_9500_cast_fp16, y = var_9525_cast_fp16)[name = string("key_states_113_cast_fp16")]; - tensor model_model_kv_cache_global_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_global_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_global_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_20, begin_mask = model_model_kv_cache_global_internal_tensor_assign_3_begin_mask_0, end = concat_21, end_mask = model_model_kv_cache_global_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_3_stride_0, update = key_states_113_cast_fp16, x = coreml_update_state_63)[name = string("model_model_kv_cache_global_internal_tensor_assign_3_cast_fp16")]; - write_state(data = model_model_kv_cache_global_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_126_write_state")]; - tensor coreml_update_state_74 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_126")]; - tensor model_model_kv_cache_global_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_global_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_global_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor value_states_91 = transpose(perm = var_9404, x = var_9399)[name = string("transpose_131")]; - tensor model_model_kv_cache_global_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_110, begin_mask = model_model_kv_cache_global_internal_tensor_assign_4_begin_mask_0, end = concat_111, end_mask = model_model_kv_cache_global_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_4_stride_0, update = value_states_91, x = coreml_update_state_74)[name = string("model_model_kv_cache_global_internal_tensor_assign_4_cast_fp16")]; - write_state(data = model_model_kv_cache_global_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_127_write_state")]; - tensor coreml_update_state_75 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_127")]; - tensor var_9624_begin_0 = const()[name = string("op_9624_begin_0"), val = tensor([1, 0, 0, 0])]; - tensor var_9624_end_0 = const()[name = string("op_9624_end_0"), val = tensor([2, 1, 4096, 256])]; - tensor var_9624_end_mask_0 = const()[name = string("op_9624_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_9624_cast_fp16 = slice_by_index(begin = var_9624_begin_0, end = var_9624_end_0, end_mask = var_9624_end_mask_0, x = coreml_update_state_75)[name = string("op_9624_cast_fp16")]; - tensor var_9631_begin_0 = const()[name = string("op_9631_begin_0"), val = tensor([5, 0, 0, 0])]; - tensor var_9631_end_0 = const()[name = string("op_9631_end_0"), val = tensor([6, 1, 4096, 256])]; - tensor var_9631_end_mask_0 = const()[name = string("op_9631_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_9631_cast_fp16 = slice_by_index(begin = var_9631_begin_0, end = var_9631_end_0, end_mask = var_9631_end_mask_0, x = coreml_update_state_75)[name = string("op_9631_cast_fp16")]; - tensor var_9670 = const()[name = string("op_9670"), val = tensor([1, 4, 1, 1])]; - tensor x_181_cast_fp16 = tile(reps = var_9670, x = var_9624_cast_fp16)[name = string("x_181_cast_fp16")]; - tensor var_9690 = const()[name = string("op_9690"), val = tensor([1, 4, 1, 1])]; - tensor x_187_cast_fp16 = tile(reps = var_9690, x = var_9631_cast_fp16)[name = string("x_187_cast_fp16")]; - bool var_9717_transpose_x_0 = const()[name = string("op_9717_transpose_x_0"), val = bool(false)]; - bool var_9717_transpose_y_0 = const()[name = string("op_9717_transpose_y_0"), val = bool(true)]; - tensor var_9717 = matmul(transpose_x = var_9717_transpose_x_0, transpose_y = var_9717_transpose_y_0, x = query_states_91_cast_fp16, y = x_181_cast_fp16)[name = string("op_9717")]; - fp16 var_9718_to_fp16 = const()[name = string("op_9718_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_45_cast_fp16 = mul(x = var_9717, y = var_9718_to_fp16)[name = string("attn_weights_45_cast_fp16")]; - tensor attn_weights_47_cast_fp16 = add(x = attn_weights_45_cast_fp16, y = causal_mask)[name = string("attn_weights_47_cast_fp16")]; - int32 var_9753 = const()[name = string("op_9753"), val = int32(-1)]; - tensor var_9755_cast_fp16 = softmax(axis = var_9753, x = attn_weights_47_cast_fp16)[name = string("op_9755_cast_fp16")]; - tensor concat_210 = const()[name = string("concat_210"), val = tensor([4, 64, 4096])]; - tensor reshape_33_cast_fp16 = reshape(shape = concat_210, x = var_9755_cast_fp16)[name = string("reshape_33_cast_fp16")]; - tensor concat_211 = const()[name = string("concat_211"), val = tensor([4, 4096, 256])]; - tensor reshape_34_cast_fp16 = reshape(shape = concat_211, x = x_187_cast_fp16)[name = string("reshape_34_cast_fp16")]; - bool matmul_11_transpose_x_0 = const()[name = string("matmul_11_transpose_x_0"), val = bool(false)]; - bool matmul_11_transpose_y_0 = const()[name = string("matmul_11_transpose_y_0"), val = bool(false)]; - tensor matmul_11_cast_fp16 = matmul(transpose_x = matmul_11_transpose_x_0, transpose_y = matmul_11_transpose_y_0, x = reshape_33_cast_fp16, y = reshape_34_cast_fp16)[name = string("matmul_11_cast_fp16")]; - tensor concat_215 = const()[name = string("concat_215"), val = tensor([1, 4, 64, 256])]; - tensor reshape_35_cast_fp16 = reshape(shape = concat_215, x = matmul_11_cast_fp16)[name = string("reshape_35_cast_fp16")]; - tensor var_9767_perm_0 = const()[name = string("op_9767_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_9786 = const()[name = string("op_9786"), val = tensor([1, 64, 1024])]; - tensor var_9767_cast_fp16 = transpose(perm = var_9767_perm_0, x = reshape_35_cast_fp16)[name = string("transpose_130")]; - tensor attn_output_115_cast_fp16 = reshape(shape = var_9786, x = var_9767_cast_fp16)[name = string("attn_output_115_cast_fp16")]; - tensor var_9791 = const()[name = string("op_9791"), val = tensor([0, 2, 1])]; - string var_9807_pad_type_0 = const()[name = string("op_9807_pad_type_0"), val = string("valid")]; - int32 var_9807_groups_0 = const()[name = string("op_9807_groups_0"), val = int32(1)]; - tensor var_9807_strides_0 = const()[name = string("op_9807_strides_0"), val = tensor([1])]; - tensor var_9807_pad_0 = const()[name = string("op_9807_pad_0"), val = tensor([0, 0])]; - tensor var_9807_dilations_0 = const()[name = string("op_9807_dilations_0"), val = tensor([1])]; - tensor squeeze_11_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261702784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262587584))))[name = string("squeeze_11_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_9792_cast_fp16 = transpose(perm = var_9791, x = attn_output_115_cast_fp16)[name = string("transpose_129")]; - tensor var_9807_cast_fp16 = conv(dilations = var_9807_dilations_0, groups = var_9807_groups_0, pad = var_9807_pad_0, pad_type = var_9807_pad_type_0, strides = var_9807_strides_0, weight = squeeze_11_cast_fp16_to_fp32_to_fp16_palettized, x = var_9792_cast_fp16)[name = string("op_9807_cast_fp16")]; - tensor var_9811 = const()[name = string("op_9811"), val = tensor([0, 2, 1])]; - int32 var_9822 = const()[name = string("op_9822"), val = int32(-1)]; - fp16 const_504_promoted_to_fp16 = const()[name = string("const_504_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_185_cast_fp16 = transpose(perm = var_9811, x = var_9807_cast_fp16)[name = string("transpose_128")]; - tensor var_9824_cast_fp16 = mul(x = hidden_states_185_cast_fp16, y = const_504_promoted_to_fp16)[name = string("op_9824_cast_fp16")]; - bool input_231_interleave_0 = const()[name = string("input_231_interleave_0"), val = bool(false)]; - tensor input_231_cast_fp16 = concat(axis = var_9822, interleave = input_231_interleave_0, values = (hidden_states_185_cast_fp16, var_9824_cast_fp16))[name = string("input_231_cast_fp16")]; - tensor normed_277_axes_0 = const()[name = string("normed_277_axes_0"), val = tensor([-1])]; - fp16 var_9819_to_fp16 = const()[name = string("op_9819_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_277_cast_fp16 = layer_norm(axes = normed_277_axes_0, epsilon = var_9819_to_fp16, x = input_231_cast_fp16)[name = string("normed_277_cast_fp16")]; - tensor normed_279_begin_0 = const()[name = string("normed_279_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_279_end_0 = const()[name = string("normed_279_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_279_end_mask_0 = const()[name = string("normed_279_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_279_cast_fp16 = slice_by_index(begin = normed_279_begin_0, end = normed_279_end_0, end_mask = normed_279_end_mask_0, x = normed_277_cast_fp16)[name = string("normed_279_cast_fp16")]; - tensor var_9838_to_fp16 = const()[name = string("op_9838_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262606080)))]; - tensor attn_output_119_cast_fp16 = mul(x = normed_279_cast_fp16, y = var_9838_to_fp16)[name = string("attn_output_119_cast_fp16")]; - tensor hidden_states_187_cast_fp16 = add(x = hidden_states_177_cast_fp16, y = attn_output_119_cast_fp16)[name = string("hidden_states_187_cast_fp16")]; - int32 var_9851 = const()[name = string("op_9851"), val = int32(-1)]; - fp16 const_508_promoted_to_fp16 = const()[name = string("const_508_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_9853_cast_fp16 = mul(x = hidden_states_187_cast_fp16, y = const_508_promoted_to_fp16)[name = string("op_9853_cast_fp16")]; - bool input_233_interleave_0 = const()[name = string("input_233_interleave_0"), val = bool(false)]; - tensor input_233_cast_fp16 = concat(axis = var_9851, interleave = input_233_interleave_0, values = (hidden_states_187_cast_fp16, var_9853_cast_fp16))[name = string("input_233_cast_fp16")]; - tensor normed_281_axes_0 = const()[name = string("normed_281_axes_0"), val = tensor([-1])]; - fp16 var_9848_to_fp16 = const()[name = string("op_9848_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_281_cast_fp16 = layer_norm(axes = normed_281_axes_0, epsilon = var_9848_to_fp16, x = input_233_cast_fp16)[name = string("normed_281_cast_fp16")]; - tensor normed_283_begin_0 = const()[name = string("normed_283_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_283_end_0 = const()[name = string("normed_283_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_283_end_mask_0 = const()[name = string("normed_283_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_283_cast_fp16 = slice_by_index(begin = normed_283_begin_0, end = normed_283_end_0, end_mask = normed_283_end_mask_0, x = normed_281_cast_fp16)[name = string("normed_283_cast_fp16")]; - tensor var_9867_to_fp16 = const()[name = string("op_9867_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262608448)))]; - tensor x_189_cast_fp16 = mul(x = normed_283_cast_fp16, y = var_9867_to_fp16)[name = string("x_189_cast_fp16")]; - tensor var_9879 = const()[name = string("op_9879"), val = tensor([0, 2, 1])]; - tensor input_235_axes_0 = const()[name = string("input_235_axes_0"), val = tensor([2])]; - tensor var_9880_cast_fp16 = transpose(perm = var_9879, x = x_189_cast_fp16)[name = string("transpose_127")]; - tensor input_235_cast_fp16 = expand_dims(axes = input_235_axes_0, x = var_9880_cast_fp16)[name = string("input_235_cast_fp16")]; - string x_191_pad_type_0 = const()[name = string("x_191_pad_type_0"), val = string("valid")]; - tensor x_191_strides_0 = const()[name = string("x_191_strides_0"), val = tensor([1, 1])]; - tensor x_191_pad_0 = const()[name = string("x_191_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_191_dilations_0 = const()[name = string("x_191_dilations_0"), val = tensor([1, 1])]; - int32 x_191_groups_0 = const()[name = string("x_191_groups_0"), val = int32(1)]; - tensor model_model_layers_11_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1012579072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1018551104))))[name = string("model_model_layers_11_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_191_cast_fp16 = conv(dilations = x_191_dilations_0, groups = x_191_groups_0, pad = x_191_pad_0, pad_type = x_191_pad_type_0, strides = x_191_strides_0, weight = model_model_layers_11_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_235_cast_fp16)[name = string("x_191_cast_fp16")]; - string b_23_pad_type_0 = const()[name = string("b_23_pad_type_0"), val = string("valid")]; - tensor b_23_strides_0 = const()[name = string("b_23_strides_0"), val = tensor([1, 1])]; - tensor b_23_pad_0 = const()[name = string("b_23_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_23_dilations_0 = const()[name = string("b_23_dilations_0"), val = tensor([1, 1])]; - int32 b_23_groups_0 = const()[name = string("b_23_groups_0"), val = int32(1)]; - tensor model_model_layers_11_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1018661760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1024633792))))[name = string("model_model_layers_11_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_23_cast_fp16 = conv(dilations = b_23_dilations_0, groups = b_23_groups_0, pad = b_23_pad_0, pad_type = b_23_pad_type_0, strides = b_23_strides_0, weight = model_model_layers_11_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_235_cast_fp16)[name = string("b_23_cast_fp16")]; - string var_9905_mode_0 = const()[name = string("op_9905_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_9905_cast_fp16 = gelu(mode = var_9905_mode_0, x = x_191_cast_fp16)[name = string("op_9905_cast_fp16")]; - tensor input_237_cast_fp16 = mul(x = var_9905_cast_fp16, y = b_23_cast_fp16)[name = string("input_237_cast_fp16")]; - string e_23_pad_type_0 = const()[name = string("e_23_pad_type_0"), val = string("valid")]; - tensor e_23_strides_0 = const()[name = string("e_23_strides_0"), val = tensor([1, 1])]; - tensor e_23_pad_0 = const()[name = string("e_23_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_23_dilations_0 = const()[name = string("e_23_dilations_0"), val = tensor([1, 1])]; - int32 e_23_groups_0 = const()[name = string("e_23_groups_0"), val = int32(1)]; - tensor model_model_layers_11_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(274776192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280748224))))[name = string("model_model_layers_11_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_23_cast_fp16 = conv(dilations = e_23_dilations_0, groups = e_23_groups_0, pad = e_23_pad_0, pad_type = e_23_pad_type_0, strides = e_23_strides_0, weight = model_model_layers_11_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_237_cast_fp16)[name = string("e_23_cast_fp16")]; - tensor var_9913_axes_0 = const()[name = string("op_9913_axes_0"), val = tensor([2])]; - tensor var_9913_cast_fp16 = squeeze(axes = var_9913_axes_0, x = e_23_cast_fp16)[name = string("op_9913_cast_fp16")]; - tensor var_9914 = const()[name = string("op_9914"), val = tensor([0, 2, 1])]; - int32 var_9925 = const()[name = string("op_9925"), val = int32(-1)]; - fp16 const_512_promoted_to_fp16 = const()[name = string("const_512_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_189_cast_fp16 = transpose(perm = var_9914, x = var_9913_cast_fp16)[name = string("transpose_126")]; - tensor var_9927_cast_fp16 = mul(x = hidden_states_189_cast_fp16, y = const_512_promoted_to_fp16)[name = string("op_9927_cast_fp16")]; - bool input_239_interleave_0 = const()[name = string("input_239_interleave_0"), val = bool(false)]; - tensor input_239_cast_fp16 = concat(axis = var_9925, interleave = input_239_interleave_0, values = (hidden_states_189_cast_fp16, var_9927_cast_fp16))[name = string("input_239_cast_fp16")]; - tensor normed_285_axes_0 = const()[name = string("normed_285_axes_0"), val = tensor([-1])]; - fp16 var_9922_to_fp16 = const()[name = string("op_9922_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_285_cast_fp16 = layer_norm(axes = normed_285_axes_0, epsilon = var_9922_to_fp16, x = input_239_cast_fp16)[name = string("normed_285_cast_fp16")]; - tensor normed_287_begin_0 = const()[name = string("normed_287_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_287_end_0 = const()[name = string("normed_287_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_287_end_mask_0 = const()[name = string("normed_287_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_287_cast_fp16 = slice_by_index(begin = normed_287_begin_0, end = normed_287_end_0, end_mask = normed_287_end_mask_0, x = normed_285_cast_fp16)[name = string("normed_287_cast_fp16")]; - tensor var_9941_to_fp16 = const()[name = string("op_9941_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280766720)))]; - tensor hidden_states_191_cast_fp16 = mul(x = normed_287_cast_fp16, y = var_9941_to_fp16)[name = string("hidden_states_191_cast_fp16")]; - tensor hidden_states_193_cast_fp16 = add(x = hidden_states_187_cast_fp16, y = hidden_states_191_cast_fp16)[name = string("hidden_states_193_cast_fp16")]; - int32 var_9995 = const()[name = string("op_9995"), val = int32(-1)]; - fp16 const_517_promoted_to_fp16 = const()[name = string("const_517_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_9997_cast_fp16 = mul(x = hidden_states_193_cast_fp16, y = const_517_promoted_to_fp16)[name = string("op_9997_cast_fp16")]; - bool input_241_interleave_0 = const()[name = string("input_241_interleave_0"), val = bool(false)]; - tensor input_241_cast_fp16 = concat(axis = var_9995, interleave = input_241_interleave_0, values = (hidden_states_193_cast_fp16, var_9997_cast_fp16))[name = string("input_241_cast_fp16")]; - tensor normed_289_axes_0 = const()[name = string("normed_289_axes_0"), val = tensor([-1])]; - fp16 var_9992_to_fp16 = const()[name = string("op_9992_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_289_cast_fp16 = layer_norm(axes = normed_289_axes_0, epsilon = var_9992_to_fp16, x = input_241_cast_fp16)[name = string("normed_289_cast_fp16")]; - tensor normed_291_begin_0 = const()[name = string("normed_291_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_291_end_0 = const()[name = string("normed_291_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_291_end_mask_0 = const()[name = string("normed_291_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_291_cast_fp16 = slice_by_index(begin = normed_291_begin_0, end = normed_291_end_0, end_mask = normed_291_end_mask_0, x = normed_289_cast_fp16)[name = string("normed_291_cast_fp16")]; - tensor var_10011_to_fp16 = const()[name = string("op_10011_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280769088)))]; - tensor hidden_states_195_cast_fp16 = mul(x = normed_291_cast_fp16, y = var_10011_to_fp16)[name = string("hidden_states_195_cast_fp16")]; - tensor var_10022 = const()[name = string("op_10022"), val = tensor([0, 2, 1])]; - tensor var_10025_axes_0 = const()[name = string("op_10025_axes_0"), val = tensor([2])]; - tensor var_10023_cast_fp16 = transpose(perm = var_10022, x = hidden_states_195_cast_fp16)[name = string("transpose_125")]; - tensor var_10025_cast_fp16 = expand_dims(axes = var_10025_axes_0, x = var_10023_cast_fp16)[name = string("op_10025_cast_fp16")]; - string query_states_97_pad_type_0 = const()[name = string("query_states_97_pad_type_0"), val = string("valid")]; - tensor query_states_97_strides_0 = const()[name = string("query_states_97_strides_0"), val = tensor([1, 1])]; - tensor query_states_97_pad_0 = const()[name = string("query_states_97_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_97_dilations_0 = const()[name = string("query_states_97_dilations_0"), val = tensor([1, 1])]; - int32 query_states_97_groups_0 = const()[name = string("query_states_97_groups_0"), val = int32(1)]; - tensor query_states_97 = conv(dilations = query_states_97_dilations_0, groups = query_states_97_groups_0, pad = query_states_97_pad_0, pad_type = query_states_97_pad_type_0, strides = query_states_97_strides_0, weight = model_model_layers_12_self_attn_q_proj_weight_palettized, x = var_10025_cast_fp16)[name = string("query_states_97")]; - string key_states_121_pad_type_0 = const()[name = string("key_states_121_pad_type_0"), val = string("valid")]; - tensor key_states_121_strides_0 = const()[name = string("key_states_121_strides_0"), val = tensor([1, 1])]; - tensor key_states_121_pad_0 = const()[name = string("key_states_121_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_121_dilations_0 = const()[name = string("key_states_121_dilations_0"), val = tensor([1, 1])]; - int32 key_states_121_groups_0 = const()[name = string("key_states_121_groups_0"), val = int32(1)]; - tensor key_states_121 = conv(dilations = key_states_121_dilations_0, groups = key_states_121_groups_0, pad = key_states_121_pad_0, pad_type = key_states_121_pad_type_0, strides = key_states_121_strides_0, weight = model_model_layers_12_self_attn_k_proj_weight_palettized, x = var_10025_cast_fp16)[name = string("key_states_121")]; - string value_states_97_pad_type_0 = const()[name = string("value_states_97_pad_type_0"), val = string("valid")]; - tensor value_states_97_strides_0 = const()[name = string("value_states_97_strides_0"), val = tensor([1, 1])]; - tensor value_states_97_pad_0 = const()[name = string("value_states_97_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_97_dilations_0 = const()[name = string("value_states_97_dilations_0"), val = tensor([1, 1])]; - int32 value_states_97_groups_0 = const()[name = string("value_states_97_groups_0"), val = int32(1)]; - tensor value_states_97 = conv(dilations = value_states_97_dilations_0, groups = value_states_97_groups_0, pad = value_states_97_pad_0, pad_type = value_states_97_pad_type_0, strides = value_states_97_strides_0, weight = model_model_layers_12_self_attn_v_proj_weight_palettized, x = var_10025_cast_fp16)[name = string("value_states_97")]; - tensor var_10067 = const()[name = string("op_10067"), val = tensor([1, 4, 256, 64])]; - tensor var_10068 = reshape(shape = var_10067, x = query_states_97)[name = string("op_10068")]; - tensor var_10073 = const()[name = string("op_10073"), val = tensor([0, 1, 3, 2])]; - tensor var_10078 = const()[name = string("op_10078"), val = tensor([1, 1, 256, 64])]; - tensor var_10079 = reshape(shape = var_10078, x = key_states_121)[name = string("op_10079")]; - tensor var_10084 = const()[name = string("op_10084"), val = tensor([0, 1, 3, 2])]; - tensor var_10089 = const()[name = string("op_10089"), val = tensor([1, 1, 256, 64])]; - tensor var_10090 = reshape(shape = var_10089, x = value_states_97)[name = string("op_10090")]; - tensor var_10095 = const()[name = string("op_10095"), val = tensor([0, 1, 3, 2])]; - int32 var_10106 = const()[name = string("op_10106"), val = int32(-1)]; - fp16 const_522_promoted = const()[name = string("const_522_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_197 = transpose(perm = var_10073, x = var_10068)[name = string("transpose_124")]; - tensor var_10108 = mul(x = hidden_states_197, y = const_522_promoted)[name = string("op_10108")]; - bool input_245_interleave_0 = const()[name = string("input_245_interleave_0"), val = bool(false)]; - tensor input_245 = concat(axis = var_10106, interleave = input_245_interleave_0, values = (hidden_states_197, var_10108))[name = string("input_245")]; - tensor normed_293_axes_0 = const()[name = string("normed_293_axes_0"), val = tensor([-1])]; - fp16 var_10103_to_fp16 = const()[name = string("op_10103_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_293_cast_fp16 = layer_norm(axes = normed_293_axes_0, epsilon = var_10103_to_fp16, x = input_245)[name = string("normed_293_cast_fp16")]; - tensor normed_295_begin_0 = const()[name = string("normed_295_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_295_end_0 = const()[name = string("normed_295_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_295_end_mask_0 = const()[name = string("normed_295_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_295 = slice_by_index(begin = normed_295_begin_0, end = normed_295_end_0, end_mask = normed_295_end_mask_0, x = normed_293_cast_fp16)[name = string("normed_295")]; - tensor var_10122_to_fp16 = const()[name = string("op_10122_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280771456)))]; - tensor q_25_cast_fp16 = mul(x = normed_295, y = var_10122_to_fp16)[name = string("q_25_cast_fp16")]; - int32 var_10133 = const()[name = string("op_10133"), val = int32(-1)]; - fp16 const_526_promoted = const()[name = string("const_526_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_199 = transpose(perm = var_10084, x = var_10079)[name = string("transpose_123")]; - tensor var_10135 = mul(x = hidden_states_199, y = const_526_promoted)[name = string("op_10135")]; - bool input_247_interleave_0 = const()[name = string("input_247_interleave_0"), val = bool(false)]; - tensor input_247 = concat(axis = var_10133, interleave = input_247_interleave_0, values = (hidden_states_199, var_10135))[name = string("input_247")]; - tensor normed_297_axes_0 = const()[name = string("normed_297_axes_0"), val = tensor([-1])]; - fp16 var_10130_to_fp16 = const()[name = string("op_10130_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_297_cast_fp16 = layer_norm(axes = normed_297_axes_0, epsilon = var_10130_to_fp16, x = input_247)[name = string("normed_297_cast_fp16")]; - tensor normed_299_begin_0 = const()[name = string("normed_299_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_299_end_0 = const()[name = string("normed_299_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_299_end_mask_0 = const()[name = string("normed_299_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_299 = slice_by_index(begin = normed_299_begin_0, end = normed_299_end_0, end_mask = normed_299_end_mask_0, x = normed_297_cast_fp16)[name = string("normed_299")]; - tensor var_10149_to_fp16 = const()[name = string("op_10149_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280772032)))]; - tensor k_25_cast_fp16 = mul(x = normed_299, y = var_10149_to_fp16)[name = string("k_25_cast_fp16")]; - tensor var_10163_cast_fp16 = mul(x = q_25_cast_fp16, y = cos_5)[name = string("op_10163_cast_fp16")]; - tensor x1_49_begin_0 = const()[name = string("x1_49_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_49_end_0 = const()[name = string("x1_49_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_49_end_mask_0 = const()[name = string("x1_49_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_49_cast_fp16 = slice_by_index(begin = x1_49_begin_0, end = x1_49_end_0, end_mask = x1_49_end_mask_0, x = q_25_cast_fp16)[name = string("x1_49_cast_fp16")]; - tensor x2_49_begin_0 = const()[name = string("x2_49_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_49_end_0 = const()[name = string("x2_49_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_49_end_mask_0 = const()[name = string("x2_49_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_49_cast_fp16 = slice_by_index(begin = x2_49_begin_0, end = x2_49_end_0, end_mask = x2_49_end_mask_0, x = q_25_cast_fp16)[name = string("x2_49_cast_fp16")]; - fp16 const_532_promoted_to_fp16 = const()[name = string("const_532_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_10184_cast_fp16 = mul(x = x2_49_cast_fp16, y = const_532_promoted_to_fp16)[name = string("op_10184_cast_fp16")]; - int32 var_10186 = const()[name = string("op_10186"), val = int32(-1)]; - bool var_10187_interleave_0 = const()[name = string("op_10187_interleave_0"), val = bool(false)]; - tensor var_10187_cast_fp16 = concat(axis = var_10186, interleave = var_10187_interleave_0, values = (var_10184_cast_fp16, x1_49_cast_fp16))[name = string("op_10187_cast_fp16")]; - tensor var_10188_cast_fp16 = mul(x = var_10187_cast_fp16, y = sin_5)[name = string("op_10188_cast_fp16")]; - tensor query_states_99_cast_fp16 = add(x = var_10163_cast_fp16, y = var_10188_cast_fp16)[name = string("query_states_99_cast_fp16")]; - tensor var_10191_cast_fp16 = mul(x = k_25_cast_fp16, y = cos_5)[name = string("op_10191_cast_fp16")]; - tensor x1_51_begin_0 = const()[name = string("x1_51_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_51_end_0 = const()[name = string("x1_51_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_51_end_mask_0 = const()[name = string("x1_51_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_51_cast_fp16 = slice_by_index(begin = x1_51_begin_0, end = x1_51_end_0, end_mask = x1_51_end_mask_0, x = k_25_cast_fp16)[name = string("x1_51_cast_fp16")]; - tensor x2_51_begin_0 = const()[name = string("x2_51_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_51_end_0 = const()[name = string("x2_51_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_51_end_mask_0 = const()[name = string("x2_51_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_51_cast_fp16 = slice_by_index(begin = x2_51_begin_0, end = x2_51_end_0, end_mask = x2_51_end_mask_0, x = k_25_cast_fp16)[name = string("x2_51_cast_fp16")]; - fp16 const_535_promoted_to_fp16 = const()[name = string("const_535_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_10212_cast_fp16 = mul(x = x2_51_cast_fp16, y = const_535_promoted_to_fp16)[name = string("op_10212_cast_fp16")]; - int32 var_10214 = const()[name = string("op_10214"), val = int32(-1)]; - bool var_10215_interleave_0 = const()[name = string("op_10215_interleave_0"), val = bool(false)]; - tensor var_10215_cast_fp16 = concat(axis = var_10214, interleave = var_10215_interleave_0, values = (var_10212_cast_fp16, x1_51_cast_fp16))[name = string("op_10215_cast_fp16")]; - tensor var_10216_cast_fp16 = mul(x = var_10215_cast_fp16, y = sin_5)[name = string("op_10216_cast_fp16")]; - tensor key_states_123_cast_fp16 = add(x = var_10191_cast_fp16, y = var_10216_cast_fp16)[name = string("key_states_123_cast_fp16")]; - tensor expand_dims_144 = const()[name = string("expand_dims_144"), val = tensor([10])]; - tensor expand_dims_145 = const()[name = string("expand_dims_145"), val = tensor([0])]; - tensor expand_dims_147 = const()[name = string("expand_dims_147"), val = tensor([0])]; - tensor expand_dims_148 = const()[name = string("expand_dims_148"), val = tensor([11])]; - int32 concat_218_axis_0 = const()[name = string("concat_218_axis_0"), val = int32(0)]; - bool concat_218_interleave_0 = const()[name = string("concat_218_interleave_0"), val = bool(false)]; - tensor concat_218 = concat(axis = concat_218_axis_0, interleave = concat_218_interleave_0, values = (expand_dims_144, expand_dims_145, current_pos, expand_dims_147))[name = string("concat_218")]; - tensor concat_219_values1_0 = const()[name = string("concat_219_values1_0"), val = tensor([0])]; - tensor concat_219_values3_0 = const()[name = string("concat_219_values3_0"), val = tensor([0])]; - int32 concat_219_axis_0 = const()[name = string("concat_219_axis_0"), val = int32(0)]; - bool concat_219_interleave_0 = const()[name = string("concat_219_interleave_0"), val = bool(false)]; - tensor concat_219 = concat(axis = concat_219_axis_0, interleave = concat_219_interleave_0, values = (expand_dims_148, concat_219_values1_0, end_pos_1, concat_219_values3_0))[name = string("concat_219")]; - tensor model_model_kv_cache_local_internal_tensor_assign_21_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_21_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_21_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_218, begin_mask = model_model_kv_cache_local_internal_tensor_assign_21_begin_mask_0, end = concat_219, end_mask = model_model_kv_cache_local_internal_tensor_assign_21_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_21_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_21_stride_0, update = key_states_123_cast_fp16, x = coreml_update_state_73)[name = string("model_model_kv_cache_local_internal_tensor_assign_21_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_21_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_128_write_state")]; - tensor coreml_update_state_76 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_128")]; - tensor expand_dims_150 = const()[name = string("expand_dims_150"), val = tensor([32])]; - tensor expand_dims_151 = const()[name = string("expand_dims_151"), val = tensor([0])]; - tensor expand_dims_153 = const()[name = string("expand_dims_153"), val = tensor([0])]; - tensor expand_dims_154 = const()[name = string("expand_dims_154"), val = tensor([33])]; - int32 concat_222_axis_0 = const()[name = string("concat_222_axis_0"), val = int32(0)]; - bool concat_222_interleave_0 = const()[name = string("concat_222_interleave_0"), val = bool(false)]; - tensor concat_222 = concat(axis = concat_222_axis_0, interleave = concat_222_interleave_0, values = (expand_dims_150, expand_dims_151, current_pos, expand_dims_153))[name = string("concat_222")]; - tensor concat_223_values1_0 = const()[name = string("concat_223_values1_0"), val = tensor([0])]; - tensor concat_223_values3_0 = const()[name = string("concat_223_values3_0"), val = tensor([0])]; - int32 concat_223_axis_0 = const()[name = string("concat_223_axis_0"), val = int32(0)]; - bool concat_223_interleave_0 = const()[name = string("concat_223_interleave_0"), val = bool(false)]; - tensor concat_223 = concat(axis = concat_223_axis_0, interleave = concat_223_interleave_0, values = (expand_dims_154, concat_223_values1_0, end_pos_1, concat_223_values3_0))[name = string("concat_223")]; - tensor model_model_kv_cache_local_internal_tensor_assign_22_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_22_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_22_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor value_states_99 = transpose(perm = var_10095, x = var_10090)[name = string("transpose_122")]; - tensor model_model_kv_cache_local_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_222, begin_mask = model_model_kv_cache_local_internal_tensor_assign_22_begin_mask_0, end = concat_223, end_mask = model_model_kv_cache_local_internal_tensor_assign_22_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_22_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_22_stride_0, update = value_states_99, x = coreml_update_state_76)[name = string("model_model_kv_cache_local_internal_tensor_assign_22_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_22_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_129_write_state")]; - tensor coreml_update_state_77 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_129")]; - tensor var_10315_begin_0 = const()[name = string("op_10315_begin_0"), val = tensor([10, 0, 0, 0])]; - tensor var_10315_end_0 = const()[name = string("op_10315_end_0"), val = tensor([11, 1, 512, 256])]; - tensor var_10315_end_mask_0 = const()[name = string("op_10315_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_10315_cast_fp16 = slice_by_index(begin = var_10315_begin_0, end = var_10315_end_0, end_mask = var_10315_end_mask_0, x = coreml_update_state_77)[name = string("op_10315_cast_fp16")]; - tensor var_10322_begin_0 = const()[name = string("op_10322_begin_0"), val = tensor([32, 0, 0, 0])]; - tensor var_10322_end_0 = const()[name = string("op_10322_end_0"), val = tensor([33, 1, 512, 256])]; - tensor var_10322_end_mask_0 = const()[name = string("op_10322_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_10322_cast_fp16 = slice_by_index(begin = var_10322_begin_0, end = var_10322_end_0, end_mask = var_10322_end_mask_0, x = coreml_update_state_77)[name = string("op_10322_cast_fp16")]; - tensor var_10361 = const()[name = string("op_10361"), val = tensor([1, 4, 1, 1])]; - tensor x_197_cast_fp16 = tile(reps = var_10361, x = var_10315_cast_fp16)[name = string("x_197_cast_fp16")]; - tensor var_10381 = const()[name = string("op_10381"), val = tensor([1, 4, 1, 1])]; - tensor x_203_cast_fp16 = tile(reps = var_10381, x = var_10322_cast_fp16)[name = string("x_203_cast_fp16")]; - bool var_10408_transpose_x_0 = const()[name = string("op_10408_transpose_x_0"), val = bool(false)]; - bool var_10408_transpose_y_0 = const()[name = string("op_10408_transpose_y_0"), val = bool(true)]; - tensor var_10408 = matmul(transpose_x = var_10408_transpose_x_0, transpose_y = var_10408_transpose_y_0, x = query_states_99_cast_fp16, y = x_197_cast_fp16)[name = string("op_10408")]; - fp16 var_10409_to_fp16 = const()[name = string("op_10409_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_49_cast_fp16 = mul(x = var_10408, y = var_10409_to_fp16)[name = string("attn_weights_49_cast_fp16")]; - tensor attn_weights_51_cast_fp16 = add(x = attn_weights_49_cast_fp16, y = mask_slice_1)[name = string("attn_weights_51_cast_fp16")]; - int32 var_10444 = const()[name = string("op_10444"), val = int32(-1)]; - tensor var_10446_cast_fp16 = softmax(axis = var_10444, x = attn_weights_51_cast_fp16)[name = string("op_10446_cast_fp16")]; - tensor concat_228 = const()[name = string("concat_228"), val = tensor([4, 64, 512])]; - tensor reshape_36_cast_fp16 = reshape(shape = concat_228, x = var_10446_cast_fp16)[name = string("reshape_36_cast_fp16")]; - tensor concat_229 = const()[name = string("concat_229"), val = tensor([4, 512, 256])]; - tensor reshape_37_cast_fp16 = reshape(shape = concat_229, x = x_203_cast_fp16)[name = string("reshape_37_cast_fp16")]; - bool matmul_12_transpose_x_0 = const()[name = string("matmul_12_transpose_x_0"), val = bool(false)]; - bool matmul_12_transpose_y_0 = const()[name = string("matmul_12_transpose_y_0"), val = bool(false)]; - tensor matmul_12_cast_fp16 = matmul(transpose_x = matmul_12_transpose_x_0, transpose_y = matmul_12_transpose_y_0, x = reshape_36_cast_fp16, y = reshape_37_cast_fp16)[name = string("matmul_12_cast_fp16")]; - tensor concat_233 = const()[name = string("concat_233"), val = tensor([1, 4, 64, 256])]; - tensor reshape_38_cast_fp16 = reshape(shape = concat_233, x = matmul_12_cast_fp16)[name = string("reshape_38_cast_fp16")]; - tensor var_10458_perm_0 = const()[name = string("op_10458_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_10477 = const()[name = string("op_10477"), val = tensor([1, 64, 1024])]; - tensor var_10458_cast_fp16 = transpose(perm = var_10458_perm_0, x = reshape_38_cast_fp16)[name = string("transpose_121")]; - tensor attn_output_125_cast_fp16 = reshape(shape = var_10477, x = var_10458_cast_fp16)[name = string("attn_output_125_cast_fp16")]; - tensor var_10482 = const()[name = string("op_10482"), val = tensor([0, 2, 1])]; - string var_10498_pad_type_0 = const()[name = string("op_10498_pad_type_0"), val = string("valid")]; - int32 var_10498_groups_0 = const()[name = string("op_10498_groups_0"), val = int32(1)]; - tensor var_10498_strides_0 = const()[name = string("op_10498_strides_0"), val = tensor([1])]; - tensor var_10498_pad_0 = const()[name = string("op_10498_pad_0"), val = tensor([0, 0])]; - tensor var_10498_dilations_0 = const()[name = string("op_10498_dilations_0"), val = tensor([1])]; - tensor squeeze_12_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280772608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(281657408))))[name = string("squeeze_12_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_10483_cast_fp16 = transpose(perm = var_10482, x = attn_output_125_cast_fp16)[name = string("transpose_120")]; - tensor var_10498_cast_fp16 = conv(dilations = var_10498_dilations_0, groups = var_10498_groups_0, pad = var_10498_pad_0, pad_type = var_10498_pad_type_0, strides = var_10498_strides_0, weight = squeeze_12_cast_fp16_to_fp32_to_fp16_palettized, x = var_10483_cast_fp16)[name = string("op_10498_cast_fp16")]; - tensor var_10502 = const()[name = string("op_10502"), val = tensor([0, 2, 1])]; - int32 var_10513 = const()[name = string("op_10513"), val = int32(-1)]; - fp16 const_547_promoted_to_fp16 = const()[name = string("const_547_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_201_cast_fp16 = transpose(perm = var_10502, x = var_10498_cast_fp16)[name = string("transpose_119")]; - tensor var_10515_cast_fp16 = mul(x = hidden_states_201_cast_fp16, y = const_547_promoted_to_fp16)[name = string("op_10515_cast_fp16")]; - bool input_251_interleave_0 = const()[name = string("input_251_interleave_0"), val = bool(false)]; - tensor input_251_cast_fp16 = concat(axis = var_10513, interleave = input_251_interleave_0, values = (hidden_states_201_cast_fp16, var_10515_cast_fp16))[name = string("input_251_cast_fp16")]; - tensor normed_301_axes_0 = const()[name = string("normed_301_axes_0"), val = tensor([-1])]; - fp16 var_10510_to_fp16 = const()[name = string("op_10510_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_301_cast_fp16 = layer_norm(axes = normed_301_axes_0, epsilon = var_10510_to_fp16, x = input_251_cast_fp16)[name = string("normed_301_cast_fp16")]; - tensor normed_303_begin_0 = const()[name = string("normed_303_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_303_end_0 = const()[name = string("normed_303_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_303_end_mask_0 = const()[name = string("normed_303_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_303_cast_fp16 = slice_by_index(begin = normed_303_begin_0, end = normed_303_end_0, end_mask = normed_303_end_mask_0, x = normed_301_cast_fp16)[name = string("normed_303_cast_fp16")]; - tensor var_10529_to_fp16 = const()[name = string("op_10529_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(281675904)))]; - tensor attn_output_129_cast_fp16 = mul(x = normed_303_cast_fp16, y = var_10529_to_fp16)[name = string("attn_output_129_cast_fp16")]; - tensor hidden_states_203_cast_fp16 = add(x = hidden_states_193_cast_fp16, y = attn_output_129_cast_fp16)[name = string("hidden_states_203_cast_fp16")]; - int32 var_10542 = const()[name = string("op_10542"), val = int32(-1)]; - fp16 const_551_promoted_to_fp16 = const()[name = string("const_551_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_10544_cast_fp16 = mul(x = hidden_states_203_cast_fp16, y = const_551_promoted_to_fp16)[name = string("op_10544_cast_fp16")]; - bool input_253_interleave_0 = const()[name = string("input_253_interleave_0"), val = bool(false)]; - tensor input_253_cast_fp16 = concat(axis = var_10542, interleave = input_253_interleave_0, values = (hidden_states_203_cast_fp16, var_10544_cast_fp16))[name = string("input_253_cast_fp16")]; - tensor normed_305_axes_0 = const()[name = string("normed_305_axes_0"), val = tensor([-1])]; - fp16 var_10539_to_fp16 = const()[name = string("op_10539_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_305_cast_fp16 = layer_norm(axes = normed_305_axes_0, epsilon = var_10539_to_fp16, x = input_253_cast_fp16)[name = string("normed_305_cast_fp16")]; - tensor normed_307_begin_0 = const()[name = string("normed_307_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_307_end_0 = const()[name = string("normed_307_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_307_end_mask_0 = const()[name = string("normed_307_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_307_cast_fp16 = slice_by_index(begin = normed_307_begin_0, end = normed_307_end_0, end_mask = normed_307_end_mask_0, x = normed_305_cast_fp16)[name = string("normed_307_cast_fp16")]; - tensor var_10558_to_fp16 = const()[name = string("op_10558_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(281678272)))]; - tensor x_205_cast_fp16 = mul(x = normed_307_cast_fp16, y = var_10558_to_fp16)[name = string("x_205_cast_fp16")]; - tensor var_10570 = const()[name = string("op_10570"), val = tensor([0, 2, 1])]; - tensor input_255_axes_0 = const()[name = string("input_255_axes_0"), val = tensor([2])]; - tensor var_10571_cast_fp16 = transpose(perm = var_10570, x = x_205_cast_fp16)[name = string("transpose_118")]; - tensor input_255_cast_fp16 = expand_dims(axes = input_255_axes_0, x = var_10571_cast_fp16)[name = string("input_255_cast_fp16")]; - string x_207_pad_type_0 = const()[name = string("x_207_pad_type_0"), val = string("valid")]; - tensor x_207_strides_0 = const()[name = string("x_207_strides_0"), val = tensor([1, 1])]; - tensor x_207_pad_0 = const()[name = string("x_207_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_207_dilations_0 = const()[name = string("x_207_dilations_0"), val = tensor([1, 1])]; - int32 x_207_groups_0 = const()[name = string("x_207_groups_0"), val = int32(1)]; - tensor model_model_layers_12_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1024744448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1030716480))))[name = string("model_model_layers_12_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_207_cast_fp16 = conv(dilations = x_207_dilations_0, groups = x_207_groups_0, pad = x_207_pad_0, pad_type = x_207_pad_type_0, strides = x_207_strides_0, weight = model_model_layers_12_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_255_cast_fp16)[name = string("x_207_cast_fp16")]; - string b_25_pad_type_0 = const()[name = string("b_25_pad_type_0"), val = string("valid")]; - tensor b_25_strides_0 = const()[name = string("b_25_strides_0"), val = tensor([1, 1])]; - tensor b_25_pad_0 = const()[name = string("b_25_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_25_dilations_0 = const()[name = string("b_25_dilations_0"), val = tensor([1, 1])]; - int32 b_25_groups_0 = const()[name = string("b_25_groups_0"), val = int32(1)]; - tensor model_model_layers_12_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1030827136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1036799168))))[name = string("model_model_layers_12_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_25_cast_fp16 = conv(dilations = b_25_dilations_0, groups = b_25_groups_0, pad = b_25_pad_0, pad_type = b_25_pad_type_0, strides = b_25_strides_0, weight = model_model_layers_12_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_255_cast_fp16)[name = string("b_25_cast_fp16")]; - string var_10596_mode_0 = const()[name = string("op_10596_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_10596_cast_fp16 = gelu(mode = var_10596_mode_0, x = x_207_cast_fp16)[name = string("op_10596_cast_fp16")]; - tensor input_257_cast_fp16 = mul(x = var_10596_cast_fp16, y = b_25_cast_fp16)[name = string("input_257_cast_fp16")]; - string e_25_pad_type_0 = const()[name = string("e_25_pad_type_0"), val = string("valid")]; - tensor e_25_strides_0 = const()[name = string("e_25_strides_0"), val = tensor([1, 1])]; - tensor e_25_pad_0 = const()[name = string("e_25_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_25_dilations_0 = const()[name = string("e_25_dilations_0"), val = tensor([1, 1])]; - int32 e_25_groups_0 = const()[name = string("e_25_groups_0"), val = int32(1)]; - tensor model_model_layers_12_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(293846016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299818048))))[name = string("model_model_layers_12_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_25_cast_fp16 = conv(dilations = e_25_dilations_0, groups = e_25_groups_0, pad = e_25_pad_0, pad_type = e_25_pad_type_0, strides = e_25_strides_0, weight = model_model_layers_12_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_257_cast_fp16)[name = string("e_25_cast_fp16")]; - tensor var_10604_axes_0 = const()[name = string("op_10604_axes_0"), val = tensor([2])]; - tensor var_10604_cast_fp16 = squeeze(axes = var_10604_axes_0, x = e_25_cast_fp16)[name = string("op_10604_cast_fp16")]; - tensor var_10605 = const()[name = string("op_10605"), val = tensor([0, 2, 1])]; - int32 var_10616 = const()[name = string("op_10616"), val = int32(-1)]; - fp16 const_555_promoted_to_fp16 = const()[name = string("const_555_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_205_cast_fp16 = transpose(perm = var_10605, x = var_10604_cast_fp16)[name = string("transpose_117")]; - tensor var_10618_cast_fp16 = mul(x = hidden_states_205_cast_fp16, y = const_555_promoted_to_fp16)[name = string("op_10618_cast_fp16")]; - bool input_259_interleave_0 = const()[name = string("input_259_interleave_0"), val = bool(false)]; - tensor input_259_cast_fp16 = concat(axis = var_10616, interleave = input_259_interleave_0, values = (hidden_states_205_cast_fp16, var_10618_cast_fp16))[name = string("input_259_cast_fp16")]; - tensor normed_309_axes_0 = const()[name = string("normed_309_axes_0"), val = tensor([-1])]; - fp16 var_10613_to_fp16 = const()[name = string("op_10613_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_309_cast_fp16 = layer_norm(axes = normed_309_axes_0, epsilon = var_10613_to_fp16, x = input_259_cast_fp16)[name = string("normed_309_cast_fp16")]; - tensor normed_311_begin_0 = const()[name = string("normed_311_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_311_end_0 = const()[name = string("normed_311_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_311_end_mask_0 = const()[name = string("normed_311_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_311_cast_fp16 = slice_by_index(begin = normed_311_begin_0, end = normed_311_end_0, end_mask = normed_311_end_mask_0, x = normed_309_cast_fp16)[name = string("normed_311_cast_fp16")]; - tensor var_10632_to_fp16 = const()[name = string("op_10632_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299836544)))]; - tensor hidden_states_207_cast_fp16 = mul(x = normed_311_cast_fp16, y = var_10632_to_fp16)[name = string("hidden_states_207_cast_fp16")]; - tensor hidden_states_209_cast_fp16 = add(x = hidden_states_203_cast_fp16, y = hidden_states_207_cast_fp16)[name = string("hidden_states_209_cast_fp16")]; - int32 var_10686 = const()[name = string("op_10686"), val = int32(-1)]; - fp16 const_560_promoted_to_fp16 = const()[name = string("const_560_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_10688_cast_fp16 = mul(x = hidden_states_209_cast_fp16, y = const_560_promoted_to_fp16)[name = string("op_10688_cast_fp16")]; - bool input_261_interleave_0 = const()[name = string("input_261_interleave_0"), val = bool(false)]; - tensor input_261_cast_fp16 = concat(axis = var_10686, interleave = input_261_interleave_0, values = (hidden_states_209_cast_fp16, var_10688_cast_fp16))[name = string("input_261_cast_fp16")]; - tensor normed_313_axes_0 = const()[name = string("normed_313_axes_0"), val = tensor([-1])]; - fp16 var_10683_to_fp16 = const()[name = string("op_10683_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_313_cast_fp16 = layer_norm(axes = normed_313_axes_0, epsilon = var_10683_to_fp16, x = input_261_cast_fp16)[name = string("normed_313_cast_fp16")]; - tensor normed_315_begin_0 = const()[name = string("normed_315_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_315_end_0 = const()[name = string("normed_315_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_315_end_mask_0 = const()[name = string("normed_315_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_315_cast_fp16 = slice_by_index(begin = normed_315_begin_0, end = normed_315_end_0, end_mask = normed_315_end_mask_0, x = normed_313_cast_fp16)[name = string("normed_315_cast_fp16")]; - tensor var_10702_to_fp16 = const()[name = string("op_10702_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299838912)))]; - tensor hidden_states_211_cast_fp16 = mul(x = normed_315_cast_fp16, y = var_10702_to_fp16)[name = string("hidden_states_211_cast_fp16")]; - tensor var_10713 = const()[name = string("op_10713"), val = tensor([0, 2, 1])]; - tensor var_10716_axes_0 = const()[name = string("op_10716_axes_0"), val = tensor([2])]; - tensor var_10714_cast_fp16 = transpose(perm = var_10713, x = hidden_states_211_cast_fp16)[name = string("transpose_116")]; - tensor var_10716_cast_fp16 = expand_dims(axes = var_10716_axes_0, x = var_10714_cast_fp16)[name = string("op_10716_cast_fp16")]; - string query_states_105_pad_type_0 = const()[name = string("query_states_105_pad_type_0"), val = string("valid")]; - tensor query_states_105_strides_0 = const()[name = string("query_states_105_strides_0"), val = tensor([1, 1])]; - tensor query_states_105_pad_0 = const()[name = string("query_states_105_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_105_dilations_0 = const()[name = string("query_states_105_dilations_0"), val = tensor([1, 1])]; - int32 query_states_105_groups_0 = const()[name = string("query_states_105_groups_0"), val = int32(1)]; - tensor query_states_105 = conv(dilations = query_states_105_dilations_0, groups = query_states_105_groups_0, pad = query_states_105_pad_0, pad_type = query_states_105_pad_type_0, strides = query_states_105_strides_0, weight = model_model_layers_13_self_attn_q_proj_weight_palettized, x = var_10716_cast_fp16)[name = string("query_states_105")]; - string key_states_131_pad_type_0 = const()[name = string("key_states_131_pad_type_0"), val = string("valid")]; - tensor key_states_131_strides_0 = const()[name = string("key_states_131_strides_0"), val = tensor([1, 1])]; - tensor key_states_131_pad_0 = const()[name = string("key_states_131_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_131_dilations_0 = const()[name = string("key_states_131_dilations_0"), val = tensor([1, 1])]; - int32 key_states_131_groups_0 = const()[name = string("key_states_131_groups_0"), val = int32(1)]; - tensor key_states_131 = conv(dilations = key_states_131_dilations_0, groups = key_states_131_groups_0, pad = key_states_131_pad_0, pad_type = key_states_131_pad_type_0, strides = key_states_131_strides_0, weight = model_model_layers_13_self_attn_k_proj_weight_palettized, x = var_10716_cast_fp16)[name = string("key_states_131")]; - string value_states_105_pad_type_0 = const()[name = string("value_states_105_pad_type_0"), val = string("valid")]; - tensor value_states_105_strides_0 = const()[name = string("value_states_105_strides_0"), val = tensor([1, 1])]; - tensor value_states_105_pad_0 = const()[name = string("value_states_105_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_105_dilations_0 = const()[name = string("value_states_105_dilations_0"), val = tensor([1, 1])]; - int32 value_states_105_groups_0 = const()[name = string("value_states_105_groups_0"), val = int32(1)]; - tensor value_states_105 = conv(dilations = value_states_105_dilations_0, groups = value_states_105_groups_0, pad = value_states_105_pad_0, pad_type = value_states_105_pad_type_0, strides = value_states_105_strides_0, weight = model_model_layers_13_self_attn_v_proj_weight_palettized, x = var_10716_cast_fp16)[name = string("value_states_105")]; - tensor var_10758 = const()[name = string("op_10758"), val = tensor([1, 4, 256, 64])]; - tensor var_10759 = reshape(shape = var_10758, x = query_states_105)[name = string("op_10759")]; - tensor var_10764 = const()[name = string("op_10764"), val = tensor([0, 1, 3, 2])]; - tensor var_10769 = const()[name = string("op_10769"), val = tensor([1, 1, 256, 64])]; - tensor var_10770 = reshape(shape = var_10769, x = key_states_131)[name = string("op_10770")]; - tensor var_10775 = const()[name = string("op_10775"), val = tensor([0, 1, 3, 2])]; - tensor var_10780 = const()[name = string("op_10780"), val = tensor([1, 1, 256, 64])]; - tensor var_10781 = reshape(shape = var_10780, x = value_states_105)[name = string("op_10781")]; - tensor var_10786 = const()[name = string("op_10786"), val = tensor([0, 1, 3, 2])]; - int32 var_10797 = const()[name = string("op_10797"), val = int32(-1)]; - fp16 const_565_promoted = const()[name = string("const_565_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_213 = transpose(perm = var_10764, x = var_10759)[name = string("transpose_115")]; - tensor var_10799 = mul(x = hidden_states_213, y = const_565_promoted)[name = string("op_10799")]; - bool input_265_interleave_0 = const()[name = string("input_265_interleave_0"), val = bool(false)]; - tensor input_265 = concat(axis = var_10797, interleave = input_265_interleave_0, values = (hidden_states_213, var_10799))[name = string("input_265")]; - tensor normed_317_axes_0 = const()[name = string("normed_317_axes_0"), val = tensor([-1])]; - fp16 var_10794_to_fp16 = const()[name = string("op_10794_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_317_cast_fp16 = layer_norm(axes = normed_317_axes_0, epsilon = var_10794_to_fp16, x = input_265)[name = string("normed_317_cast_fp16")]; - tensor normed_319_begin_0 = const()[name = string("normed_319_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_319_end_0 = const()[name = string("normed_319_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_319_end_mask_0 = const()[name = string("normed_319_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_319 = slice_by_index(begin = normed_319_begin_0, end = normed_319_end_0, end_mask = normed_319_end_mask_0, x = normed_317_cast_fp16)[name = string("normed_319")]; - tensor var_10813_to_fp16 = const()[name = string("op_10813_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299841280)))]; - tensor q_27_cast_fp16 = mul(x = normed_319, y = var_10813_to_fp16)[name = string("q_27_cast_fp16")]; - int32 var_10824 = const()[name = string("op_10824"), val = int32(-1)]; - fp16 const_569_promoted = const()[name = string("const_569_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_215 = transpose(perm = var_10775, x = var_10770)[name = string("transpose_114")]; - tensor var_10826 = mul(x = hidden_states_215, y = const_569_promoted)[name = string("op_10826")]; - bool input_267_interleave_0 = const()[name = string("input_267_interleave_0"), val = bool(false)]; - tensor input_267 = concat(axis = var_10824, interleave = input_267_interleave_0, values = (hidden_states_215, var_10826))[name = string("input_267")]; - tensor normed_321_axes_0 = const()[name = string("normed_321_axes_0"), val = tensor([-1])]; - fp16 var_10821_to_fp16 = const()[name = string("op_10821_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_321_cast_fp16 = layer_norm(axes = normed_321_axes_0, epsilon = var_10821_to_fp16, x = input_267)[name = string("normed_321_cast_fp16")]; - tensor normed_323_begin_0 = const()[name = string("normed_323_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_323_end_0 = const()[name = string("normed_323_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_323_end_mask_0 = const()[name = string("normed_323_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_323 = slice_by_index(begin = normed_323_begin_0, end = normed_323_end_0, end_mask = normed_323_end_mask_0, x = normed_321_cast_fp16)[name = string("normed_323")]; - tensor var_10840_to_fp16 = const()[name = string("op_10840_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299841856)))]; - tensor k_27_cast_fp16 = mul(x = normed_323, y = var_10840_to_fp16)[name = string("k_27_cast_fp16")]; - tensor var_10854_cast_fp16 = mul(x = q_27_cast_fp16, y = cos_5)[name = string("op_10854_cast_fp16")]; - tensor x1_53_begin_0 = const()[name = string("x1_53_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_53_end_0 = const()[name = string("x1_53_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_53_end_mask_0 = const()[name = string("x1_53_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_53_cast_fp16 = slice_by_index(begin = x1_53_begin_0, end = x1_53_end_0, end_mask = x1_53_end_mask_0, x = q_27_cast_fp16)[name = string("x1_53_cast_fp16")]; - tensor x2_53_begin_0 = const()[name = string("x2_53_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_53_end_0 = const()[name = string("x2_53_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_53_end_mask_0 = const()[name = string("x2_53_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_53_cast_fp16 = slice_by_index(begin = x2_53_begin_0, end = x2_53_end_0, end_mask = x2_53_end_mask_0, x = q_27_cast_fp16)[name = string("x2_53_cast_fp16")]; - fp16 const_575_promoted_to_fp16 = const()[name = string("const_575_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_10875_cast_fp16 = mul(x = x2_53_cast_fp16, y = const_575_promoted_to_fp16)[name = string("op_10875_cast_fp16")]; - int32 var_10877 = const()[name = string("op_10877"), val = int32(-1)]; - bool var_10878_interleave_0 = const()[name = string("op_10878_interleave_0"), val = bool(false)]; - tensor var_10878_cast_fp16 = concat(axis = var_10877, interleave = var_10878_interleave_0, values = (var_10875_cast_fp16, x1_53_cast_fp16))[name = string("op_10878_cast_fp16")]; - tensor var_10879_cast_fp16 = mul(x = var_10878_cast_fp16, y = sin_5)[name = string("op_10879_cast_fp16")]; - tensor query_states_107_cast_fp16 = add(x = var_10854_cast_fp16, y = var_10879_cast_fp16)[name = string("query_states_107_cast_fp16")]; - tensor var_10882_cast_fp16 = mul(x = k_27_cast_fp16, y = cos_5)[name = string("op_10882_cast_fp16")]; - tensor x1_55_begin_0 = const()[name = string("x1_55_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_55_end_0 = const()[name = string("x1_55_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_55_end_mask_0 = const()[name = string("x1_55_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_55_cast_fp16 = slice_by_index(begin = x1_55_begin_0, end = x1_55_end_0, end_mask = x1_55_end_mask_0, x = k_27_cast_fp16)[name = string("x1_55_cast_fp16")]; - tensor x2_55_begin_0 = const()[name = string("x2_55_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_55_end_0 = const()[name = string("x2_55_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_55_end_mask_0 = const()[name = string("x2_55_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_55_cast_fp16 = slice_by_index(begin = x2_55_begin_0, end = x2_55_end_0, end_mask = x2_55_end_mask_0, x = k_27_cast_fp16)[name = string("x2_55_cast_fp16")]; - fp16 const_578_promoted_to_fp16 = const()[name = string("const_578_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_10903_cast_fp16 = mul(x = x2_55_cast_fp16, y = const_578_promoted_to_fp16)[name = string("op_10903_cast_fp16")]; - int32 var_10905 = const()[name = string("op_10905"), val = int32(-1)]; - bool var_10906_interleave_0 = const()[name = string("op_10906_interleave_0"), val = bool(false)]; - tensor var_10906_cast_fp16 = concat(axis = var_10905, interleave = var_10906_interleave_0, values = (var_10903_cast_fp16, x1_55_cast_fp16))[name = string("op_10906_cast_fp16")]; - tensor var_10907_cast_fp16 = mul(x = var_10906_cast_fp16, y = sin_5)[name = string("op_10907_cast_fp16")]; - tensor key_states_133_cast_fp16 = add(x = var_10882_cast_fp16, y = var_10907_cast_fp16)[name = string("key_states_133_cast_fp16")]; - tensor expand_dims_156 = const()[name = string("expand_dims_156"), val = tensor([11])]; - tensor expand_dims_157 = const()[name = string("expand_dims_157"), val = tensor([0])]; - tensor expand_dims_159 = const()[name = string("expand_dims_159"), val = tensor([0])]; - tensor expand_dims_160 = const()[name = string("expand_dims_160"), val = tensor([12])]; - int32 concat_236_axis_0 = const()[name = string("concat_236_axis_0"), val = int32(0)]; - bool concat_236_interleave_0 = const()[name = string("concat_236_interleave_0"), val = bool(false)]; - tensor concat_236 = concat(axis = concat_236_axis_0, interleave = concat_236_interleave_0, values = (expand_dims_156, expand_dims_157, current_pos, expand_dims_159))[name = string("concat_236")]; - tensor concat_237_values1_0 = const()[name = string("concat_237_values1_0"), val = tensor([0])]; - tensor concat_237_values3_0 = const()[name = string("concat_237_values3_0"), val = tensor([0])]; - int32 concat_237_axis_0 = const()[name = string("concat_237_axis_0"), val = int32(0)]; - bool concat_237_interleave_0 = const()[name = string("concat_237_interleave_0"), val = bool(false)]; - tensor concat_237 = concat(axis = concat_237_axis_0, interleave = concat_237_interleave_0, values = (expand_dims_160, concat_237_values1_0, end_pos_1, concat_237_values3_0))[name = string("concat_237")]; - tensor model_model_kv_cache_local_internal_tensor_assign_23_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_23_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_23_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_236, begin_mask = model_model_kv_cache_local_internal_tensor_assign_23_begin_mask_0, end = concat_237, end_mask = model_model_kv_cache_local_internal_tensor_assign_23_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_23_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_23_stride_0, update = key_states_133_cast_fp16, x = coreml_update_state_77)[name = string("model_model_kv_cache_local_internal_tensor_assign_23_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_23_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_130_write_state")]; - tensor coreml_update_state_78 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_130")]; - tensor expand_dims_162 = const()[name = string("expand_dims_162"), val = tensor([33])]; - tensor expand_dims_163 = const()[name = string("expand_dims_163"), val = tensor([0])]; - tensor expand_dims_165 = const()[name = string("expand_dims_165"), val = tensor([0])]; - tensor expand_dims_166 = const()[name = string("expand_dims_166"), val = tensor([34])]; - int32 concat_240_axis_0 = const()[name = string("concat_240_axis_0"), val = int32(0)]; - bool concat_240_interleave_0 = const()[name = string("concat_240_interleave_0"), val = bool(false)]; - tensor concat_240 = concat(axis = concat_240_axis_0, interleave = concat_240_interleave_0, values = (expand_dims_162, expand_dims_163, current_pos, expand_dims_165))[name = string("concat_240")]; - tensor concat_241_values1_0 = const()[name = string("concat_241_values1_0"), val = tensor([0])]; - tensor concat_241_values3_0 = const()[name = string("concat_241_values3_0"), val = tensor([0])]; - int32 concat_241_axis_0 = const()[name = string("concat_241_axis_0"), val = int32(0)]; - bool concat_241_interleave_0 = const()[name = string("concat_241_interleave_0"), val = bool(false)]; - tensor concat_241 = concat(axis = concat_241_axis_0, interleave = concat_241_interleave_0, values = (expand_dims_166, concat_241_values1_0, end_pos_1, concat_241_values3_0))[name = string("concat_241")]; - tensor model_model_kv_cache_local_internal_tensor_assign_24_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_24_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_24_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor value_states_107 = transpose(perm = var_10786, x = var_10781)[name = string("transpose_113")]; - tensor model_model_kv_cache_local_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_240, begin_mask = model_model_kv_cache_local_internal_tensor_assign_24_begin_mask_0, end = concat_241, end_mask = model_model_kv_cache_local_internal_tensor_assign_24_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_24_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_24_stride_0, update = value_states_107, x = coreml_update_state_78)[name = string("model_model_kv_cache_local_internal_tensor_assign_24_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_24_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_131_write_state")]; - tensor coreml_update_state_79 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_131")]; - tensor var_11006_begin_0 = const()[name = string("op_11006_begin_0"), val = tensor([11, 0, 0, 0])]; - tensor var_11006_end_0 = const()[name = string("op_11006_end_0"), val = tensor([12, 1, 512, 256])]; - tensor var_11006_end_mask_0 = const()[name = string("op_11006_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_11006_cast_fp16 = slice_by_index(begin = var_11006_begin_0, end = var_11006_end_0, end_mask = var_11006_end_mask_0, x = coreml_update_state_79)[name = string("op_11006_cast_fp16")]; - tensor var_11013_begin_0 = const()[name = string("op_11013_begin_0"), val = tensor([33, 0, 0, 0])]; - tensor var_11013_end_0 = const()[name = string("op_11013_end_0"), val = tensor([34, 1, 512, 256])]; - tensor var_11013_end_mask_0 = const()[name = string("op_11013_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_11013_cast_fp16 = slice_by_index(begin = var_11013_begin_0, end = var_11013_end_0, end_mask = var_11013_end_mask_0, x = coreml_update_state_79)[name = string("op_11013_cast_fp16")]; - tensor var_11052 = const()[name = string("op_11052"), val = tensor([1, 4, 1, 1])]; - tensor x_213_cast_fp16 = tile(reps = var_11052, x = var_11006_cast_fp16)[name = string("x_213_cast_fp16")]; - tensor var_11072 = const()[name = string("op_11072"), val = tensor([1, 4, 1, 1])]; - tensor x_219_cast_fp16 = tile(reps = var_11072, x = var_11013_cast_fp16)[name = string("x_219_cast_fp16")]; - bool var_11099_transpose_x_0 = const()[name = string("op_11099_transpose_x_0"), val = bool(false)]; - bool var_11099_transpose_y_0 = const()[name = string("op_11099_transpose_y_0"), val = bool(true)]; - tensor var_11099 = matmul(transpose_x = var_11099_transpose_x_0, transpose_y = var_11099_transpose_y_0, x = query_states_107_cast_fp16, y = x_213_cast_fp16)[name = string("op_11099")]; - fp16 var_11100_to_fp16 = const()[name = string("op_11100_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_53_cast_fp16 = mul(x = var_11099, y = var_11100_to_fp16)[name = string("attn_weights_53_cast_fp16")]; - tensor attn_weights_55_cast_fp16 = add(x = attn_weights_53_cast_fp16, y = mask_slice_1)[name = string("attn_weights_55_cast_fp16")]; - int32 var_11135 = const()[name = string("op_11135"), val = int32(-1)]; - tensor var_11137_cast_fp16 = softmax(axis = var_11135, x = attn_weights_55_cast_fp16)[name = string("op_11137_cast_fp16")]; - tensor concat_246 = const()[name = string("concat_246"), val = tensor([4, 64, 512])]; - tensor reshape_39_cast_fp16 = reshape(shape = concat_246, x = var_11137_cast_fp16)[name = string("reshape_39_cast_fp16")]; - tensor concat_247 = const()[name = string("concat_247"), val = tensor([4, 512, 256])]; - tensor reshape_40_cast_fp16 = reshape(shape = concat_247, x = x_219_cast_fp16)[name = string("reshape_40_cast_fp16")]; - bool matmul_13_transpose_x_0 = const()[name = string("matmul_13_transpose_x_0"), val = bool(false)]; - bool matmul_13_transpose_y_0 = const()[name = string("matmul_13_transpose_y_0"), val = bool(false)]; - tensor matmul_13_cast_fp16 = matmul(transpose_x = matmul_13_transpose_x_0, transpose_y = matmul_13_transpose_y_0, x = reshape_39_cast_fp16, y = reshape_40_cast_fp16)[name = string("matmul_13_cast_fp16")]; - tensor concat_251 = const()[name = string("concat_251"), val = tensor([1, 4, 64, 256])]; - tensor reshape_41_cast_fp16 = reshape(shape = concat_251, x = matmul_13_cast_fp16)[name = string("reshape_41_cast_fp16")]; - tensor var_11149_perm_0 = const()[name = string("op_11149_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_11168 = const()[name = string("op_11168"), val = tensor([1, 64, 1024])]; - tensor var_11149_cast_fp16 = transpose(perm = var_11149_perm_0, x = reshape_41_cast_fp16)[name = string("transpose_112")]; - tensor attn_output_135_cast_fp16 = reshape(shape = var_11168, x = var_11149_cast_fp16)[name = string("attn_output_135_cast_fp16")]; - tensor var_11173 = const()[name = string("op_11173"), val = tensor([0, 2, 1])]; - string var_11189_pad_type_0 = const()[name = string("op_11189_pad_type_0"), val = string("valid")]; - int32 var_11189_groups_0 = const()[name = string("op_11189_groups_0"), val = int32(1)]; - tensor var_11189_strides_0 = const()[name = string("op_11189_strides_0"), val = tensor([1])]; - tensor var_11189_pad_0 = const()[name = string("op_11189_pad_0"), val = tensor([0, 0])]; - tensor var_11189_dilations_0 = const()[name = string("op_11189_dilations_0"), val = tensor([1])]; - tensor squeeze_13_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299842432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(300727232))))[name = string("squeeze_13_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_11174_cast_fp16 = transpose(perm = var_11173, x = attn_output_135_cast_fp16)[name = string("transpose_111")]; - tensor var_11189_cast_fp16 = conv(dilations = var_11189_dilations_0, groups = var_11189_groups_0, pad = var_11189_pad_0, pad_type = var_11189_pad_type_0, strides = var_11189_strides_0, weight = squeeze_13_cast_fp16_to_fp32_to_fp16_palettized, x = var_11174_cast_fp16)[name = string("op_11189_cast_fp16")]; - tensor var_11193 = const()[name = string("op_11193"), val = tensor([0, 2, 1])]; - int32 var_11204 = const()[name = string("op_11204"), val = int32(-1)]; - fp16 const_590_promoted_to_fp16 = const()[name = string("const_590_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_217_cast_fp16 = transpose(perm = var_11193, x = var_11189_cast_fp16)[name = string("transpose_110")]; - tensor var_11206_cast_fp16 = mul(x = hidden_states_217_cast_fp16, y = const_590_promoted_to_fp16)[name = string("op_11206_cast_fp16")]; - bool input_271_interleave_0 = const()[name = string("input_271_interleave_0"), val = bool(false)]; - tensor input_271_cast_fp16 = concat(axis = var_11204, interleave = input_271_interleave_0, values = (hidden_states_217_cast_fp16, var_11206_cast_fp16))[name = string("input_271_cast_fp16")]; - tensor normed_325_axes_0 = const()[name = string("normed_325_axes_0"), val = tensor([-1])]; - fp16 var_11201_to_fp16 = const()[name = string("op_11201_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_325_cast_fp16 = layer_norm(axes = normed_325_axes_0, epsilon = var_11201_to_fp16, x = input_271_cast_fp16)[name = string("normed_325_cast_fp16")]; - tensor normed_327_begin_0 = const()[name = string("normed_327_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_327_end_0 = const()[name = string("normed_327_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_327_end_mask_0 = const()[name = string("normed_327_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_327_cast_fp16 = slice_by_index(begin = normed_327_begin_0, end = normed_327_end_0, end_mask = normed_327_end_mask_0, x = normed_325_cast_fp16)[name = string("normed_327_cast_fp16")]; - tensor var_11220_to_fp16 = const()[name = string("op_11220_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(300745728)))]; - tensor attn_output_139_cast_fp16 = mul(x = normed_327_cast_fp16, y = var_11220_to_fp16)[name = string("attn_output_139_cast_fp16")]; - tensor hidden_states_219_cast_fp16 = add(x = hidden_states_209_cast_fp16, y = attn_output_139_cast_fp16)[name = string("hidden_states_219_cast_fp16")]; - int32 var_11233 = const()[name = string("op_11233"), val = int32(-1)]; - fp16 const_594_promoted_to_fp16 = const()[name = string("const_594_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_11235_cast_fp16 = mul(x = hidden_states_219_cast_fp16, y = const_594_promoted_to_fp16)[name = string("op_11235_cast_fp16")]; - bool input_273_interleave_0 = const()[name = string("input_273_interleave_0"), val = bool(false)]; - tensor input_273_cast_fp16 = concat(axis = var_11233, interleave = input_273_interleave_0, values = (hidden_states_219_cast_fp16, var_11235_cast_fp16))[name = string("input_273_cast_fp16")]; - tensor normed_329_axes_0 = const()[name = string("normed_329_axes_0"), val = tensor([-1])]; - fp16 var_11230_to_fp16 = const()[name = string("op_11230_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_329_cast_fp16 = layer_norm(axes = normed_329_axes_0, epsilon = var_11230_to_fp16, x = input_273_cast_fp16)[name = string("normed_329_cast_fp16")]; - tensor normed_331_begin_0 = const()[name = string("normed_331_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_331_end_0 = const()[name = string("normed_331_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_331_end_mask_0 = const()[name = string("normed_331_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_331_cast_fp16 = slice_by_index(begin = normed_331_begin_0, end = normed_331_end_0, end_mask = normed_331_end_mask_0, x = normed_329_cast_fp16)[name = string("normed_331_cast_fp16")]; - tensor var_11249_to_fp16 = const()[name = string("op_11249_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(300748096)))]; - tensor x_221_cast_fp16 = mul(x = normed_331_cast_fp16, y = var_11249_to_fp16)[name = string("x_221_cast_fp16")]; - tensor var_11261 = const()[name = string("op_11261"), val = tensor([0, 2, 1])]; - tensor input_275_axes_0 = const()[name = string("input_275_axes_0"), val = tensor([2])]; - tensor var_11262_cast_fp16 = transpose(perm = var_11261, x = x_221_cast_fp16)[name = string("transpose_109")]; - tensor input_275_cast_fp16 = expand_dims(axes = input_275_axes_0, x = var_11262_cast_fp16)[name = string("input_275_cast_fp16")]; - string x_223_pad_type_0 = const()[name = string("x_223_pad_type_0"), val = string("valid")]; - tensor x_223_strides_0 = const()[name = string("x_223_strides_0"), val = tensor([1, 1])]; - tensor x_223_pad_0 = const()[name = string("x_223_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_223_dilations_0 = const()[name = string("x_223_dilations_0"), val = tensor([1, 1])]; - int32 x_223_groups_0 = const()[name = string("x_223_groups_0"), val = int32(1)]; - tensor model_model_layers_13_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1036909824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1042881856))))[name = string("model_model_layers_13_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_223_cast_fp16 = conv(dilations = x_223_dilations_0, groups = x_223_groups_0, pad = x_223_pad_0, pad_type = x_223_pad_type_0, strides = x_223_strides_0, weight = model_model_layers_13_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_275_cast_fp16)[name = string("x_223_cast_fp16")]; - string b_27_pad_type_0 = const()[name = string("b_27_pad_type_0"), val = string("valid")]; - tensor b_27_strides_0 = const()[name = string("b_27_strides_0"), val = tensor([1, 1])]; - tensor b_27_pad_0 = const()[name = string("b_27_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_27_dilations_0 = const()[name = string("b_27_dilations_0"), val = tensor([1, 1])]; - int32 b_27_groups_0 = const()[name = string("b_27_groups_0"), val = int32(1)]; - tensor model_model_layers_13_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1042992512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1048964544))))[name = string("model_model_layers_13_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_27_cast_fp16 = conv(dilations = b_27_dilations_0, groups = b_27_groups_0, pad = b_27_pad_0, pad_type = b_27_pad_type_0, strides = b_27_strides_0, weight = model_model_layers_13_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_275_cast_fp16)[name = string("b_27_cast_fp16")]; - string var_11287_mode_0 = const()[name = string("op_11287_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_11287_cast_fp16 = gelu(mode = var_11287_mode_0, x = x_223_cast_fp16)[name = string("op_11287_cast_fp16")]; - tensor input_277_cast_fp16 = mul(x = var_11287_cast_fp16, y = b_27_cast_fp16)[name = string("input_277_cast_fp16")]; - string e_27_pad_type_0 = const()[name = string("e_27_pad_type_0"), val = string("valid")]; - tensor e_27_strides_0 = const()[name = string("e_27_strides_0"), val = tensor([1, 1])]; - tensor e_27_pad_0 = const()[name = string("e_27_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_27_dilations_0 = const()[name = string("e_27_dilations_0"), val = tensor([1, 1])]; - int32 e_27_groups_0 = const()[name = string("e_27_groups_0"), val = int32(1)]; - tensor model_model_layers_13_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(312915840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318887872))))[name = string("model_model_layers_13_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_27_cast_fp16 = conv(dilations = e_27_dilations_0, groups = e_27_groups_0, pad = e_27_pad_0, pad_type = e_27_pad_type_0, strides = e_27_strides_0, weight = model_model_layers_13_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_277_cast_fp16)[name = string("e_27_cast_fp16")]; - tensor var_11295_axes_0 = const()[name = string("op_11295_axes_0"), val = tensor([2])]; - tensor var_11295_cast_fp16 = squeeze(axes = var_11295_axes_0, x = e_27_cast_fp16)[name = string("op_11295_cast_fp16")]; - tensor var_11296 = const()[name = string("op_11296"), val = tensor([0, 2, 1])]; - int32 var_11307 = const()[name = string("op_11307"), val = int32(-1)]; - fp16 const_598_promoted_to_fp16 = const()[name = string("const_598_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_221_cast_fp16 = transpose(perm = var_11296, x = var_11295_cast_fp16)[name = string("transpose_108")]; - tensor var_11309_cast_fp16 = mul(x = hidden_states_221_cast_fp16, y = const_598_promoted_to_fp16)[name = string("op_11309_cast_fp16")]; - bool input_279_interleave_0 = const()[name = string("input_279_interleave_0"), val = bool(false)]; - tensor input_279_cast_fp16 = concat(axis = var_11307, interleave = input_279_interleave_0, values = (hidden_states_221_cast_fp16, var_11309_cast_fp16))[name = string("input_279_cast_fp16")]; - tensor normed_333_axes_0 = const()[name = string("normed_333_axes_0"), val = tensor([-1])]; - fp16 var_11304_to_fp16 = const()[name = string("op_11304_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_333_cast_fp16 = layer_norm(axes = normed_333_axes_0, epsilon = var_11304_to_fp16, x = input_279_cast_fp16)[name = string("normed_333_cast_fp16")]; - tensor normed_335_begin_0 = const()[name = string("normed_335_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_335_end_0 = const()[name = string("normed_335_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_335_end_mask_0 = const()[name = string("normed_335_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_335_cast_fp16 = slice_by_index(begin = normed_335_begin_0, end = normed_335_end_0, end_mask = normed_335_end_mask_0, x = normed_333_cast_fp16)[name = string("normed_335_cast_fp16")]; - tensor var_11323_to_fp16 = const()[name = string("op_11323_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318906368)))]; - tensor hidden_states_223_cast_fp16 = mul(x = normed_335_cast_fp16, y = var_11323_to_fp16)[name = string("hidden_states_223_cast_fp16")]; - tensor hidden_states_225_cast_fp16 = add(x = hidden_states_219_cast_fp16, y = hidden_states_223_cast_fp16)[name = string("hidden_states_225_cast_fp16")]; - int32 var_11377 = const()[name = string("op_11377"), val = int32(-1)]; - fp16 const_603_promoted_to_fp16 = const()[name = string("const_603_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_11379_cast_fp16 = mul(x = hidden_states_225_cast_fp16, y = const_603_promoted_to_fp16)[name = string("op_11379_cast_fp16")]; - bool input_281_interleave_0 = const()[name = string("input_281_interleave_0"), val = bool(false)]; - tensor input_281_cast_fp16 = concat(axis = var_11377, interleave = input_281_interleave_0, values = (hidden_states_225_cast_fp16, var_11379_cast_fp16))[name = string("input_281_cast_fp16")]; - tensor normed_337_axes_0 = const()[name = string("normed_337_axes_0"), val = tensor([-1])]; - fp16 var_11374_to_fp16 = const()[name = string("op_11374_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_337_cast_fp16 = layer_norm(axes = normed_337_axes_0, epsilon = var_11374_to_fp16, x = input_281_cast_fp16)[name = string("normed_337_cast_fp16")]; - tensor normed_339_begin_0 = const()[name = string("normed_339_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_339_end_0 = const()[name = string("normed_339_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_339_end_mask_0 = const()[name = string("normed_339_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_339_cast_fp16 = slice_by_index(begin = normed_339_begin_0, end = normed_339_end_0, end_mask = normed_339_end_mask_0, x = normed_337_cast_fp16)[name = string("normed_339_cast_fp16")]; - tensor var_11393_to_fp16 = const()[name = string("op_11393_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318908736)))]; - tensor hidden_states_227_cast_fp16 = mul(x = normed_339_cast_fp16, y = var_11393_to_fp16)[name = string("hidden_states_227_cast_fp16")]; - tensor var_11404 = const()[name = string("op_11404"), val = tensor([0, 2, 1])]; - tensor var_11407_axes_0 = const()[name = string("op_11407_axes_0"), val = tensor([2])]; - tensor var_11405_cast_fp16 = transpose(perm = var_11404, x = hidden_states_227_cast_fp16)[name = string("transpose_107")]; - tensor var_11407_cast_fp16 = expand_dims(axes = var_11407_axes_0, x = var_11405_cast_fp16)[name = string("op_11407_cast_fp16")]; - string query_states_113_pad_type_0 = const()[name = string("query_states_113_pad_type_0"), val = string("valid")]; - tensor query_states_113_strides_0 = const()[name = string("query_states_113_strides_0"), val = tensor([1, 1])]; - tensor query_states_113_pad_0 = const()[name = string("query_states_113_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_113_dilations_0 = const()[name = string("query_states_113_dilations_0"), val = tensor([1, 1])]; - int32 query_states_113_groups_0 = const()[name = string("query_states_113_groups_0"), val = int32(1)]; - tensor query_states_113 = conv(dilations = query_states_113_dilations_0, groups = query_states_113_groups_0, pad = query_states_113_pad_0, pad_type = query_states_113_pad_type_0, strides = query_states_113_strides_0, weight = model_model_layers_14_self_attn_q_proj_weight_palettized, x = var_11407_cast_fp16)[name = string("query_states_113")]; - string key_states_141_pad_type_0 = const()[name = string("key_states_141_pad_type_0"), val = string("valid")]; - tensor key_states_141_strides_0 = const()[name = string("key_states_141_strides_0"), val = tensor([1, 1])]; - tensor key_states_141_pad_0 = const()[name = string("key_states_141_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_141_dilations_0 = const()[name = string("key_states_141_dilations_0"), val = tensor([1, 1])]; - int32 key_states_141_groups_0 = const()[name = string("key_states_141_groups_0"), val = int32(1)]; - tensor key_states_141 = conv(dilations = key_states_141_dilations_0, groups = key_states_141_groups_0, pad = key_states_141_pad_0, pad_type = key_states_141_pad_type_0, strides = key_states_141_strides_0, weight = model_model_layers_14_self_attn_k_proj_weight_palettized, x = var_11407_cast_fp16)[name = string("key_states_141")]; - string value_states_113_pad_type_0 = const()[name = string("value_states_113_pad_type_0"), val = string("valid")]; - tensor value_states_113_strides_0 = const()[name = string("value_states_113_strides_0"), val = tensor([1, 1])]; - tensor value_states_113_pad_0 = const()[name = string("value_states_113_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_113_dilations_0 = const()[name = string("value_states_113_dilations_0"), val = tensor([1, 1])]; - int32 value_states_113_groups_0 = const()[name = string("value_states_113_groups_0"), val = int32(1)]; - tensor value_states_113 = conv(dilations = value_states_113_dilations_0, groups = value_states_113_groups_0, pad = value_states_113_pad_0, pad_type = value_states_113_pad_type_0, strides = value_states_113_strides_0, weight = model_model_layers_14_self_attn_v_proj_weight_palettized, x = var_11407_cast_fp16)[name = string("value_states_113")]; - tensor var_11449 = const()[name = string("op_11449"), val = tensor([1, 4, 256, 64])]; - tensor var_11450 = reshape(shape = var_11449, x = query_states_113)[name = string("op_11450")]; - tensor var_11455 = const()[name = string("op_11455"), val = tensor([0, 1, 3, 2])]; - tensor var_11460 = const()[name = string("op_11460"), val = tensor([1, 1, 256, 64])]; - tensor var_11461 = reshape(shape = var_11460, x = key_states_141)[name = string("op_11461")]; - tensor var_11466 = const()[name = string("op_11466"), val = tensor([0, 1, 3, 2])]; - tensor var_11471 = const()[name = string("op_11471"), val = tensor([1, 1, 256, 64])]; - tensor var_11472 = reshape(shape = var_11471, x = value_states_113)[name = string("op_11472")]; - tensor var_11477 = const()[name = string("op_11477"), val = tensor([0, 1, 3, 2])]; - int32 var_11488 = const()[name = string("op_11488"), val = int32(-1)]; - fp16 const_608_promoted = const()[name = string("const_608_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_229 = transpose(perm = var_11455, x = var_11450)[name = string("transpose_106")]; - tensor var_11490 = mul(x = hidden_states_229, y = const_608_promoted)[name = string("op_11490")]; - bool input_285_interleave_0 = const()[name = string("input_285_interleave_0"), val = bool(false)]; - tensor input_285 = concat(axis = var_11488, interleave = input_285_interleave_0, values = (hidden_states_229, var_11490))[name = string("input_285")]; - tensor normed_341_axes_0 = const()[name = string("normed_341_axes_0"), val = tensor([-1])]; - fp16 var_11485_to_fp16 = const()[name = string("op_11485_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_341_cast_fp16 = layer_norm(axes = normed_341_axes_0, epsilon = var_11485_to_fp16, x = input_285)[name = string("normed_341_cast_fp16")]; - tensor normed_343_begin_0 = const()[name = string("normed_343_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_343_end_0 = const()[name = string("normed_343_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_343_end_mask_0 = const()[name = string("normed_343_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_343 = slice_by_index(begin = normed_343_begin_0, end = normed_343_end_0, end_mask = normed_343_end_mask_0, x = normed_341_cast_fp16)[name = string("normed_343")]; - tensor var_11504_to_fp16 = const()[name = string("op_11504_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318911104)))]; - tensor q_29_cast_fp16 = mul(x = normed_343, y = var_11504_to_fp16)[name = string("q_29_cast_fp16")]; - int32 var_11515 = const()[name = string("op_11515"), val = int32(-1)]; - fp16 const_612_promoted = const()[name = string("const_612_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_231 = transpose(perm = var_11466, x = var_11461)[name = string("transpose_105")]; - tensor var_11517 = mul(x = hidden_states_231, y = const_612_promoted)[name = string("op_11517")]; - bool input_287_interleave_0 = const()[name = string("input_287_interleave_0"), val = bool(false)]; - tensor input_287 = concat(axis = var_11515, interleave = input_287_interleave_0, values = (hidden_states_231, var_11517))[name = string("input_287")]; - tensor normed_345_axes_0 = const()[name = string("normed_345_axes_0"), val = tensor([-1])]; - fp16 var_11512_to_fp16 = const()[name = string("op_11512_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_345_cast_fp16 = layer_norm(axes = normed_345_axes_0, epsilon = var_11512_to_fp16, x = input_287)[name = string("normed_345_cast_fp16")]; - tensor normed_347_begin_0 = const()[name = string("normed_347_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_347_end_0 = const()[name = string("normed_347_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_347_end_mask_0 = const()[name = string("normed_347_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_347 = slice_by_index(begin = normed_347_begin_0, end = normed_347_end_0, end_mask = normed_347_end_mask_0, x = normed_345_cast_fp16)[name = string("normed_347")]; - tensor var_11531_to_fp16 = const()[name = string("op_11531_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318911680)))]; - tensor k_29_cast_fp16 = mul(x = normed_347, y = var_11531_to_fp16)[name = string("k_29_cast_fp16")]; - tensor var_11545_cast_fp16 = mul(x = q_29_cast_fp16, y = cos_5)[name = string("op_11545_cast_fp16")]; - tensor x1_57_begin_0 = const()[name = string("x1_57_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_57_end_0 = const()[name = string("x1_57_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_57_end_mask_0 = const()[name = string("x1_57_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_57_cast_fp16 = slice_by_index(begin = x1_57_begin_0, end = x1_57_end_0, end_mask = x1_57_end_mask_0, x = q_29_cast_fp16)[name = string("x1_57_cast_fp16")]; - tensor x2_57_begin_0 = const()[name = string("x2_57_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_57_end_0 = const()[name = string("x2_57_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_57_end_mask_0 = const()[name = string("x2_57_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_57_cast_fp16 = slice_by_index(begin = x2_57_begin_0, end = x2_57_end_0, end_mask = x2_57_end_mask_0, x = q_29_cast_fp16)[name = string("x2_57_cast_fp16")]; - fp16 const_618_promoted_to_fp16 = const()[name = string("const_618_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_11566_cast_fp16 = mul(x = x2_57_cast_fp16, y = const_618_promoted_to_fp16)[name = string("op_11566_cast_fp16")]; - int32 var_11568 = const()[name = string("op_11568"), val = int32(-1)]; - bool var_11569_interleave_0 = const()[name = string("op_11569_interleave_0"), val = bool(false)]; - tensor var_11569_cast_fp16 = concat(axis = var_11568, interleave = var_11569_interleave_0, values = (var_11566_cast_fp16, x1_57_cast_fp16))[name = string("op_11569_cast_fp16")]; - tensor var_11570_cast_fp16 = mul(x = var_11569_cast_fp16, y = sin_5)[name = string("op_11570_cast_fp16")]; - tensor query_states_115_cast_fp16 = add(x = var_11545_cast_fp16, y = var_11570_cast_fp16)[name = string("query_states_115_cast_fp16")]; - tensor var_11573_cast_fp16 = mul(x = k_29_cast_fp16, y = cos_5)[name = string("op_11573_cast_fp16")]; - tensor x1_59_begin_0 = const()[name = string("x1_59_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_59_end_0 = const()[name = string("x1_59_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_59_end_mask_0 = const()[name = string("x1_59_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_59_cast_fp16 = slice_by_index(begin = x1_59_begin_0, end = x1_59_end_0, end_mask = x1_59_end_mask_0, x = k_29_cast_fp16)[name = string("x1_59_cast_fp16")]; - tensor x2_59_begin_0 = const()[name = string("x2_59_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_59_end_0 = const()[name = string("x2_59_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_59_end_mask_0 = const()[name = string("x2_59_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_59_cast_fp16 = slice_by_index(begin = x2_59_begin_0, end = x2_59_end_0, end_mask = x2_59_end_mask_0, x = k_29_cast_fp16)[name = string("x2_59_cast_fp16")]; - fp16 const_621_promoted_to_fp16 = const()[name = string("const_621_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_11594_cast_fp16 = mul(x = x2_59_cast_fp16, y = const_621_promoted_to_fp16)[name = string("op_11594_cast_fp16")]; - int32 var_11596 = const()[name = string("op_11596"), val = int32(-1)]; - bool var_11597_interleave_0 = const()[name = string("op_11597_interleave_0"), val = bool(false)]; - tensor var_11597_cast_fp16 = concat(axis = var_11596, interleave = var_11597_interleave_0, values = (var_11594_cast_fp16, x1_59_cast_fp16))[name = string("op_11597_cast_fp16")]; - tensor var_11598_cast_fp16 = mul(x = var_11597_cast_fp16, y = sin_5)[name = string("op_11598_cast_fp16")]; - tensor key_states_143_cast_fp16 = add(x = var_11573_cast_fp16, y = var_11598_cast_fp16)[name = string("key_states_143_cast_fp16")]; - tensor expand_dims_168 = const()[name = string("expand_dims_168"), val = tensor([12])]; - tensor expand_dims_169 = const()[name = string("expand_dims_169"), val = tensor([0])]; - tensor expand_dims_171 = const()[name = string("expand_dims_171"), val = tensor([0])]; - tensor expand_dims_172 = const()[name = string("expand_dims_172"), val = tensor([13])]; - int32 concat_254_axis_0 = const()[name = string("concat_254_axis_0"), val = int32(0)]; - bool concat_254_interleave_0 = const()[name = string("concat_254_interleave_0"), val = bool(false)]; - tensor concat_254 = concat(axis = concat_254_axis_0, interleave = concat_254_interleave_0, values = (expand_dims_168, expand_dims_169, current_pos, expand_dims_171))[name = string("concat_254")]; - tensor concat_255_values1_0 = const()[name = string("concat_255_values1_0"), val = tensor([0])]; - tensor concat_255_values3_0 = const()[name = string("concat_255_values3_0"), val = tensor([0])]; - int32 concat_255_axis_0 = const()[name = string("concat_255_axis_0"), val = int32(0)]; - bool concat_255_interleave_0 = const()[name = string("concat_255_interleave_0"), val = bool(false)]; - tensor concat_255 = concat(axis = concat_255_axis_0, interleave = concat_255_interleave_0, values = (expand_dims_172, concat_255_values1_0, end_pos_1, concat_255_values3_0))[name = string("concat_255")]; - tensor model_model_kv_cache_local_internal_tensor_assign_25_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_25_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_25_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_25_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_25_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_25_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_25_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_254, begin_mask = model_model_kv_cache_local_internal_tensor_assign_25_begin_mask_0, end = concat_255, end_mask = model_model_kv_cache_local_internal_tensor_assign_25_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_25_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_25_stride_0, update = key_states_143_cast_fp16, x = coreml_update_state_79)[name = string("model_model_kv_cache_local_internal_tensor_assign_25_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_25_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_132_write_state")]; - tensor coreml_update_state_80 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_132")]; - tensor expand_dims_174 = const()[name = string("expand_dims_174"), val = tensor([34])]; - tensor expand_dims_175 = const()[name = string("expand_dims_175"), val = tensor([0])]; - tensor expand_dims_177 = const()[name = string("expand_dims_177"), val = tensor([0])]; - tensor expand_dims_178 = const()[name = string("expand_dims_178"), val = tensor([35])]; - int32 concat_258_axis_0 = const()[name = string("concat_258_axis_0"), val = int32(0)]; - bool concat_258_interleave_0 = const()[name = string("concat_258_interleave_0"), val = bool(false)]; - tensor concat_258 = concat(axis = concat_258_axis_0, interleave = concat_258_interleave_0, values = (expand_dims_174, expand_dims_175, current_pos, expand_dims_177))[name = string("concat_258")]; - tensor concat_259_values1_0 = const()[name = string("concat_259_values1_0"), val = tensor([0])]; - tensor concat_259_values3_0 = const()[name = string("concat_259_values3_0"), val = tensor([0])]; - int32 concat_259_axis_0 = const()[name = string("concat_259_axis_0"), val = int32(0)]; - bool concat_259_interleave_0 = const()[name = string("concat_259_interleave_0"), val = bool(false)]; - tensor concat_259 = concat(axis = concat_259_axis_0, interleave = concat_259_interleave_0, values = (expand_dims_178, concat_259_values1_0, end_pos_1, concat_259_values3_0))[name = string("concat_259")]; - tensor model_model_kv_cache_local_internal_tensor_assign_26_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_26_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_26_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_26_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_26_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_26_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_26_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor value_states_115 = transpose(perm = var_11477, x = var_11472)[name = string("transpose_104")]; - tensor model_model_kv_cache_local_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_258, begin_mask = model_model_kv_cache_local_internal_tensor_assign_26_begin_mask_0, end = concat_259, end_mask = model_model_kv_cache_local_internal_tensor_assign_26_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_26_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_26_stride_0, update = value_states_115, x = coreml_update_state_80)[name = string("model_model_kv_cache_local_internal_tensor_assign_26_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_26_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_133_write_state")]; - tensor coreml_update_state_81 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_133")]; - tensor var_11697_begin_0 = const()[name = string("op_11697_begin_0"), val = tensor([12, 0, 0, 0])]; - tensor var_11697_end_0 = const()[name = string("op_11697_end_0"), val = tensor([13, 1, 512, 256])]; - tensor var_11697_end_mask_0 = const()[name = string("op_11697_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_11697_cast_fp16 = slice_by_index(begin = var_11697_begin_0, end = var_11697_end_0, end_mask = var_11697_end_mask_0, x = coreml_update_state_81)[name = string("op_11697_cast_fp16")]; - tensor var_11704_begin_0 = const()[name = string("op_11704_begin_0"), val = tensor([34, 0, 0, 0])]; - tensor var_11704_end_0 = const()[name = string("op_11704_end_0"), val = tensor([35, 1, 512, 256])]; - tensor var_11704_end_mask_0 = const()[name = string("op_11704_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_11704_cast_fp16 = slice_by_index(begin = var_11704_begin_0, end = var_11704_end_0, end_mask = var_11704_end_mask_0, x = coreml_update_state_81)[name = string("op_11704_cast_fp16")]; - tensor var_11743 = const()[name = string("op_11743"), val = tensor([1, 4, 1, 1])]; - tensor x_229_cast_fp16 = tile(reps = var_11743, x = var_11697_cast_fp16)[name = string("x_229_cast_fp16")]; - tensor var_11763 = const()[name = string("op_11763"), val = tensor([1, 4, 1, 1])]; - tensor x_235_cast_fp16 = tile(reps = var_11763, x = var_11704_cast_fp16)[name = string("x_235_cast_fp16")]; - bool var_11790_transpose_x_0 = const()[name = string("op_11790_transpose_x_0"), val = bool(false)]; - bool var_11790_transpose_y_0 = const()[name = string("op_11790_transpose_y_0"), val = bool(true)]; - tensor var_11790 = matmul(transpose_x = var_11790_transpose_x_0, transpose_y = var_11790_transpose_y_0, x = query_states_115_cast_fp16, y = x_229_cast_fp16)[name = string("op_11790")]; - fp16 var_11791_to_fp16 = const()[name = string("op_11791_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_57_cast_fp16 = mul(x = var_11790, y = var_11791_to_fp16)[name = string("attn_weights_57_cast_fp16")]; - tensor attn_weights_59_cast_fp16 = add(x = attn_weights_57_cast_fp16, y = mask_slice_1)[name = string("attn_weights_59_cast_fp16")]; - int32 var_11826 = const()[name = string("op_11826"), val = int32(-1)]; - tensor var_11828_cast_fp16 = softmax(axis = var_11826, x = attn_weights_59_cast_fp16)[name = string("op_11828_cast_fp16")]; - tensor concat_264 = const()[name = string("concat_264"), val = tensor([4, 64, 512])]; - tensor reshape_42_cast_fp16 = reshape(shape = concat_264, x = var_11828_cast_fp16)[name = string("reshape_42_cast_fp16")]; - tensor concat_265 = const()[name = string("concat_265"), val = tensor([4, 512, 256])]; - tensor reshape_43_cast_fp16 = reshape(shape = concat_265, x = x_235_cast_fp16)[name = string("reshape_43_cast_fp16")]; - bool matmul_14_transpose_x_0 = const()[name = string("matmul_14_transpose_x_0"), val = bool(false)]; - bool matmul_14_transpose_y_0 = const()[name = string("matmul_14_transpose_y_0"), val = bool(false)]; - tensor matmul_14_cast_fp16 = matmul(transpose_x = matmul_14_transpose_x_0, transpose_y = matmul_14_transpose_y_0, x = reshape_42_cast_fp16, y = reshape_43_cast_fp16)[name = string("matmul_14_cast_fp16")]; - tensor concat_269 = const()[name = string("concat_269"), val = tensor([1, 4, 64, 256])]; - tensor reshape_44_cast_fp16 = reshape(shape = concat_269, x = matmul_14_cast_fp16)[name = string("reshape_44_cast_fp16")]; - tensor var_11840_perm_0 = const()[name = string("op_11840_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_11859 = const()[name = string("op_11859"), val = tensor([1, 64, 1024])]; - tensor var_11840_cast_fp16 = transpose(perm = var_11840_perm_0, x = reshape_44_cast_fp16)[name = string("transpose_103")]; - tensor attn_output_145_cast_fp16 = reshape(shape = var_11859, x = var_11840_cast_fp16)[name = string("attn_output_145_cast_fp16")]; - tensor var_11864 = const()[name = string("op_11864"), val = tensor([0, 2, 1])]; - string var_11880_pad_type_0 = const()[name = string("op_11880_pad_type_0"), val = string("valid")]; - int32 var_11880_groups_0 = const()[name = string("op_11880_groups_0"), val = int32(1)]; - tensor var_11880_strides_0 = const()[name = string("op_11880_strides_0"), val = tensor([1])]; - tensor var_11880_pad_0 = const()[name = string("op_11880_pad_0"), val = tensor([0, 0])]; - tensor var_11880_dilations_0 = const()[name = string("op_11880_dilations_0"), val = tensor([1])]; - tensor squeeze_14_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318912256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319797056))))[name = string("squeeze_14_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_11865_cast_fp16 = transpose(perm = var_11864, x = attn_output_145_cast_fp16)[name = string("transpose_102")]; - tensor var_11880_cast_fp16 = conv(dilations = var_11880_dilations_0, groups = var_11880_groups_0, pad = var_11880_pad_0, pad_type = var_11880_pad_type_0, strides = var_11880_strides_0, weight = squeeze_14_cast_fp16_to_fp32_to_fp16_palettized, x = var_11865_cast_fp16)[name = string("op_11880_cast_fp16")]; - tensor var_11884 = const()[name = string("op_11884"), val = tensor([0, 2, 1])]; - int32 var_11895 = const()[name = string("op_11895"), val = int32(-1)]; - fp16 const_633_promoted_to_fp16 = const()[name = string("const_633_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_233_cast_fp16 = transpose(perm = var_11884, x = var_11880_cast_fp16)[name = string("transpose_101")]; - tensor var_11897_cast_fp16 = mul(x = hidden_states_233_cast_fp16, y = const_633_promoted_to_fp16)[name = string("op_11897_cast_fp16")]; - bool input_291_interleave_0 = const()[name = string("input_291_interleave_0"), val = bool(false)]; - tensor input_291_cast_fp16 = concat(axis = var_11895, interleave = input_291_interleave_0, values = (hidden_states_233_cast_fp16, var_11897_cast_fp16))[name = string("input_291_cast_fp16")]; - tensor normed_349_axes_0 = const()[name = string("normed_349_axes_0"), val = tensor([-1])]; - fp16 var_11892_to_fp16 = const()[name = string("op_11892_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_349_cast_fp16 = layer_norm(axes = normed_349_axes_0, epsilon = var_11892_to_fp16, x = input_291_cast_fp16)[name = string("normed_349_cast_fp16")]; - tensor normed_351_begin_0 = const()[name = string("normed_351_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_351_end_0 = const()[name = string("normed_351_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_351_end_mask_0 = const()[name = string("normed_351_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_351_cast_fp16 = slice_by_index(begin = normed_351_begin_0, end = normed_351_end_0, end_mask = normed_351_end_mask_0, x = normed_349_cast_fp16)[name = string("normed_351_cast_fp16")]; - tensor var_11911_to_fp16 = const()[name = string("op_11911_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319815552)))]; - tensor attn_output_149_cast_fp16 = mul(x = normed_351_cast_fp16, y = var_11911_to_fp16)[name = string("attn_output_149_cast_fp16")]; - tensor hidden_states_235_cast_fp16 = add(x = hidden_states_225_cast_fp16, y = attn_output_149_cast_fp16)[name = string("hidden_states_235_cast_fp16")]; - int32 var_11924 = const()[name = string("op_11924"), val = int32(-1)]; - fp16 const_637_promoted_to_fp16 = const()[name = string("const_637_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_11926_cast_fp16 = mul(x = hidden_states_235_cast_fp16, y = const_637_promoted_to_fp16)[name = string("op_11926_cast_fp16")]; - bool input_293_interleave_0 = const()[name = string("input_293_interleave_0"), val = bool(false)]; - tensor input_293_cast_fp16 = concat(axis = var_11924, interleave = input_293_interleave_0, values = (hidden_states_235_cast_fp16, var_11926_cast_fp16))[name = string("input_293_cast_fp16")]; - tensor normed_353_axes_0 = const()[name = string("normed_353_axes_0"), val = tensor([-1])]; - fp16 var_11921_to_fp16 = const()[name = string("op_11921_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_353_cast_fp16 = layer_norm(axes = normed_353_axes_0, epsilon = var_11921_to_fp16, x = input_293_cast_fp16)[name = string("normed_353_cast_fp16")]; - tensor normed_355_begin_0 = const()[name = string("normed_355_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_355_end_0 = const()[name = string("normed_355_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_355_end_mask_0 = const()[name = string("normed_355_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_355_cast_fp16 = slice_by_index(begin = normed_355_begin_0, end = normed_355_end_0, end_mask = normed_355_end_mask_0, x = normed_353_cast_fp16)[name = string("normed_355_cast_fp16")]; - tensor var_11940_to_fp16 = const()[name = string("op_11940_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319817920)))]; - tensor x_237_cast_fp16 = mul(x = normed_355_cast_fp16, y = var_11940_to_fp16)[name = string("x_237_cast_fp16")]; - tensor var_11952 = const()[name = string("op_11952"), val = tensor([0, 2, 1])]; - tensor input_295_axes_0 = const()[name = string("input_295_axes_0"), val = tensor([2])]; - tensor var_11953_cast_fp16 = transpose(perm = var_11952, x = x_237_cast_fp16)[name = string("transpose_100")]; - tensor input_295_cast_fp16 = expand_dims(axes = input_295_axes_0, x = var_11953_cast_fp16)[name = string("input_295_cast_fp16")]; - string x_239_pad_type_0 = const()[name = string("x_239_pad_type_0"), val = string("valid")]; - tensor x_239_strides_0 = const()[name = string("x_239_strides_0"), val = tensor([1, 1])]; - tensor x_239_pad_0 = const()[name = string("x_239_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_239_dilations_0 = const()[name = string("x_239_dilations_0"), val = tensor([1, 1])]; - int32 x_239_groups_0 = const()[name = string("x_239_groups_0"), val = int32(1)]; - tensor model_model_layers_14_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1049075200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1055047232))))[name = string("model_model_layers_14_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_239_cast_fp16 = conv(dilations = x_239_dilations_0, groups = x_239_groups_0, pad = x_239_pad_0, pad_type = x_239_pad_type_0, strides = x_239_strides_0, weight = model_model_layers_14_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_295_cast_fp16)[name = string("x_239_cast_fp16")]; - string b_29_pad_type_0 = const()[name = string("b_29_pad_type_0"), val = string("valid")]; - tensor b_29_strides_0 = const()[name = string("b_29_strides_0"), val = tensor([1, 1])]; - tensor b_29_pad_0 = const()[name = string("b_29_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_29_dilations_0 = const()[name = string("b_29_dilations_0"), val = tensor([1, 1])]; - int32 b_29_groups_0 = const()[name = string("b_29_groups_0"), val = int32(1)]; - tensor model_model_layers_14_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1055157888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1061129920))))[name = string("model_model_layers_14_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_29_cast_fp16 = conv(dilations = b_29_dilations_0, groups = b_29_groups_0, pad = b_29_pad_0, pad_type = b_29_pad_type_0, strides = b_29_strides_0, weight = model_model_layers_14_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_295_cast_fp16)[name = string("b_29_cast_fp16")]; - string var_11978_mode_0 = const()[name = string("op_11978_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_11978_cast_fp16 = gelu(mode = var_11978_mode_0, x = x_239_cast_fp16)[name = string("op_11978_cast_fp16")]; - tensor input_297_cast_fp16 = mul(x = var_11978_cast_fp16, y = b_29_cast_fp16)[name = string("input_297_cast_fp16")]; - string e_29_pad_type_0 = const()[name = string("e_29_pad_type_0"), val = string("valid")]; - tensor e_29_strides_0 = const()[name = string("e_29_strides_0"), val = tensor([1, 1])]; - tensor e_29_pad_0 = const()[name = string("e_29_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_29_dilations_0 = const()[name = string("e_29_dilations_0"), val = tensor([1, 1])]; - int32 e_29_groups_0 = const()[name = string("e_29_groups_0"), val = int32(1)]; - tensor model_model_layers_14_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(331985664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337957696))))[name = string("model_model_layers_14_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_29_cast_fp16 = conv(dilations = e_29_dilations_0, groups = e_29_groups_0, pad = e_29_pad_0, pad_type = e_29_pad_type_0, strides = e_29_strides_0, weight = model_model_layers_14_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_297_cast_fp16)[name = string("e_29_cast_fp16")]; - tensor var_11986_axes_0 = const()[name = string("op_11986_axes_0"), val = tensor([2])]; - tensor var_11986_cast_fp16 = squeeze(axes = var_11986_axes_0, x = e_29_cast_fp16)[name = string("op_11986_cast_fp16")]; - tensor var_11987 = const()[name = string("op_11987"), val = tensor([0, 2, 1])]; - int32 var_11998 = const()[name = string("op_11998"), val = int32(-1)]; - fp16 const_641_promoted_to_fp16 = const()[name = string("const_641_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_237_cast_fp16 = transpose(perm = var_11987, x = var_11986_cast_fp16)[name = string("transpose_99")]; - tensor var_12000_cast_fp16 = mul(x = hidden_states_237_cast_fp16, y = const_641_promoted_to_fp16)[name = string("op_12000_cast_fp16")]; - bool input_299_interleave_0 = const()[name = string("input_299_interleave_0"), val = bool(false)]; - tensor input_299_cast_fp16 = concat(axis = var_11998, interleave = input_299_interleave_0, values = (hidden_states_237_cast_fp16, var_12000_cast_fp16))[name = string("input_299_cast_fp16")]; - tensor normed_357_axes_0 = const()[name = string("normed_357_axes_0"), val = tensor([-1])]; - fp16 var_11995_to_fp16 = const()[name = string("op_11995_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_357_cast_fp16 = layer_norm(axes = normed_357_axes_0, epsilon = var_11995_to_fp16, x = input_299_cast_fp16)[name = string("normed_357_cast_fp16")]; - tensor normed_359_begin_0 = const()[name = string("normed_359_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_359_end_0 = const()[name = string("normed_359_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_359_end_mask_0 = const()[name = string("normed_359_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_359_cast_fp16 = slice_by_index(begin = normed_359_begin_0, end = normed_359_end_0, end_mask = normed_359_end_mask_0, x = normed_357_cast_fp16)[name = string("normed_359_cast_fp16")]; - tensor var_12014_to_fp16 = const()[name = string("op_12014_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337976192)))]; - tensor hidden_states_239_cast_fp16 = mul(x = normed_359_cast_fp16, y = var_12014_to_fp16)[name = string("hidden_states_239_cast_fp16")]; - tensor hidden_states_241_cast_fp16 = add(x = hidden_states_235_cast_fp16, y = hidden_states_239_cast_fp16)[name = string("hidden_states_241_cast_fp16")]; - int32 var_12068 = const()[name = string("op_12068"), val = int32(-1)]; - fp16 const_646_promoted_to_fp16 = const()[name = string("const_646_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_12070_cast_fp16 = mul(x = hidden_states_241_cast_fp16, y = const_646_promoted_to_fp16)[name = string("op_12070_cast_fp16")]; - bool input_301_interleave_0 = const()[name = string("input_301_interleave_0"), val = bool(false)]; - tensor input_301_cast_fp16 = concat(axis = var_12068, interleave = input_301_interleave_0, values = (hidden_states_241_cast_fp16, var_12070_cast_fp16))[name = string("input_301_cast_fp16")]; - tensor normed_361_axes_0 = const()[name = string("normed_361_axes_0"), val = tensor([-1])]; - fp16 var_12065_to_fp16 = const()[name = string("op_12065_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_361_cast_fp16 = layer_norm(axes = normed_361_axes_0, epsilon = var_12065_to_fp16, x = input_301_cast_fp16)[name = string("normed_361_cast_fp16")]; - tensor normed_363_begin_0 = const()[name = string("normed_363_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_363_end_0 = const()[name = string("normed_363_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_363_end_mask_0 = const()[name = string("normed_363_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_363_cast_fp16 = slice_by_index(begin = normed_363_begin_0, end = normed_363_end_0, end_mask = normed_363_end_mask_0, x = normed_361_cast_fp16)[name = string("normed_363_cast_fp16")]; - tensor var_12084_to_fp16 = const()[name = string("op_12084_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337978560)))]; - tensor hidden_states_243_cast_fp16 = mul(x = normed_363_cast_fp16, y = var_12084_to_fp16)[name = string("hidden_states_243_cast_fp16")]; - tensor var_12095 = const()[name = string("op_12095"), val = tensor([0, 2, 1])]; - tensor var_12098_axes_0 = const()[name = string("op_12098_axes_0"), val = tensor([2])]; - tensor var_12096_cast_fp16 = transpose(perm = var_12095, x = hidden_states_243_cast_fp16)[name = string("transpose_98")]; - tensor var_12098_cast_fp16 = expand_dims(axes = var_12098_axes_0, x = var_12096_cast_fp16)[name = string("op_12098_cast_fp16")]; - string query_states_121_pad_type_0 = const()[name = string("query_states_121_pad_type_0"), val = string("valid")]; - tensor query_states_121_strides_0 = const()[name = string("query_states_121_strides_0"), val = tensor([1, 1])]; - tensor query_states_121_pad_0 = const()[name = string("query_states_121_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_121_dilations_0 = const()[name = string("query_states_121_dilations_0"), val = tensor([1, 1])]; - int32 query_states_121_groups_0 = const()[name = string("query_states_121_groups_0"), val = int32(1)]; - tensor query_states_121 = conv(dilations = query_states_121_dilations_0, groups = query_states_121_groups_0, pad = query_states_121_pad_0, pad_type = query_states_121_pad_type_0, strides = query_states_121_strides_0, weight = model_model_layers_15_self_attn_q_proj_weight_palettized, x = var_12098_cast_fp16)[name = string("query_states_121")]; - string key_states_151_pad_type_0 = const()[name = string("key_states_151_pad_type_0"), val = string("valid")]; - tensor key_states_151_strides_0 = const()[name = string("key_states_151_strides_0"), val = tensor([1, 1])]; - tensor key_states_151_pad_0 = const()[name = string("key_states_151_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_151_dilations_0 = const()[name = string("key_states_151_dilations_0"), val = tensor([1, 1])]; - int32 key_states_151_groups_0 = const()[name = string("key_states_151_groups_0"), val = int32(1)]; - tensor key_states_151 = conv(dilations = key_states_151_dilations_0, groups = key_states_151_groups_0, pad = key_states_151_pad_0, pad_type = key_states_151_pad_type_0, strides = key_states_151_strides_0, weight = model_model_layers_15_self_attn_k_proj_weight_palettized, x = var_12098_cast_fp16)[name = string("key_states_151")]; - string value_states_121_pad_type_0 = const()[name = string("value_states_121_pad_type_0"), val = string("valid")]; - tensor value_states_121_strides_0 = const()[name = string("value_states_121_strides_0"), val = tensor([1, 1])]; - tensor value_states_121_pad_0 = const()[name = string("value_states_121_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_121_dilations_0 = const()[name = string("value_states_121_dilations_0"), val = tensor([1, 1])]; - int32 value_states_121_groups_0 = const()[name = string("value_states_121_groups_0"), val = int32(1)]; - tensor value_states_121 = conv(dilations = value_states_121_dilations_0, groups = value_states_121_groups_0, pad = value_states_121_pad_0, pad_type = value_states_121_pad_type_0, strides = value_states_121_strides_0, weight = model_model_layers_15_self_attn_v_proj_weight_palettized, x = var_12098_cast_fp16)[name = string("value_states_121")]; - tensor var_12140 = const()[name = string("op_12140"), val = tensor([1, 4, 256, 64])]; - tensor var_12141 = reshape(shape = var_12140, x = query_states_121)[name = string("op_12141")]; - tensor var_12146 = const()[name = string("op_12146"), val = tensor([0, 1, 3, 2])]; - tensor var_12151 = const()[name = string("op_12151"), val = tensor([1, 1, 256, 64])]; - tensor var_12152 = reshape(shape = var_12151, x = key_states_151)[name = string("op_12152")]; - tensor var_12157 = const()[name = string("op_12157"), val = tensor([0, 1, 3, 2])]; - tensor var_12162 = const()[name = string("op_12162"), val = tensor([1, 1, 256, 64])]; - tensor var_12163 = reshape(shape = var_12162, x = value_states_121)[name = string("op_12163")]; - tensor var_12168 = const()[name = string("op_12168"), val = tensor([0, 1, 3, 2])]; - int32 var_12179 = const()[name = string("op_12179"), val = int32(-1)]; - fp16 const_651_promoted = const()[name = string("const_651_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_245 = transpose(perm = var_12146, x = var_12141)[name = string("transpose_97")]; - tensor var_12181 = mul(x = hidden_states_245, y = const_651_promoted)[name = string("op_12181")]; - bool input_305_interleave_0 = const()[name = string("input_305_interleave_0"), val = bool(false)]; - tensor input_305 = concat(axis = var_12179, interleave = input_305_interleave_0, values = (hidden_states_245, var_12181))[name = string("input_305")]; - tensor normed_365_axes_0 = const()[name = string("normed_365_axes_0"), val = tensor([-1])]; - fp16 var_12176_to_fp16 = const()[name = string("op_12176_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_365_cast_fp16 = layer_norm(axes = normed_365_axes_0, epsilon = var_12176_to_fp16, x = input_305)[name = string("normed_365_cast_fp16")]; - tensor normed_367_begin_0 = const()[name = string("normed_367_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_367_end_0 = const()[name = string("normed_367_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_367_end_mask_0 = const()[name = string("normed_367_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_367 = slice_by_index(begin = normed_367_begin_0, end = normed_367_end_0, end_mask = normed_367_end_mask_0, x = normed_365_cast_fp16)[name = string("normed_367")]; - tensor var_12195_to_fp16 = const()[name = string("op_12195_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337980928)))]; - tensor q_31_cast_fp16 = mul(x = normed_367, y = var_12195_to_fp16)[name = string("q_31_cast_fp16")]; - int32 var_12206 = const()[name = string("op_12206"), val = int32(-1)]; - fp16 const_655_promoted = const()[name = string("const_655_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_247 = transpose(perm = var_12157, x = var_12152)[name = string("transpose_96")]; - tensor var_12208 = mul(x = hidden_states_247, y = const_655_promoted)[name = string("op_12208")]; - bool input_307_interleave_0 = const()[name = string("input_307_interleave_0"), val = bool(false)]; - tensor input_307 = concat(axis = var_12206, interleave = input_307_interleave_0, values = (hidden_states_247, var_12208))[name = string("input_307")]; - tensor normed_369_axes_0 = const()[name = string("normed_369_axes_0"), val = tensor([-1])]; - fp16 var_12203_to_fp16 = const()[name = string("op_12203_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_369_cast_fp16 = layer_norm(axes = normed_369_axes_0, epsilon = var_12203_to_fp16, x = input_307)[name = string("normed_369_cast_fp16")]; - tensor normed_371_begin_0 = const()[name = string("normed_371_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_371_end_0 = const()[name = string("normed_371_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_371_end_mask_0 = const()[name = string("normed_371_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_371 = slice_by_index(begin = normed_371_begin_0, end = normed_371_end_0, end_mask = normed_371_end_mask_0, x = normed_369_cast_fp16)[name = string("normed_371")]; - tensor var_12222_to_fp16 = const()[name = string("op_12222_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337981504)))]; - tensor k_31_cast_fp16 = mul(x = normed_371, y = var_12222_to_fp16)[name = string("k_31_cast_fp16")]; - tensor var_12236_cast_fp16 = mul(x = q_31_cast_fp16, y = cos_5)[name = string("op_12236_cast_fp16")]; - tensor x1_61_begin_0 = const()[name = string("x1_61_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_61_end_0 = const()[name = string("x1_61_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_61_end_mask_0 = const()[name = string("x1_61_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_61_cast_fp16 = slice_by_index(begin = x1_61_begin_0, end = x1_61_end_0, end_mask = x1_61_end_mask_0, x = q_31_cast_fp16)[name = string("x1_61_cast_fp16")]; - tensor x2_61_begin_0 = const()[name = string("x2_61_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_61_end_0 = const()[name = string("x2_61_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_61_end_mask_0 = const()[name = string("x2_61_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_61_cast_fp16 = slice_by_index(begin = x2_61_begin_0, end = x2_61_end_0, end_mask = x2_61_end_mask_0, x = q_31_cast_fp16)[name = string("x2_61_cast_fp16")]; - fp16 const_661_promoted_to_fp16 = const()[name = string("const_661_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_12257_cast_fp16 = mul(x = x2_61_cast_fp16, y = const_661_promoted_to_fp16)[name = string("op_12257_cast_fp16")]; - int32 var_12259 = const()[name = string("op_12259"), val = int32(-1)]; - bool var_12260_interleave_0 = const()[name = string("op_12260_interleave_0"), val = bool(false)]; - tensor var_12260_cast_fp16 = concat(axis = var_12259, interleave = var_12260_interleave_0, values = (var_12257_cast_fp16, x1_61_cast_fp16))[name = string("op_12260_cast_fp16")]; - tensor var_12261_cast_fp16 = mul(x = var_12260_cast_fp16, y = sin_5)[name = string("op_12261_cast_fp16")]; - tensor query_states_123_cast_fp16 = add(x = var_12236_cast_fp16, y = var_12261_cast_fp16)[name = string("query_states_123_cast_fp16")]; - tensor var_12264_cast_fp16 = mul(x = k_31_cast_fp16, y = cos_5)[name = string("op_12264_cast_fp16")]; - tensor x1_63_begin_0 = const()[name = string("x1_63_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_63_end_0 = const()[name = string("x1_63_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_63_end_mask_0 = const()[name = string("x1_63_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_63_cast_fp16 = slice_by_index(begin = x1_63_begin_0, end = x1_63_end_0, end_mask = x1_63_end_mask_0, x = k_31_cast_fp16)[name = string("x1_63_cast_fp16")]; - tensor x2_63_begin_0 = const()[name = string("x2_63_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_63_end_0 = const()[name = string("x2_63_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_63_end_mask_0 = const()[name = string("x2_63_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_63_cast_fp16 = slice_by_index(begin = x2_63_begin_0, end = x2_63_end_0, end_mask = x2_63_end_mask_0, x = k_31_cast_fp16)[name = string("x2_63_cast_fp16")]; - fp16 const_664_promoted_to_fp16 = const()[name = string("const_664_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_12285_cast_fp16 = mul(x = x2_63_cast_fp16, y = const_664_promoted_to_fp16)[name = string("op_12285_cast_fp16")]; - int32 var_12287 = const()[name = string("op_12287"), val = int32(-1)]; - bool var_12288_interleave_0 = const()[name = string("op_12288_interleave_0"), val = bool(false)]; - tensor var_12288_cast_fp16 = concat(axis = var_12287, interleave = var_12288_interleave_0, values = (var_12285_cast_fp16, x1_63_cast_fp16))[name = string("op_12288_cast_fp16")]; - tensor var_12289_cast_fp16 = mul(x = var_12288_cast_fp16, y = sin_5)[name = string("op_12289_cast_fp16")]; - tensor key_states_153_cast_fp16 = add(x = var_12264_cast_fp16, y = var_12289_cast_fp16)[name = string("key_states_153_cast_fp16")]; - tensor expand_dims_180 = const()[name = string("expand_dims_180"), val = tensor([13])]; - tensor expand_dims_181 = const()[name = string("expand_dims_181"), val = tensor([0])]; - tensor expand_dims_183 = const()[name = string("expand_dims_183"), val = tensor([0])]; - tensor expand_dims_184 = const()[name = string("expand_dims_184"), val = tensor([14])]; - int32 concat_272_axis_0 = const()[name = string("concat_272_axis_0"), val = int32(0)]; - bool concat_272_interleave_0 = const()[name = string("concat_272_interleave_0"), val = bool(false)]; - tensor concat_272 = concat(axis = concat_272_axis_0, interleave = concat_272_interleave_0, values = (expand_dims_180, expand_dims_181, current_pos, expand_dims_183))[name = string("concat_272")]; - tensor concat_273_values1_0 = const()[name = string("concat_273_values1_0"), val = tensor([0])]; - tensor concat_273_values3_0 = const()[name = string("concat_273_values3_0"), val = tensor([0])]; - int32 concat_273_axis_0 = const()[name = string("concat_273_axis_0"), val = int32(0)]; - bool concat_273_interleave_0 = const()[name = string("concat_273_interleave_0"), val = bool(false)]; - tensor concat_273 = concat(axis = concat_273_axis_0, interleave = concat_273_interleave_0, values = (expand_dims_184, concat_273_values1_0, end_pos_1, concat_273_values3_0))[name = string("concat_273")]; - tensor model_model_kv_cache_local_internal_tensor_assign_27_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_27_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_27_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_27_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_27_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_27_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_27_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_272, begin_mask = model_model_kv_cache_local_internal_tensor_assign_27_begin_mask_0, end = concat_273, end_mask = model_model_kv_cache_local_internal_tensor_assign_27_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_27_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_27_stride_0, update = key_states_153_cast_fp16, x = coreml_update_state_81)[name = string("model_model_kv_cache_local_internal_tensor_assign_27_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_27_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_134_write_state")]; - tensor coreml_update_state_82 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_134")]; - tensor expand_dims_186 = const()[name = string("expand_dims_186"), val = tensor([35])]; - tensor expand_dims_187 = const()[name = string("expand_dims_187"), val = tensor([0])]; - tensor expand_dims_189 = const()[name = string("expand_dims_189"), val = tensor([0])]; - tensor expand_dims_190 = const()[name = string("expand_dims_190"), val = tensor([36])]; - int32 concat_276_axis_0 = const()[name = string("concat_276_axis_0"), val = int32(0)]; - bool concat_276_interleave_0 = const()[name = string("concat_276_interleave_0"), val = bool(false)]; - tensor concat_276 = concat(axis = concat_276_axis_0, interleave = concat_276_interleave_0, values = (expand_dims_186, expand_dims_187, current_pos, expand_dims_189))[name = string("concat_276")]; - tensor concat_277_values1_0 = const()[name = string("concat_277_values1_0"), val = tensor([0])]; - tensor concat_277_values3_0 = const()[name = string("concat_277_values3_0"), val = tensor([0])]; - int32 concat_277_axis_0 = const()[name = string("concat_277_axis_0"), val = int32(0)]; - bool concat_277_interleave_0 = const()[name = string("concat_277_interleave_0"), val = bool(false)]; - tensor concat_277 = concat(axis = concat_277_axis_0, interleave = concat_277_interleave_0, values = (expand_dims_190, concat_277_values1_0, end_pos_1, concat_277_values3_0))[name = string("concat_277")]; - tensor model_model_kv_cache_local_internal_tensor_assign_28_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_28_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_28_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_28_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_28_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_28_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_28_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor value_states_123 = transpose(perm = var_12168, x = var_12163)[name = string("transpose_95")]; - tensor model_model_kv_cache_local_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_276, begin_mask = model_model_kv_cache_local_internal_tensor_assign_28_begin_mask_0, end = concat_277, end_mask = model_model_kv_cache_local_internal_tensor_assign_28_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_28_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_28_stride_0, update = value_states_123, x = coreml_update_state_82)[name = string("model_model_kv_cache_local_internal_tensor_assign_28_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_28_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_135_write_state")]; - tensor coreml_update_state_83 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_135")]; - tensor var_12388_begin_0 = const()[name = string("op_12388_begin_0"), val = tensor([13, 0, 0, 0])]; - tensor var_12388_end_0 = const()[name = string("op_12388_end_0"), val = tensor([14, 1, 512, 256])]; - tensor var_12388_end_mask_0 = const()[name = string("op_12388_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_12388_cast_fp16 = slice_by_index(begin = var_12388_begin_0, end = var_12388_end_0, end_mask = var_12388_end_mask_0, x = coreml_update_state_83)[name = string("op_12388_cast_fp16")]; - tensor var_12395_begin_0 = const()[name = string("op_12395_begin_0"), val = tensor([35, 0, 0, 0])]; - tensor var_12395_end_0 = const()[name = string("op_12395_end_0"), val = tensor([36, 1, 512, 256])]; - tensor var_12395_end_mask_0 = const()[name = string("op_12395_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_12395_cast_fp16 = slice_by_index(begin = var_12395_begin_0, end = var_12395_end_0, end_mask = var_12395_end_mask_0, x = coreml_update_state_83)[name = string("op_12395_cast_fp16")]; - tensor var_12434 = const()[name = string("op_12434"), val = tensor([1, 4, 1, 1])]; - tensor x_245_cast_fp16 = tile(reps = var_12434, x = var_12388_cast_fp16)[name = string("x_245_cast_fp16")]; - tensor var_12454 = const()[name = string("op_12454"), val = tensor([1, 4, 1, 1])]; - tensor x_251_cast_fp16 = tile(reps = var_12454, x = var_12395_cast_fp16)[name = string("x_251_cast_fp16")]; - bool var_12481_transpose_x_0 = const()[name = string("op_12481_transpose_x_0"), val = bool(false)]; - bool var_12481_transpose_y_0 = const()[name = string("op_12481_transpose_y_0"), val = bool(true)]; - tensor var_12481 = matmul(transpose_x = var_12481_transpose_x_0, transpose_y = var_12481_transpose_y_0, x = query_states_123_cast_fp16, y = x_245_cast_fp16)[name = string("op_12481")]; - fp16 var_12482_to_fp16 = const()[name = string("op_12482_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_61_cast_fp16 = mul(x = var_12481, y = var_12482_to_fp16)[name = string("attn_weights_61_cast_fp16")]; - tensor attn_weights_63_cast_fp16 = add(x = attn_weights_61_cast_fp16, y = mask_slice_1)[name = string("attn_weights_63_cast_fp16")]; - int32 var_12517 = const()[name = string("op_12517"), val = int32(-1)]; - tensor var_12519_cast_fp16 = softmax(axis = var_12517, x = attn_weights_63_cast_fp16)[name = string("op_12519_cast_fp16")]; - tensor concat_282 = const()[name = string("concat_282"), val = tensor([4, 64, 512])]; - tensor reshape_45_cast_fp16 = reshape(shape = concat_282, x = var_12519_cast_fp16)[name = string("reshape_45_cast_fp16")]; - tensor concat_283 = const()[name = string("concat_283"), val = tensor([4, 512, 256])]; - tensor reshape_46_cast_fp16 = reshape(shape = concat_283, x = x_251_cast_fp16)[name = string("reshape_46_cast_fp16")]; - bool matmul_15_transpose_x_0 = const()[name = string("matmul_15_transpose_x_0"), val = bool(false)]; - bool matmul_15_transpose_y_0 = const()[name = string("matmul_15_transpose_y_0"), val = bool(false)]; - tensor matmul_15_cast_fp16 = matmul(transpose_x = matmul_15_transpose_x_0, transpose_y = matmul_15_transpose_y_0, x = reshape_45_cast_fp16, y = reshape_46_cast_fp16)[name = string("matmul_15_cast_fp16")]; - tensor concat_287 = const()[name = string("concat_287"), val = tensor([1, 4, 64, 256])]; - tensor reshape_47_cast_fp16 = reshape(shape = concat_287, x = matmul_15_cast_fp16)[name = string("reshape_47_cast_fp16")]; - tensor var_12531_perm_0 = const()[name = string("op_12531_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_12550 = const()[name = string("op_12550"), val = tensor([1, 64, 1024])]; - tensor var_12531_cast_fp16 = transpose(perm = var_12531_perm_0, x = reshape_47_cast_fp16)[name = string("transpose_94")]; - tensor attn_output_155_cast_fp16 = reshape(shape = var_12550, x = var_12531_cast_fp16)[name = string("attn_output_155_cast_fp16")]; - tensor var_12555 = const()[name = string("op_12555"), val = tensor([0, 2, 1])]; - string var_12571_pad_type_0 = const()[name = string("op_12571_pad_type_0"), val = string("valid")]; - int32 var_12571_groups_0 = const()[name = string("op_12571_groups_0"), val = int32(1)]; - tensor var_12571_strides_0 = const()[name = string("op_12571_strides_0"), val = tensor([1])]; - tensor var_12571_pad_0 = const()[name = string("op_12571_pad_0"), val = tensor([0, 0])]; - tensor var_12571_dilations_0 = const()[name = string("op_12571_dilations_0"), val = tensor([1])]; - tensor squeeze_15_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337982080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338866880))))[name = string("squeeze_15_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_12556_cast_fp16 = transpose(perm = var_12555, x = attn_output_155_cast_fp16)[name = string("transpose_93")]; - tensor var_12571_cast_fp16 = conv(dilations = var_12571_dilations_0, groups = var_12571_groups_0, pad = var_12571_pad_0, pad_type = var_12571_pad_type_0, strides = var_12571_strides_0, weight = squeeze_15_cast_fp16_to_fp32_to_fp16_palettized, x = var_12556_cast_fp16)[name = string("op_12571_cast_fp16")]; - tensor var_12575 = const()[name = string("op_12575"), val = tensor([0, 2, 1])]; - int32 var_12586 = const()[name = string("op_12586"), val = int32(-1)]; - fp16 const_676_promoted_to_fp16 = const()[name = string("const_676_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_249_cast_fp16 = transpose(perm = var_12575, x = var_12571_cast_fp16)[name = string("transpose_92")]; - tensor var_12588_cast_fp16 = mul(x = hidden_states_249_cast_fp16, y = const_676_promoted_to_fp16)[name = string("op_12588_cast_fp16")]; - bool input_311_interleave_0 = const()[name = string("input_311_interleave_0"), val = bool(false)]; - tensor input_311_cast_fp16 = concat(axis = var_12586, interleave = input_311_interleave_0, values = (hidden_states_249_cast_fp16, var_12588_cast_fp16))[name = string("input_311_cast_fp16")]; - tensor normed_373_axes_0 = const()[name = string("normed_373_axes_0"), val = tensor([-1])]; - fp16 var_12583_to_fp16 = const()[name = string("op_12583_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_373_cast_fp16 = layer_norm(axes = normed_373_axes_0, epsilon = var_12583_to_fp16, x = input_311_cast_fp16)[name = string("normed_373_cast_fp16")]; - tensor normed_375_begin_0 = const()[name = string("normed_375_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_375_end_0 = const()[name = string("normed_375_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_375_end_mask_0 = const()[name = string("normed_375_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_375_cast_fp16 = slice_by_index(begin = normed_375_begin_0, end = normed_375_end_0, end_mask = normed_375_end_mask_0, x = normed_373_cast_fp16)[name = string("normed_375_cast_fp16")]; - tensor var_12602_to_fp16 = const()[name = string("op_12602_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338885376)))]; - tensor attn_output_159_cast_fp16 = mul(x = normed_375_cast_fp16, y = var_12602_to_fp16)[name = string("attn_output_159_cast_fp16")]; - tensor hidden_states_251_cast_fp16 = add(x = hidden_states_241_cast_fp16, y = attn_output_159_cast_fp16)[name = string("hidden_states_251_cast_fp16")]; - int32 var_12615 = const()[name = string("op_12615"), val = int32(-1)]; - fp16 const_680_promoted_to_fp16 = const()[name = string("const_680_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_12617_cast_fp16 = mul(x = hidden_states_251_cast_fp16, y = const_680_promoted_to_fp16)[name = string("op_12617_cast_fp16")]; - bool input_313_interleave_0 = const()[name = string("input_313_interleave_0"), val = bool(false)]; - tensor input_313_cast_fp16 = concat(axis = var_12615, interleave = input_313_interleave_0, values = (hidden_states_251_cast_fp16, var_12617_cast_fp16))[name = string("input_313_cast_fp16")]; - tensor normed_377_axes_0 = const()[name = string("normed_377_axes_0"), val = tensor([-1])]; - fp16 var_12612_to_fp16 = const()[name = string("op_12612_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_377_cast_fp16 = layer_norm(axes = normed_377_axes_0, epsilon = var_12612_to_fp16, x = input_313_cast_fp16)[name = string("normed_377_cast_fp16")]; - tensor normed_379_begin_0 = const()[name = string("normed_379_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_379_end_0 = const()[name = string("normed_379_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_379_end_mask_0 = const()[name = string("normed_379_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_379_cast_fp16 = slice_by_index(begin = normed_379_begin_0, end = normed_379_end_0, end_mask = normed_379_end_mask_0, x = normed_377_cast_fp16)[name = string("normed_379_cast_fp16")]; - tensor var_12631_to_fp16 = const()[name = string("op_12631_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338887744)))]; - tensor x_253_cast_fp16 = mul(x = normed_379_cast_fp16, y = var_12631_to_fp16)[name = string("x_253_cast_fp16")]; - tensor var_12643 = const()[name = string("op_12643"), val = tensor([0, 2, 1])]; - tensor input_315_axes_0 = const()[name = string("input_315_axes_0"), val = tensor([2])]; - tensor var_12644_cast_fp16 = transpose(perm = var_12643, x = x_253_cast_fp16)[name = string("transpose_91")]; - tensor input_315_cast_fp16 = expand_dims(axes = input_315_axes_0, x = var_12644_cast_fp16)[name = string("input_315_cast_fp16")]; - string x_255_pad_type_0 = const()[name = string("x_255_pad_type_0"), val = string("valid")]; - tensor x_255_strides_0 = const()[name = string("x_255_strides_0"), val = tensor([1, 1])]; - tensor x_255_pad_0 = const()[name = string("x_255_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_255_dilations_0 = const()[name = string("x_255_dilations_0"), val = tensor([1, 1])]; - int32 x_255_groups_0 = const()[name = string("x_255_groups_0"), val = int32(1)]; - tensor model_model_layers_15_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1061240576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1067212608))))[name = string("model_model_layers_15_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_255_cast_fp16 = conv(dilations = x_255_dilations_0, groups = x_255_groups_0, pad = x_255_pad_0, pad_type = x_255_pad_type_0, strides = x_255_strides_0, weight = model_model_layers_15_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_315_cast_fp16)[name = string("x_255_cast_fp16")]; - string b_31_pad_type_0 = const()[name = string("b_31_pad_type_0"), val = string("valid")]; - tensor b_31_strides_0 = const()[name = string("b_31_strides_0"), val = tensor([1, 1])]; - tensor b_31_pad_0 = const()[name = string("b_31_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_31_dilations_0 = const()[name = string("b_31_dilations_0"), val = tensor([1, 1])]; - int32 b_31_groups_0 = const()[name = string("b_31_groups_0"), val = int32(1)]; - tensor model_model_layers_15_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1067323264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1073295296))))[name = string("model_model_layers_15_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_31_cast_fp16 = conv(dilations = b_31_dilations_0, groups = b_31_groups_0, pad = b_31_pad_0, pad_type = b_31_pad_type_0, strides = b_31_strides_0, weight = model_model_layers_15_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_315_cast_fp16)[name = string("b_31_cast_fp16")]; - string var_12669_mode_0 = const()[name = string("op_12669_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_12669_cast_fp16 = gelu(mode = var_12669_mode_0, x = x_255_cast_fp16)[name = string("op_12669_cast_fp16")]; - tensor input_317_cast_fp16 = mul(x = var_12669_cast_fp16, y = b_31_cast_fp16)[name = string("input_317_cast_fp16")]; - string e_31_pad_type_0 = const()[name = string("e_31_pad_type_0"), val = string("valid")]; - tensor e_31_strides_0 = const()[name = string("e_31_strides_0"), val = tensor([1, 1])]; - tensor e_31_pad_0 = const()[name = string("e_31_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_31_dilations_0 = const()[name = string("e_31_dilations_0"), val = tensor([1, 1])]; - int32 e_31_groups_0 = const()[name = string("e_31_groups_0"), val = int32(1)]; - tensor model_model_layers_15_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(351055488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357027520))))[name = string("model_model_layers_15_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_31_cast_fp16 = conv(dilations = e_31_dilations_0, groups = e_31_groups_0, pad = e_31_pad_0, pad_type = e_31_pad_type_0, strides = e_31_strides_0, weight = model_model_layers_15_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_317_cast_fp16)[name = string("e_31_cast_fp16")]; - tensor var_12677_axes_0 = const()[name = string("op_12677_axes_0"), val = tensor([2])]; - tensor var_12677_cast_fp16 = squeeze(axes = var_12677_axes_0, x = e_31_cast_fp16)[name = string("op_12677_cast_fp16")]; - tensor var_12678 = const()[name = string("op_12678"), val = tensor([0, 2, 1])]; - int32 var_12689 = const()[name = string("op_12689"), val = int32(-1)]; - fp16 const_684_promoted_to_fp16 = const()[name = string("const_684_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_253_cast_fp16 = transpose(perm = var_12678, x = var_12677_cast_fp16)[name = string("transpose_90")]; - tensor var_12691_cast_fp16 = mul(x = hidden_states_253_cast_fp16, y = const_684_promoted_to_fp16)[name = string("op_12691_cast_fp16")]; - bool input_319_interleave_0 = const()[name = string("input_319_interleave_0"), val = bool(false)]; - tensor input_319_cast_fp16 = concat(axis = var_12689, interleave = input_319_interleave_0, values = (hidden_states_253_cast_fp16, var_12691_cast_fp16))[name = string("input_319_cast_fp16")]; - tensor normed_381_axes_0 = const()[name = string("normed_381_axes_0"), val = tensor([-1])]; - fp16 var_12686_to_fp16 = const()[name = string("op_12686_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_381_cast_fp16 = layer_norm(axes = normed_381_axes_0, epsilon = var_12686_to_fp16, x = input_319_cast_fp16)[name = string("normed_381_cast_fp16")]; - tensor normed_383_begin_0 = const()[name = string("normed_383_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_383_end_0 = const()[name = string("normed_383_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_383_end_mask_0 = const()[name = string("normed_383_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_383_cast_fp16 = slice_by_index(begin = normed_383_begin_0, end = normed_383_end_0, end_mask = normed_383_end_mask_0, x = normed_381_cast_fp16)[name = string("normed_383_cast_fp16")]; - tensor var_12705_to_fp16 = const()[name = string("op_12705_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357046016)))]; - tensor hidden_states_255_cast_fp16 = mul(x = normed_383_cast_fp16, y = var_12705_to_fp16)[name = string("hidden_states_255_cast_fp16")]; - tensor hidden_states_257_cast_fp16 = add(x = hidden_states_251_cast_fp16, y = hidden_states_255_cast_fp16)[name = string("hidden_states_257_cast_fp16")]; - int32 var_12759 = const()[name = string("op_12759"), val = int32(-1)]; - fp16 const_689_promoted_to_fp16 = const()[name = string("const_689_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_12761_cast_fp16 = mul(x = hidden_states_257_cast_fp16, y = const_689_promoted_to_fp16)[name = string("op_12761_cast_fp16")]; - bool input_321_interleave_0 = const()[name = string("input_321_interleave_0"), val = bool(false)]; - tensor input_321_cast_fp16 = concat(axis = var_12759, interleave = input_321_interleave_0, values = (hidden_states_257_cast_fp16, var_12761_cast_fp16))[name = string("input_321_cast_fp16")]; - tensor normed_385_axes_0 = const()[name = string("normed_385_axes_0"), val = tensor([-1])]; - fp16 var_12756_to_fp16 = const()[name = string("op_12756_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_385_cast_fp16 = layer_norm(axes = normed_385_axes_0, epsilon = var_12756_to_fp16, x = input_321_cast_fp16)[name = string("normed_385_cast_fp16")]; - tensor normed_387_begin_0 = const()[name = string("normed_387_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_387_end_0 = const()[name = string("normed_387_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_387_end_mask_0 = const()[name = string("normed_387_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_387_cast_fp16 = slice_by_index(begin = normed_387_begin_0, end = normed_387_end_0, end_mask = normed_387_end_mask_0, x = normed_385_cast_fp16)[name = string("normed_387_cast_fp16")]; - tensor var_12775_to_fp16 = const()[name = string("op_12775_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357048384)))]; - tensor hidden_states_259_cast_fp16 = mul(x = normed_387_cast_fp16, y = var_12775_to_fp16)[name = string("hidden_states_259_cast_fp16")]; - tensor var_12786 = const()[name = string("op_12786"), val = tensor([0, 2, 1])]; - tensor var_12789_axes_0 = const()[name = string("op_12789_axes_0"), val = tensor([2])]; - tensor var_12787_cast_fp16 = transpose(perm = var_12786, x = hidden_states_259_cast_fp16)[name = string("transpose_89")]; - tensor var_12789_cast_fp16 = expand_dims(axes = var_12789_axes_0, x = var_12787_cast_fp16)[name = string("op_12789_cast_fp16")]; - string query_states_129_pad_type_0 = const()[name = string("query_states_129_pad_type_0"), val = string("valid")]; - tensor query_states_129_strides_0 = const()[name = string("query_states_129_strides_0"), val = tensor([1, 1])]; - tensor query_states_129_pad_0 = const()[name = string("query_states_129_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_129_dilations_0 = const()[name = string("query_states_129_dilations_0"), val = tensor([1, 1])]; - int32 query_states_129_groups_0 = const()[name = string("query_states_129_groups_0"), val = int32(1)]; - tensor query_states_129 = conv(dilations = query_states_129_dilations_0, groups = query_states_129_groups_0, pad = query_states_129_pad_0, pad_type = query_states_129_pad_type_0, strides = query_states_129_strides_0, weight = model_model_layers_16_self_attn_q_proj_weight_palettized, x = var_12789_cast_fp16)[name = string("query_states_129")]; - string key_states_161_pad_type_0 = const()[name = string("key_states_161_pad_type_0"), val = string("valid")]; - tensor key_states_161_strides_0 = const()[name = string("key_states_161_strides_0"), val = tensor([1, 1])]; - tensor key_states_161_pad_0 = const()[name = string("key_states_161_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_161_dilations_0 = const()[name = string("key_states_161_dilations_0"), val = tensor([1, 1])]; - int32 key_states_161_groups_0 = const()[name = string("key_states_161_groups_0"), val = int32(1)]; - tensor key_states_161 = conv(dilations = key_states_161_dilations_0, groups = key_states_161_groups_0, pad = key_states_161_pad_0, pad_type = key_states_161_pad_type_0, strides = key_states_161_strides_0, weight = model_model_layers_16_self_attn_k_proj_weight_palettized, x = var_12789_cast_fp16)[name = string("key_states_161")]; - string value_states_129_pad_type_0 = const()[name = string("value_states_129_pad_type_0"), val = string("valid")]; - tensor value_states_129_strides_0 = const()[name = string("value_states_129_strides_0"), val = tensor([1, 1])]; - tensor value_states_129_pad_0 = const()[name = string("value_states_129_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_129_dilations_0 = const()[name = string("value_states_129_dilations_0"), val = tensor([1, 1])]; - int32 value_states_129_groups_0 = const()[name = string("value_states_129_groups_0"), val = int32(1)]; - tensor value_states_129 = conv(dilations = value_states_129_dilations_0, groups = value_states_129_groups_0, pad = value_states_129_pad_0, pad_type = value_states_129_pad_type_0, strides = value_states_129_strides_0, weight = model_model_layers_16_self_attn_v_proj_weight_palettized, x = var_12789_cast_fp16)[name = string("value_states_129")]; - tensor var_12831 = const()[name = string("op_12831"), val = tensor([1, 4, 256, 64])]; - tensor var_12832 = reshape(shape = var_12831, x = query_states_129)[name = string("op_12832")]; - tensor var_12837 = const()[name = string("op_12837"), val = tensor([0, 1, 3, 2])]; - tensor var_12842 = const()[name = string("op_12842"), val = tensor([1, 1, 256, 64])]; - tensor var_12843 = reshape(shape = var_12842, x = key_states_161)[name = string("op_12843")]; - tensor var_12848 = const()[name = string("op_12848"), val = tensor([0, 1, 3, 2])]; - tensor var_12853 = const()[name = string("op_12853"), val = tensor([1, 1, 256, 64])]; - tensor var_12854 = reshape(shape = var_12853, x = value_states_129)[name = string("op_12854")]; - tensor var_12859 = const()[name = string("op_12859"), val = tensor([0, 1, 3, 2])]; - int32 var_12870 = const()[name = string("op_12870"), val = int32(-1)]; - fp16 const_694_promoted = const()[name = string("const_694_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_261 = transpose(perm = var_12837, x = var_12832)[name = string("transpose_88")]; - tensor var_12872 = mul(x = hidden_states_261, y = const_694_promoted)[name = string("op_12872")]; - bool input_325_interleave_0 = const()[name = string("input_325_interleave_0"), val = bool(false)]; - tensor input_325 = concat(axis = var_12870, interleave = input_325_interleave_0, values = (hidden_states_261, var_12872))[name = string("input_325")]; - tensor normed_389_axes_0 = const()[name = string("normed_389_axes_0"), val = tensor([-1])]; - fp16 var_12867_to_fp16 = const()[name = string("op_12867_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_389_cast_fp16 = layer_norm(axes = normed_389_axes_0, epsilon = var_12867_to_fp16, x = input_325)[name = string("normed_389_cast_fp16")]; - tensor normed_391_begin_0 = const()[name = string("normed_391_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_391_end_0 = const()[name = string("normed_391_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_391_end_mask_0 = const()[name = string("normed_391_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_391 = slice_by_index(begin = normed_391_begin_0, end = normed_391_end_0, end_mask = normed_391_end_mask_0, x = normed_389_cast_fp16)[name = string("normed_391")]; - tensor var_12886_to_fp16 = const()[name = string("op_12886_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357050752)))]; - tensor q_33_cast_fp16 = mul(x = normed_391, y = var_12886_to_fp16)[name = string("q_33_cast_fp16")]; - int32 var_12897 = const()[name = string("op_12897"), val = int32(-1)]; - fp16 const_698_promoted = const()[name = string("const_698_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_263 = transpose(perm = var_12848, x = var_12843)[name = string("transpose_87")]; - tensor var_12899 = mul(x = hidden_states_263, y = const_698_promoted)[name = string("op_12899")]; - bool input_327_interleave_0 = const()[name = string("input_327_interleave_0"), val = bool(false)]; - tensor input_327 = concat(axis = var_12897, interleave = input_327_interleave_0, values = (hidden_states_263, var_12899))[name = string("input_327")]; - tensor normed_393_axes_0 = const()[name = string("normed_393_axes_0"), val = tensor([-1])]; - fp16 var_12894_to_fp16 = const()[name = string("op_12894_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_393_cast_fp16 = layer_norm(axes = normed_393_axes_0, epsilon = var_12894_to_fp16, x = input_327)[name = string("normed_393_cast_fp16")]; - tensor normed_395_begin_0 = const()[name = string("normed_395_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_395_end_0 = const()[name = string("normed_395_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_395_end_mask_0 = const()[name = string("normed_395_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_395 = slice_by_index(begin = normed_395_begin_0, end = normed_395_end_0, end_mask = normed_395_end_mask_0, x = normed_393_cast_fp16)[name = string("normed_395")]; - tensor var_12913_to_fp16 = const()[name = string("op_12913_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357051328)))]; - tensor k_33_cast_fp16 = mul(x = normed_395, y = var_12913_to_fp16)[name = string("k_33_cast_fp16")]; - tensor var_12927_cast_fp16 = mul(x = q_33_cast_fp16, y = cos_5)[name = string("op_12927_cast_fp16")]; - tensor x1_65_begin_0 = const()[name = string("x1_65_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_65_end_0 = const()[name = string("x1_65_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_65_end_mask_0 = const()[name = string("x1_65_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_65_cast_fp16 = slice_by_index(begin = x1_65_begin_0, end = x1_65_end_0, end_mask = x1_65_end_mask_0, x = q_33_cast_fp16)[name = string("x1_65_cast_fp16")]; - tensor x2_65_begin_0 = const()[name = string("x2_65_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_65_end_0 = const()[name = string("x2_65_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_65_end_mask_0 = const()[name = string("x2_65_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_65_cast_fp16 = slice_by_index(begin = x2_65_begin_0, end = x2_65_end_0, end_mask = x2_65_end_mask_0, x = q_33_cast_fp16)[name = string("x2_65_cast_fp16")]; - fp16 const_704_promoted_to_fp16 = const()[name = string("const_704_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_12948_cast_fp16 = mul(x = x2_65_cast_fp16, y = const_704_promoted_to_fp16)[name = string("op_12948_cast_fp16")]; - int32 var_12950 = const()[name = string("op_12950"), val = int32(-1)]; - bool var_12951_interleave_0 = const()[name = string("op_12951_interleave_0"), val = bool(false)]; - tensor var_12951_cast_fp16 = concat(axis = var_12950, interleave = var_12951_interleave_0, values = (var_12948_cast_fp16, x1_65_cast_fp16))[name = string("op_12951_cast_fp16")]; - tensor var_12952_cast_fp16 = mul(x = var_12951_cast_fp16, y = sin_5)[name = string("op_12952_cast_fp16")]; - tensor query_states_131_cast_fp16 = add(x = var_12927_cast_fp16, y = var_12952_cast_fp16)[name = string("query_states_131_cast_fp16")]; - tensor var_12955_cast_fp16 = mul(x = k_33_cast_fp16, y = cos_5)[name = string("op_12955_cast_fp16")]; - tensor x1_67_begin_0 = const()[name = string("x1_67_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_67_end_0 = const()[name = string("x1_67_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_67_end_mask_0 = const()[name = string("x1_67_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_67_cast_fp16 = slice_by_index(begin = x1_67_begin_0, end = x1_67_end_0, end_mask = x1_67_end_mask_0, x = k_33_cast_fp16)[name = string("x1_67_cast_fp16")]; - tensor x2_67_begin_0 = const()[name = string("x2_67_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_67_end_0 = const()[name = string("x2_67_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_67_end_mask_0 = const()[name = string("x2_67_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_67_cast_fp16 = slice_by_index(begin = x2_67_begin_0, end = x2_67_end_0, end_mask = x2_67_end_mask_0, x = k_33_cast_fp16)[name = string("x2_67_cast_fp16")]; - fp16 const_707_promoted_to_fp16 = const()[name = string("const_707_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_12976_cast_fp16 = mul(x = x2_67_cast_fp16, y = const_707_promoted_to_fp16)[name = string("op_12976_cast_fp16")]; - int32 var_12978 = const()[name = string("op_12978"), val = int32(-1)]; - bool var_12979_interleave_0 = const()[name = string("op_12979_interleave_0"), val = bool(false)]; - tensor var_12979_cast_fp16 = concat(axis = var_12978, interleave = var_12979_interleave_0, values = (var_12976_cast_fp16, x1_67_cast_fp16))[name = string("op_12979_cast_fp16")]; - tensor var_12980_cast_fp16 = mul(x = var_12979_cast_fp16, y = sin_5)[name = string("op_12980_cast_fp16")]; - tensor key_states_163_cast_fp16 = add(x = var_12955_cast_fp16, y = var_12980_cast_fp16)[name = string("key_states_163_cast_fp16")]; - tensor expand_dims_192 = const()[name = string("expand_dims_192"), val = tensor([14])]; - tensor expand_dims_193 = const()[name = string("expand_dims_193"), val = tensor([0])]; - tensor expand_dims_195 = const()[name = string("expand_dims_195"), val = tensor([0])]; - tensor expand_dims_196 = const()[name = string("expand_dims_196"), val = tensor([15])]; - int32 concat_290_axis_0 = const()[name = string("concat_290_axis_0"), val = int32(0)]; - bool concat_290_interleave_0 = const()[name = string("concat_290_interleave_0"), val = bool(false)]; - tensor concat_290 = concat(axis = concat_290_axis_0, interleave = concat_290_interleave_0, values = (expand_dims_192, expand_dims_193, current_pos, expand_dims_195))[name = string("concat_290")]; - tensor concat_291_values1_0 = const()[name = string("concat_291_values1_0"), val = tensor([0])]; - tensor concat_291_values3_0 = const()[name = string("concat_291_values3_0"), val = tensor([0])]; - int32 concat_291_axis_0 = const()[name = string("concat_291_axis_0"), val = int32(0)]; - bool concat_291_interleave_0 = const()[name = string("concat_291_interleave_0"), val = bool(false)]; - tensor concat_291 = concat(axis = concat_291_axis_0, interleave = concat_291_interleave_0, values = (expand_dims_196, concat_291_values1_0, end_pos_1, concat_291_values3_0))[name = string("concat_291")]; - tensor model_model_kv_cache_local_internal_tensor_assign_29_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_29_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_29_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_29_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_29_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_29_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_29_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_29_cast_fp16 = slice_update(begin = concat_290, begin_mask = model_model_kv_cache_local_internal_tensor_assign_29_begin_mask_0, end = concat_291, end_mask = model_model_kv_cache_local_internal_tensor_assign_29_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_29_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_29_stride_0, update = key_states_163_cast_fp16, x = coreml_update_state_83)[name = string("model_model_kv_cache_local_internal_tensor_assign_29_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_29_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_136_write_state")]; - tensor coreml_update_state_84 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_136")]; - tensor expand_dims_198 = const()[name = string("expand_dims_198"), val = tensor([36])]; - tensor expand_dims_199 = const()[name = string("expand_dims_199"), val = tensor([0])]; - tensor expand_dims_201 = const()[name = string("expand_dims_201"), val = tensor([0])]; - tensor expand_dims_202 = const()[name = string("expand_dims_202"), val = tensor([37])]; - int32 concat_294_axis_0 = const()[name = string("concat_294_axis_0"), val = int32(0)]; - bool concat_294_interleave_0 = const()[name = string("concat_294_interleave_0"), val = bool(false)]; - tensor concat_294 = concat(axis = concat_294_axis_0, interleave = concat_294_interleave_0, values = (expand_dims_198, expand_dims_199, current_pos, expand_dims_201))[name = string("concat_294")]; - tensor concat_295_values1_0 = const()[name = string("concat_295_values1_0"), val = tensor([0])]; - tensor concat_295_values3_0 = const()[name = string("concat_295_values3_0"), val = tensor([0])]; - int32 concat_295_axis_0 = const()[name = string("concat_295_axis_0"), val = int32(0)]; - bool concat_295_interleave_0 = const()[name = string("concat_295_interleave_0"), val = bool(false)]; - tensor concat_295 = concat(axis = concat_295_axis_0, interleave = concat_295_interleave_0, values = (expand_dims_202, concat_295_values1_0, end_pos_1, concat_295_values3_0))[name = string("concat_295")]; - tensor model_model_kv_cache_local_internal_tensor_assign_30_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_30_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_30_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_30_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_30_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_30_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_30_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor value_states_131 = transpose(perm = var_12859, x = var_12854)[name = string("transpose_86")]; - tensor model_model_kv_cache_local_internal_tensor_assign_30_cast_fp16 = slice_update(begin = concat_294, begin_mask = model_model_kv_cache_local_internal_tensor_assign_30_begin_mask_0, end = concat_295, end_mask = model_model_kv_cache_local_internal_tensor_assign_30_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_30_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_30_stride_0, update = value_states_131, x = coreml_update_state_84)[name = string("model_model_kv_cache_local_internal_tensor_assign_30_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_30_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_137_write_state")]; - tensor coreml_update_state_85 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_137")]; - tensor var_13079_begin_0 = const()[name = string("op_13079_begin_0"), val = tensor([14, 0, 0, 0])]; - tensor var_13079_end_0 = const()[name = string("op_13079_end_0"), val = tensor([15, 1, 512, 256])]; - tensor var_13079_end_mask_0 = const()[name = string("op_13079_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_13079_cast_fp16 = slice_by_index(begin = var_13079_begin_0, end = var_13079_end_0, end_mask = var_13079_end_mask_0, x = coreml_update_state_85)[name = string("op_13079_cast_fp16")]; - tensor var_13086_begin_0 = const()[name = string("op_13086_begin_0"), val = tensor([36, 0, 0, 0])]; - tensor var_13086_end_0 = const()[name = string("op_13086_end_0"), val = tensor([37, 1, 512, 256])]; - tensor var_13086_end_mask_0 = const()[name = string("op_13086_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_13086_cast_fp16 = slice_by_index(begin = var_13086_begin_0, end = var_13086_end_0, end_mask = var_13086_end_mask_0, x = coreml_update_state_85)[name = string("op_13086_cast_fp16")]; - tensor var_13125 = const()[name = string("op_13125"), val = tensor([1, 4, 1, 1])]; - tensor x_261_cast_fp16 = tile(reps = var_13125, x = var_13079_cast_fp16)[name = string("x_261_cast_fp16")]; - tensor var_13145 = const()[name = string("op_13145"), val = tensor([1, 4, 1, 1])]; - tensor x_267_cast_fp16 = tile(reps = var_13145, x = var_13086_cast_fp16)[name = string("x_267_cast_fp16")]; - bool var_13172_transpose_x_0 = const()[name = string("op_13172_transpose_x_0"), val = bool(false)]; - bool var_13172_transpose_y_0 = const()[name = string("op_13172_transpose_y_0"), val = bool(true)]; - tensor var_13172 = matmul(transpose_x = var_13172_transpose_x_0, transpose_y = var_13172_transpose_y_0, x = query_states_131_cast_fp16, y = x_261_cast_fp16)[name = string("op_13172")]; - fp16 var_13173_to_fp16 = const()[name = string("op_13173_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_65_cast_fp16 = mul(x = var_13172, y = var_13173_to_fp16)[name = string("attn_weights_65_cast_fp16")]; - tensor attn_weights_67_cast_fp16 = add(x = attn_weights_65_cast_fp16, y = mask_slice_1)[name = string("attn_weights_67_cast_fp16")]; - int32 var_13208 = const()[name = string("op_13208"), val = int32(-1)]; - tensor var_13210_cast_fp16 = softmax(axis = var_13208, x = attn_weights_67_cast_fp16)[name = string("op_13210_cast_fp16")]; - tensor concat_300 = const()[name = string("concat_300"), val = tensor([4, 64, 512])]; - tensor reshape_48_cast_fp16 = reshape(shape = concat_300, x = var_13210_cast_fp16)[name = string("reshape_48_cast_fp16")]; - tensor concat_301 = const()[name = string("concat_301"), val = tensor([4, 512, 256])]; - tensor reshape_49_cast_fp16 = reshape(shape = concat_301, x = x_267_cast_fp16)[name = string("reshape_49_cast_fp16")]; - bool matmul_16_transpose_x_0 = const()[name = string("matmul_16_transpose_x_0"), val = bool(false)]; - bool matmul_16_transpose_y_0 = const()[name = string("matmul_16_transpose_y_0"), val = bool(false)]; - tensor matmul_16_cast_fp16 = matmul(transpose_x = matmul_16_transpose_x_0, transpose_y = matmul_16_transpose_y_0, x = reshape_48_cast_fp16, y = reshape_49_cast_fp16)[name = string("matmul_16_cast_fp16")]; - tensor concat_305 = const()[name = string("concat_305"), val = tensor([1, 4, 64, 256])]; - tensor reshape_50_cast_fp16 = reshape(shape = concat_305, x = matmul_16_cast_fp16)[name = string("reshape_50_cast_fp16")]; - tensor var_13222_perm_0 = const()[name = string("op_13222_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_13241 = const()[name = string("op_13241"), val = tensor([1, 64, 1024])]; - tensor var_13222_cast_fp16 = transpose(perm = var_13222_perm_0, x = reshape_50_cast_fp16)[name = string("transpose_85")]; - tensor attn_output_165_cast_fp16 = reshape(shape = var_13241, x = var_13222_cast_fp16)[name = string("attn_output_165_cast_fp16")]; - tensor var_13246 = const()[name = string("op_13246"), val = tensor([0, 2, 1])]; - string var_13262_pad_type_0 = const()[name = string("op_13262_pad_type_0"), val = string("valid")]; - int32 var_13262_groups_0 = const()[name = string("op_13262_groups_0"), val = int32(1)]; - tensor var_13262_strides_0 = const()[name = string("op_13262_strides_0"), val = tensor([1])]; - tensor var_13262_pad_0 = const()[name = string("op_13262_pad_0"), val = tensor([0, 0])]; - tensor var_13262_dilations_0 = const()[name = string("op_13262_dilations_0"), val = tensor([1])]; - tensor squeeze_16_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357051904))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357936704))))[name = string("squeeze_16_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_13247_cast_fp16 = transpose(perm = var_13246, x = attn_output_165_cast_fp16)[name = string("transpose_84")]; - tensor var_13262_cast_fp16 = conv(dilations = var_13262_dilations_0, groups = var_13262_groups_0, pad = var_13262_pad_0, pad_type = var_13262_pad_type_0, strides = var_13262_strides_0, weight = squeeze_16_cast_fp16_to_fp32_to_fp16_palettized, x = var_13247_cast_fp16)[name = string("op_13262_cast_fp16")]; - tensor var_13266 = const()[name = string("op_13266"), val = tensor([0, 2, 1])]; - int32 var_13277 = const()[name = string("op_13277"), val = int32(-1)]; - fp16 const_719_promoted_to_fp16 = const()[name = string("const_719_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_265_cast_fp16 = transpose(perm = var_13266, x = var_13262_cast_fp16)[name = string("transpose_83")]; - tensor var_13279_cast_fp16 = mul(x = hidden_states_265_cast_fp16, y = const_719_promoted_to_fp16)[name = string("op_13279_cast_fp16")]; - bool input_331_interleave_0 = const()[name = string("input_331_interleave_0"), val = bool(false)]; - tensor input_331_cast_fp16 = concat(axis = var_13277, interleave = input_331_interleave_0, values = (hidden_states_265_cast_fp16, var_13279_cast_fp16))[name = string("input_331_cast_fp16")]; - tensor normed_397_axes_0 = const()[name = string("normed_397_axes_0"), val = tensor([-1])]; - fp16 var_13274_to_fp16 = const()[name = string("op_13274_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_397_cast_fp16 = layer_norm(axes = normed_397_axes_0, epsilon = var_13274_to_fp16, x = input_331_cast_fp16)[name = string("normed_397_cast_fp16")]; - tensor normed_399_begin_0 = const()[name = string("normed_399_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_399_end_0 = const()[name = string("normed_399_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_399_end_mask_0 = const()[name = string("normed_399_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_399_cast_fp16 = slice_by_index(begin = normed_399_begin_0, end = normed_399_end_0, end_mask = normed_399_end_mask_0, x = normed_397_cast_fp16)[name = string("normed_399_cast_fp16")]; - tensor var_13293_to_fp16 = const()[name = string("op_13293_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357955200)))]; - tensor attn_output_169_cast_fp16 = mul(x = normed_399_cast_fp16, y = var_13293_to_fp16)[name = string("attn_output_169_cast_fp16")]; - tensor hidden_states_267_cast_fp16 = add(x = hidden_states_257_cast_fp16, y = attn_output_169_cast_fp16)[name = string("hidden_states_267_cast_fp16")]; - int32 var_13306 = const()[name = string("op_13306"), val = int32(-1)]; - fp16 const_723_promoted_to_fp16 = const()[name = string("const_723_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_13308_cast_fp16 = mul(x = hidden_states_267_cast_fp16, y = const_723_promoted_to_fp16)[name = string("op_13308_cast_fp16")]; - bool input_333_interleave_0 = const()[name = string("input_333_interleave_0"), val = bool(false)]; - tensor input_333_cast_fp16 = concat(axis = var_13306, interleave = input_333_interleave_0, values = (hidden_states_267_cast_fp16, var_13308_cast_fp16))[name = string("input_333_cast_fp16")]; - tensor normed_401_axes_0 = const()[name = string("normed_401_axes_0"), val = tensor([-1])]; - fp16 var_13303_to_fp16 = const()[name = string("op_13303_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_401_cast_fp16 = layer_norm(axes = normed_401_axes_0, epsilon = var_13303_to_fp16, x = input_333_cast_fp16)[name = string("normed_401_cast_fp16")]; - tensor normed_403_begin_0 = const()[name = string("normed_403_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_403_end_0 = const()[name = string("normed_403_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_403_end_mask_0 = const()[name = string("normed_403_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_403_cast_fp16 = slice_by_index(begin = normed_403_begin_0, end = normed_403_end_0, end_mask = normed_403_end_mask_0, x = normed_401_cast_fp16)[name = string("normed_403_cast_fp16")]; - tensor var_13322_to_fp16 = const()[name = string("op_13322_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357957568)))]; - tensor x_269_cast_fp16 = mul(x = normed_403_cast_fp16, y = var_13322_to_fp16)[name = string("x_269_cast_fp16")]; - tensor var_13334 = const()[name = string("op_13334"), val = tensor([0, 2, 1])]; - tensor input_335_axes_0 = const()[name = string("input_335_axes_0"), val = tensor([2])]; - tensor var_13335_cast_fp16 = transpose(perm = var_13334, x = x_269_cast_fp16)[name = string("transpose_82")]; - tensor input_335_cast_fp16 = expand_dims(axes = input_335_axes_0, x = var_13335_cast_fp16)[name = string("input_335_cast_fp16")]; - string x_271_pad_type_0 = const()[name = string("x_271_pad_type_0"), val = string("valid")]; - tensor x_271_strides_0 = const()[name = string("x_271_strides_0"), val = tensor([1, 1])]; - tensor x_271_pad_0 = const()[name = string("x_271_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_271_dilations_0 = const()[name = string("x_271_dilations_0"), val = tensor([1, 1])]; - int32 x_271_groups_0 = const()[name = string("x_271_groups_0"), val = int32(1)]; - tensor model_model_layers_16_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1073405952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1079377984))))[name = string("model_model_layers_16_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_271_cast_fp16 = conv(dilations = x_271_dilations_0, groups = x_271_groups_0, pad = x_271_pad_0, pad_type = x_271_pad_type_0, strides = x_271_strides_0, weight = model_model_layers_16_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_335_cast_fp16)[name = string("x_271_cast_fp16")]; - string b_33_pad_type_0 = const()[name = string("b_33_pad_type_0"), val = string("valid")]; - tensor b_33_strides_0 = const()[name = string("b_33_strides_0"), val = tensor([1, 1])]; - tensor b_33_pad_0 = const()[name = string("b_33_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_33_dilations_0 = const()[name = string("b_33_dilations_0"), val = tensor([1, 1])]; - int32 b_33_groups_0 = const()[name = string("b_33_groups_0"), val = int32(1)]; - tensor model_model_layers_16_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1079488640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1085460672))))[name = string("model_model_layers_16_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_33_cast_fp16 = conv(dilations = b_33_dilations_0, groups = b_33_groups_0, pad = b_33_pad_0, pad_type = b_33_pad_type_0, strides = b_33_strides_0, weight = model_model_layers_16_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_335_cast_fp16)[name = string("b_33_cast_fp16")]; - string var_13360_mode_0 = const()[name = string("op_13360_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_13360_cast_fp16 = gelu(mode = var_13360_mode_0, x = x_271_cast_fp16)[name = string("op_13360_cast_fp16")]; - tensor input_337_cast_fp16 = mul(x = var_13360_cast_fp16, y = b_33_cast_fp16)[name = string("input_337_cast_fp16")]; - string e_33_pad_type_0 = const()[name = string("e_33_pad_type_0"), val = string("valid")]; - tensor e_33_strides_0 = const()[name = string("e_33_strides_0"), val = tensor([1, 1])]; - tensor e_33_pad_0 = const()[name = string("e_33_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_33_dilations_0 = const()[name = string("e_33_dilations_0"), val = tensor([1, 1])]; - int32 e_33_groups_0 = const()[name = string("e_33_groups_0"), val = int32(1)]; - tensor model_model_layers_16_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(370125312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(376097344))))[name = string("model_model_layers_16_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_33_cast_fp16 = conv(dilations = e_33_dilations_0, groups = e_33_groups_0, pad = e_33_pad_0, pad_type = e_33_pad_type_0, strides = e_33_strides_0, weight = model_model_layers_16_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_337_cast_fp16)[name = string("e_33_cast_fp16")]; - tensor var_13368_axes_0 = const()[name = string("op_13368_axes_0"), val = tensor([2])]; - tensor var_13368_cast_fp16 = squeeze(axes = var_13368_axes_0, x = e_33_cast_fp16)[name = string("op_13368_cast_fp16")]; - tensor var_13369 = const()[name = string("op_13369"), val = tensor([0, 2, 1])]; - int32 var_13380 = const()[name = string("op_13380"), val = int32(-1)]; - fp16 const_727_promoted_to_fp16 = const()[name = string("const_727_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_269_cast_fp16 = transpose(perm = var_13369, x = var_13368_cast_fp16)[name = string("transpose_81")]; - tensor var_13382_cast_fp16 = mul(x = hidden_states_269_cast_fp16, y = const_727_promoted_to_fp16)[name = string("op_13382_cast_fp16")]; - bool input_339_interleave_0 = const()[name = string("input_339_interleave_0"), val = bool(false)]; - tensor input_339_cast_fp16 = concat(axis = var_13380, interleave = input_339_interleave_0, values = (hidden_states_269_cast_fp16, var_13382_cast_fp16))[name = string("input_339_cast_fp16")]; - tensor normed_405_axes_0 = const()[name = string("normed_405_axes_0"), val = tensor([-1])]; - fp16 var_13377_to_fp16 = const()[name = string("op_13377_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_405_cast_fp16 = layer_norm(axes = normed_405_axes_0, epsilon = var_13377_to_fp16, x = input_339_cast_fp16)[name = string("normed_405_cast_fp16")]; - tensor normed_407_begin_0 = const()[name = string("normed_407_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_407_end_0 = const()[name = string("normed_407_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_407_end_mask_0 = const()[name = string("normed_407_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_407_cast_fp16 = slice_by_index(begin = normed_407_begin_0, end = normed_407_end_0, end_mask = normed_407_end_mask_0, x = normed_405_cast_fp16)[name = string("normed_407_cast_fp16")]; - tensor var_13396_to_fp16 = const()[name = string("op_13396_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(376115840)))]; - tensor hidden_states_271_cast_fp16 = mul(x = normed_407_cast_fp16, y = var_13396_to_fp16)[name = string("hidden_states_271_cast_fp16")]; - tensor hidden_states_273_cast_fp16 = add(x = hidden_states_267_cast_fp16, y = hidden_states_271_cast_fp16)[name = string("hidden_states_273_cast_fp16")]; - int32 var_13450 = const()[name = string("op_13450"), val = int32(-1)]; - fp16 const_732_promoted_to_fp16 = const()[name = string("const_732_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_13452_cast_fp16 = mul(x = hidden_states_273_cast_fp16, y = const_732_promoted_to_fp16)[name = string("op_13452_cast_fp16")]; - bool input_341_interleave_0 = const()[name = string("input_341_interleave_0"), val = bool(false)]; - tensor input_341_cast_fp16 = concat(axis = var_13450, interleave = input_341_interleave_0, values = (hidden_states_273_cast_fp16, var_13452_cast_fp16))[name = string("input_341_cast_fp16")]; - tensor normed_409_axes_0 = const()[name = string("normed_409_axes_0"), val = tensor([-1])]; - fp16 var_13447_to_fp16 = const()[name = string("op_13447_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_409_cast_fp16 = layer_norm(axes = normed_409_axes_0, epsilon = var_13447_to_fp16, x = input_341_cast_fp16)[name = string("normed_409_cast_fp16")]; - tensor normed_411_begin_0 = const()[name = string("normed_411_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_411_end_0 = const()[name = string("normed_411_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_411_end_mask_0 = const()[name = string("normed_411_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_411_cast_fp16 = slice_by_index(begin = normed_411_begin_0, end = normed_411_end_0, end_mask = normed_411_end_mask_0, x = normed_409_cast_fp16)[name = string("normed_411_cast_fp16")]; - tensor var_13466_to_fp16 = const()[name = string("op_13466_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(376118208)))]; - tensor hidden_states_275_cast_fp16 = mul(x = normed_411_cast_fp16, y = var_13466_to_fp16)[name = string("hidden_states_275_cast_fp16")]; - tensor var_13477 = const()[name = string("op_13477"), val = tensor([0, 2, 1])]; - tensor var_13480_axes_0 = const()[name = string("op_13480_axes_0"), val = tensor([2])]; - tensor var_13478_cast_fp16 = transpose(perm = var_13477, x = hidden_states_275_cast_fp16)[name = string("transpose_80")]; - tensor var_13480_cast_fp16 = expand_dims(axes = var_13480_axes_0, x = var_13478_cast_fp16)[name = string("op_13480_cast_fp16")]; - string query_states_137_pad_type_0 = const()[name = string("query_states_137_pad_type_0"), val = string("valid")]; - tensor query_states_137_strides_0 = const()[name = string("query_states_137_strides_0"), val = tensor([1, 1])]; - tensor query_states_137_pad_0 = const()[name = string("query_states_137_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_137_dilations_0 = const()[name = string("query_states_137_dilations_0"), val = tensor([1, 1])]; - int32 query_states_137_groups_0 = const()[name = string("query_states_137_groups_0"), val = int32(1)]; - tensor query_states_137 = conv(dilations = query_states_137_dilations_0, groups = query_states_137_groups_0, pad = query_states_137_pad_0, pad_type = query_states_137_pad_type_0, strides = query_states_137_strides_0, weight = model_model_layers_17_self_attn_q_proj_weight_palettized, x = var_13480_cast_fp16)[name = string("query_states_137")]; - string key_states_171_pad_type_0 = const()[name = string("key_states_171_pad_type_0"), val = string("valid")]; - tensor key_states_171_strides_0 = const()[name = string("key_states_171_strides_0"), val = tensor([1, 1])]; - tensor key_states_171_pad_0 = const()[name = string("key_states_171_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_171_dilations_0 = const()[name = string("key_states_171_dilations_0"), val = tensor([1, 1])]; - int32 key_states_171_groups_0 = const()[name = string("key_states_171_groups_0"), val = int32(1)]; - tensor key_states_171 = conv(dilations = key_states_171_dilations_0, groups = key_states_171_groups_0, pad = key_states_171_pad_0, pad_type = key_states_171_pad_type_0, strides = key_states_171_strides_0, weight = model_model_layers_17_self_attn_k_proj_weight_palettized, x = var_13480_cast_fp16)[name = string("key_states_171")]; - string value_states_137_pad_type_0 = const()[name = string("value_states_137_pad_type_0"), val = string("valid")]; - tensor value_states_137_strides_0 = const()[name = string("value_states_137_strides_0"), val = tensor([1, 1])]; - tensor value_states_137_pad_0 = const()[name = string("value_states_137_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_137_dilations_0 = const()[name = string("value_states_137_dilations_0"), val = tensor([1, 1])]; - int32 value_states_137_groups_0 = const()[name = string("value_states_137_groups_0"), val = int32(1)]; - tensor value_states_137 = conv(dilations = value_states_137_dilations_0, groups = value_states_137_groups_0, pad = value_states_137_pad_0, pad_type = value_states_137_pad_type_0, strides = value_states_137_strides_0, weight = model_model_layers_17_self_attn_v_proj_weight_palettized, x = var_13480_cast_fp16)[name = string("value_states_137")]; - tensor var_13522 = const()[name = string("op_13522"), val = tensor([1, 4, 256, 64])]; - tensor var_13523 = reshape(shape = var_13522, x = query_states_137)[name = string("op_13523")]; - tensor var_13528 = const()[name = string("op_13528"), val = tensor([0, 1, 3, 2])]; - tensor var_13533 = const()[name = string("op_13533"), val = tensor([1, 1, 256, 64])]; - tensor var_13534 = reshape(shape = var_13533, x = key_states_171)[name = string("op_13534")]; - tensor var_13539 = const()[name = string("op_13539"), val = tensor([0, 1, 3, 2])]; - tensor var_13544 = const()[name = string("op_13544"), val = tensor([1, 1, 256, 64])]; - tensor var_13545 = reshape(shape = var_13544, x = value_states_137)[name = string("op_13545")]; - tensor var_13550 = const()[name = string("op_13550"), val = tensor([0, 1, 3, 2])]; - int32 var_13561 = const()[name = string("op_13561"), val = int32(-1)]; - fp16 const_737_promoted = const()[name = string("const_737_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_277 = transpose(perm = var_13528, x = var_13523)[name = string("transpose_79")]; - tensor var_13563 = mul(x = hidden_states_277, y = const_737_promoted)[name = string("op_13563")]; - bool input_345_interleave_0 = const()[name = string("input_345_interleave_0"), val = bool(false)]; - tensor input_345 = concat(axis = var_13561, interleave = input_345_interleave_0, values = (hidden_states_277, var_13563))[name = string("input_345")]; - tensor normed_413_axes_0 = const()[name = string("normed_413_axes_0"), val = tensor([-1])]; - fp16 var_13558_to_fp16 = const()[name = string("op_13558_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_413_cast_fp16 = layer_norm(axes = normed_413_axes_0, epsilon = var_13558_to_fp16, x = input_345)[name = string("normed_413_cast_fp16")]; - tensor normed_415_begin_0 = const()[name = string("normed_415_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_415_end_0 = const()[name = string("normed_415_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_415_end_mask_0 = const()[name = string("normed_415_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_415 = slice_by_index(begin = normed_415_begin_0, end = normed_415_end_0, end_mask = normed_415_end_mask_0, x = normed_413_cast_fp16)[name = string("normed_415")]; - tensor var_13577_to_fp16 = const()[name = string("op_13577_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(376120576)))]; - tensor q_35_cast_fp16 = mul(x = normed_415, y = var_13577_to_fp16)[name = string("q_35_cast_fp16")]; - int32 var_13588 = const()[name = string("op_13588"), val = int32(-1)]; - fp16 const_741_promoted = const()[name = string("const_741_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_279 = transpose(perm = var_13539, x = var_13534)[name = string("transpose_78")]; - tensor var_13590 = mul(x = hidden_states_279, y = const_741_promoted)[name = string("op_13590")]; - bool input_347_interleave_0 = const()[name = string("input_347_interleave_0"), val = bool(false)]; - tensor input_347 = concat(axis = var_13588, interleave = input_347_interleave_0, values = (hidden_states_279, var_13590))[name = string("input_347")]; - tensor normed_417_axes_0 = const()[name = string("normed_417_axes_0"), val = tensor([-1])]; - fp16 var_13585_to_fp16 = const()[name = string("op_13585_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_417_cast_fp16 = layer_norm(axes = normed_417_axes_0, epsilon = var_13585_to_fp16, x = input_347)[name = string("normed_417_cast_fp16")]; - tensor normed_419_begin_0 = const()[name = string("normed_419_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_419_end_0 = const()[name = string("normed_419_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_419_end_mask_0 = const()[name = string("normed_419_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_419 = slice_by_index(begin = normed_419_begin_0, end = normed_419_end_0, end_mask = normed_419_end_mask_0, x = normed_417_cast_fp16)[name = string("normed_419")]; - tensor var_13604_to_fp16 = const()[name = string("op_13604_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(376121152)))]; - tensor k_35_cast_fp16 = mul(x = normed_419, y = var_13604_to_fp16)[name = string("k_35_cast_fp16")]; - tensor var_13618_cast_fp16 = mul(x = q_35_cast_fp16, y = cos_35)[name = string("op_13618_cast_fp16")]; - tensor x1_69_begin_0 = const()[name = string("x1_69_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_69_end_0 = const()[name = string("x1_69_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_69_end_mask_0 = const()[name = string("x1_69_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_69_cast_fp16 = slice_by_index(begin = x1_69_begin_0, end = x1_69_end_0, end_mask = x1_69_end_mask_0, x = q_35_cast_fp16)[name = string("x1_69_cast_fp16")]; - tensor x2_69_begin_0 = const()[name = string("x2_69_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_69_end_0 = const()[name = string("x2_69_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_69_end_mask_0 = const()[name = string("x2_69_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_69_cast_fp16 = slice_by_index(begin = x2_69_begin_0, end = x2_69_end_0, end_mask = x2_69_end_mask_0, x = q_35_cast_fp16)[name = string("x2_69_cast_fp16")]; - fp16 const_747_promoted_to_fp16 = const()[name = string("const_747_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_13639_cast_fp16 = mul(x = x2_69_cast_fp16, y = const_747_promoted_to_fp16)[name = string("op_13639_cast_fp16")]; - int32 var_13641 = const()[name = string("op_13641"), val = int32(-1)]; - bool var_13642_interleave_0 = const()[name = string("op_13642_interleave_0"), val = bool(false)]; - tensor var_13642_cast_fp16 = concat(axis = var_13641, interleave = var_13642_interleave_0, values = (var_13639_cast_fp16, x1_69_cast_fp16))[name = string("op_13642_cast_fp16")]; - tensor var_13643_cast_fp16 = mul(x = var_13642_cast_fp16, y = sin_35)[name = string("op_13643_cast_fp16")]; - tensor query_states_139_cast_fp16 = add(x = var_13618_cast_fp16, y = var_13643_cast_fp16)[name = string("query_states_139_cast_fp16")]; - tensor var_13646_cast_fp16 = mul(x = k_35_cast_fp16, y = cos_35)[name = string("op_13646_cast_fp16")]; - tensor x1_71_begin_0 = const()[name = string("x1_71_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_71_end_0 = const()[name = string("x1_71_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_71_end_mask_0 = const()[name = string("x1_71_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_71_cast_fp16 = slice_by_index(begin = x1_71_begin_0, end = x1_71_end_0, end_mask = x1_71_end_mask_0, x = k_35_cast_fp16)[name = string("x1_71_cast_fp16")]; - tensor x2_71_begin_0 = const()[name = string("x2_71_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_71_end_0 = const()[name = string("x2_71_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_71_end_mask_0 = const()[name = string("x2_71_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_71_cast_fp16 = slice_by_index(begin = x2_71_begin_0, end = x2_71_end_0, end_mask = x2_71_end_mask_0, x = k_35_cast_fp16)[name = string("x2_71_cast_fp16")]; - fp16 const_750_promoted_to_fp16 = const()[name = string("const_750_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_13667_cast_fp16 = mul(x = x2_71_cast_fp16, y = const_750_promoted_to_fp16)[name = string("op_13667_cast_fp16")]; - int32 var_13669 = const()[name = string("op_13669"), val = int32(-1)]; - bool var_13670_interleave_0 = const()[name = string("op_13670_interleave_0"), val = bool(false)]; - tensor var_13670_cast_fp16 = concat(axis = var_13669, interleave = var_13670_interleave_0, values = (var_13667_cast_fp16, x1_71_cast_fp16))[name = string("op_13670_cast_fp16")]; - tensor var_13671_cast_fp16 = mul(x = var_13670_cast_fp16, y = sin_35)[name = string("op_13671_cast_fp16")]; - tensor key_states_173_cast_fp16 = add(x = var_13646_cast_fp16, y = var_13671_cast_fp16)[name = string("key_states_173_cast_fp16")]; - tensor model_model_kv_cache_global_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_global_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_global_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_38, begin_mask = model_model_kv_cache_global_internal_tensor_assign_5_begin_mask_0, end = concat_39, end_mask = model_model_kv_cache_global_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_5_stride_0, update = key_states_173_cast_fp16, x = coreml_update_state_75)[name = string("model_model_kv_cache_global_internal_tensor_assign_5_cast_fp16")]; - write_state(data = model_model_kv_cache_global_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_138_write_state")]; - tensor coreml_update_state_86 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_138")]; - tensor model_model_kv_cache_global_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_global_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_global_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor value_states_139 = transpose(perm = var_13550, x = var_13545)[name = string("transpose_77")]; - tensor model_model_kv_cache_global_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_128, begin_mask = model_model_kv_cache_global_internal_tensor_assign_6_begin_mask_0, end = concat_129, end_mask = model_model_kv_cache_global_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_6_stride_0, update = value_states_139, x = coreml_update_state_86)[name = string("model_model_kv_cache_global_internal_tensor_assign_6_cast_fp16")]; - write_state(data = model_model_kv_cache_global_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_139_write_state")]; - tensor coreml_update_state_87 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_139")]; - tensor var_13770_begin_0 = const()[name = string("op_13770_begin_0"), val = tensor([2, 0, 0, 0])]; - tensor var_13770_end_0 = const()[name = string("op_13770_end_0"), val = tensor([3, 1, 4096, 256])]; - tensor var_13770_end_mask_0 = const()[name = string("op_13770_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_13770_cast_fp16 = slice_by_index(begin = var_13770_begin_0, end = var_13770_end_0, end_mask = var_13770_end_mask_0, x = coreml_update_state_87)[name = string("op_13770_cast_fp16")]; - tensor var_13777_begin_0 = const()[name = string("op_13777_begin_0"), val = tensor([6, 0, 0, 0])]; - tensor var_13777_end_0 = const()[name = string("op_13777_end_0"), val = tensor([7, 1, 4096, 256])]; - tensor var_13777_end_mask_0 = const()[name = string("op_13777_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_13777_cast_fp16 = slice_by_index(begin = var_13777_begin_0, end = var_13777_end_0, end_mask = var_13777_end_mask_0, x = coreml_update_state_87)[name = string("op_13777_cast_fp16")]; - tensor var_13816 = const()[name = string("op_13816"), val = tensor([1, 4, 1, 1])]; - tensor x_277_cast_fp16 = tile(reps = var_13816, x = var_13770_cast_fp16)[name = string("x_277_cast_fp16")]; - tensor var_13836 = const()[name = string("op_13836"), val = tensor([1, 4, 1, 1])]; - tensor x_283_cast_fp16 = tile(reps = var_13836, x = var_13777_cast_fp16)[name = string("x_283_cast_fp16")]; - bool var_13863_transpose_x_0 = const()[name = string("op_13863_transpose_x_0"), val = bool(false)]; - bool var_13863_transpose_y_0 = const()[name = string("op_13863_transpose_y_0"), val = bool(true)]; - tensor var_13863 = matmul(transpose_x = var_13863_transpose_x_0, transpose_y = var_13863_transpose_y_0, x = query_states_139_cast_fp16, y = x_277_cast_fp16)[name = string("op_13863")]; - fp16 var_13864_to_fp16 = const()[name = string("op_13864_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_69_cast_fp16 = mul(x = var_13863, y = var_13864_to_fp16)[name = string("attn_weights_69_cast_fp16")]; - tensor attn_weights_71_cast_fp16 = add(x = attn_weights_69_cast_fp16, y = causal_mask)[name = string("attn_weights_71_cast_fp16")]; - int32 var_13899 = const()[name = string("op_13899"), val = int32(-1)]; - tensor var_13901_cast_fp16 = softmax(axis = var_13899, x = attn_weights_71_cast_fp16)[name = string("op_13901_cast_fp16")]; - tensor concat_318 = const()[name = string("concat_318"), val = tensor([4, 64, 4096])]; - tensor reshape_51_cast_fp16 = reshape(shape = concat_318, x = var_13901_cast_fp16)[name = string("reshape_51_cast_fp16")]; - tensor concat_319 = const()[name = string("concat_319"), val = tensor([4, 4096, 256])]; - tensor reshape_52_cast_fp16 = reshape(shape = concat_319, x = x_283_cast_fp16)[name = string("reshape_52_cast_fp16")]; - bool matmul_17_transpose_x_0 = const()[name = string("matmul_17_transpose_x_0"), val = bool(false)]; - bool matmul_17_transpose_y_0 = const()[name = string("matmul_17_transpose_y_0"), val = bool(false)]; - tensor matmul_17_cast_fp16 = matmul(transpose_x = matmul_17_transpose_x_0, transpose_y = matmul_17_transpose_y_0, x = reshape_51_cast_fp16, y = reshape_52_cast_fp16)[name = string("matmul_17_cast_fp16")]; - tensor concat_323 = const()[name = string("concat_323"), val = tensor([1, 4, 64, 256])]; - tensor reshape_53_cast_fp16 = reshape(shape = concat_323, x = matmul_17_cast_fp16)[name = string("reshape_53_cast_fp16")]; - tensor var_13913_perm_0 = const()[name = string("op_13913_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_13932 = const()[name = string("op_13932"), val = tensor([1, 64, 1024])]; - tensor var_13913_cast_fp16 = transpose(perm = var_13913_perm_0, x = reshape_53_cast_fp16)[name = string("transpose_76")]; - tensor attn_output_175_cast_fp16 = reshape(shape = var_13932, x = var_13913_cast_fp16)[name = string("attn_output_175_cast_fp16")]; - tensor var_13937 = const()[name = string("op_13937"), val = tensor([0, 2, 1])]; - string var_13953_pad_type_0 = const()[name = string("op_13953_pad_type_0"), val = string("valid")]; - int32 var_13953_groups_0 = const()[name = string("op_13953_groups_0"), val = int32(1)]; - tensor var_13953_strides_0 = const()[name = string("op_13953_strides_0"), val = tensor([1])]; - tensor var_13953_pad_0 = const()[name = string("op_13953_pad_0"), val = tensor([0, 0])]; - tensor var_13953_dilations_0 = const()[name = string("op_13953_dilations_0"), val = tensor([1])]; - tensor squeeze_17_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(376121728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377006528))))[name = string("squeeze_17_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_13938_cast_fp16 = transpose(perm = var_13937, x = attn_output_175_cast_fp16)[name = string("transpose_75")]; - tensor var_13953_cast_fp16 = conv(dilations = var_13953_dilations_0, groups = var_13953_groups_0, pad = var_13953_pad_0, pad_type = var_13953_pad_type_0, strides = var_13953_strides_0, weight = squeeze_17_cast_fp16_to_fp32_to_fp16_palettized, x = var_13938_cast_fp16)[name = string("op_13953_cast_fp16")]; - tensor var_13957 = const()[name = string("op_13957"), val = tensor([0, 2, 1])]; - int32 var_13968 = const()[name = string("op_13968"), val = int32(-1)]; - fp16 const_762_promoted_to_fp16 = const()[name = string("const_762_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_281_cast_fp16 = transpose(perm = var_13957, x = var_13953_cast_fp16)[name = string("transpose_74")]; - tensor var_13970_cast_fp16 = mul(x = hidden_states_281_cast_fp16, y = const_762_promoted_to_fp16)[name = string("op_13970_cast_fp16")]; - bool input_351_interleave_0 = const()[name = string("input_351_interleave_0"), val = bool(false)]; - tensor input_351_cast_fp16 = concat(axis = var_13968, interleave = input_351_interleave_0, values = (hidden_states_281_cast_fp16, var_13970_cast_fp16))[name = string("input_351_cast_fp16")]; - tensor normed_421_axes_0 = const()[name = string("normed_421_axes_0"), val = tensor([-1])]; - fp16 var_13965_to_fp16 = const()[name = string("op_13965_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_421_cast_fp16 = layer_norm(axes = normed_421_axes_0, epsilon = var_13965_to_fp16, x = input_351_cast_fp16)[name = string("normed_421_cast_fp16")]; - tensor normed_423_begin_0 = const()[name = string("normed_423_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_423_end_0 = const()[name = string("normed_423_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_423_end_mask_0 = const()[name = string("normed_423_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_423_cast_fp16 = slice_by_index(begin = normed_423_begin_0, end = normed_423_end_0, end_mask = normed_423_end_mask_0, x = normed_421_cast_fp16)[name = string("normed_423_cast_fp16")]; - tensor var_13984_to_fp16 = const()[name = string("op_13984_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377025024)))]; - tensor attn_output_179_cast_fp16 = mul(x = normed_423_cast_fp16, y = var_13984_to_fp16)[name = string("attn_output_179_cast_fp16")]; - tensor hidden_states_283_cast_fp16 = add(x = hidden_states_273_cast_fp16, y = attn_output_179_cast_fp16)[name = string("hidden_states_283_cast_fp16")]; - int32 var_13997 = const()[name = string("op_13997"), val = int32(-1)]; - fp16 const_766_promoted_to_fp16 = const()[name = string("const_766_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_13999_cast_fp16 = mul(x = hidden_states_283_cast_fp16, y = const_766_promoted_to_fp16)[name = string("op_13999_cast_fp16")]; - bool input_353_interleave_0 = const()[name = string("input_353_interleave_0"), val = bool(false)]; - tensor input_353_cast_fp16 = concat(axis = var_13997, interleave = input_353_interleave_0, values = (hidden_states_283_cast_fp16, var_13999_cast_fp16))[name = string("input_353_cast_fp16")]; - tensor normed_425_axes_0 = const()[name = string("normed_425_axes_0"), val = tensor([-1])]; - fp16 var_13994_to_fp16 = const()[name = string("op_13994_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_425_cast_fp16 = layer_norm(axes = normed_425_axes_0, epsilon = var_13994_to_fp16, x = input_353_cast_fp16)[name = string("normed_425_cast_fp16")]; - tensor normed_427_begin_0 = const()[name = string("normed_427_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_427_end_0 = const()[name = string("normed_427_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_427_end_mask_0 = const()[name = string("normed_427_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_427_cast_fp16 = slice_by_index(begin = normed_427_begin_0, end = normed_427_end_0, end_mask = normed_427_end_mask_0, x = normed_425_cast_fp16)[name = string("normed_427_cast_fp16")]; - tensor var_14013_to_fp16 = const()[name = string("op_14013_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377027392)))]; - tensor x_285_cast_fp16 = mul(x = normed_427_cast_fp16, y = var_14013_to_fp16)[name = string("x_285_cast_fp16")]; - tensor var_14025 = const()[name = string("op_14025"), val = tensor([0, 2, 1])]; - tensor input_355_axes_0 = const()[name = string("input_355_axes_0"), val = tensor([2])]; - tensor var_14026_cast_fp16 = transpose(perm = var_14025, x = x_285_cast_fp16)[name = string("transpose_73")]; - tensor input_355_cast_fp16 = expand_dims(axes = input_355_axes_0, x = var_14026_cast_fp16)[name = string("input_355_cast_fp16")]; - string x_287_pad_type_0 = const()[name = string("x_287_pad_type_0"), val = string("valid")]; - tensor x_287_strides_0 = const()[name = string("x_287_strides_0"), val = tensor([1, 1])]; - tensor x_287_pad_0 = const()[name = string("x_287_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_287_dilations_0 = const()[name = string("x_287_dilations_0"), val = tensor([1, 1])]; - int32 x_287_groups_0 = const()[name = string("x_287_groups_0"), val = int32(1)]; - tensor model_model_layers_17_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1085571328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1091543360))))[name = string("model_model_layers_17_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_287_cast_fp16 = conv(dilations = x_287_dilations_0, groups = x_287_groups_0, pad = x_287_pad_0, pad_type = x_287_pad_type_0, strides = x_287_strides_0, weight = model_model_layers_17_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_355_cast_fp16)[name = string("x_287_cast_fp16")]; - string b_35_pad_type_0 = const()[name = string("b_35_pad_type_0"), val = string("valid")]; - tensor b_35_strides_0 = const()[name = string("b_35_strides_0"), val = tensor([1, 1])]; - tensor b_35_pad_0 = const()[name = string("b_35_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_35_dilations_0 = const()[name = string("b_35_dilations_0"), val = tensor([1, 1])]; - int32 b_35_groups_0 = const()[name = string("b_35_groups_0"), val = int32(1)]; - tensor model_model_layers_17_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1091654016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1097626048))))[name = string("model_model_layers_17_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_35_cast_fp16 = conv(dilations = b_35_dilations_0, groups = b_35_groups_0, pad = b_35_pad_0, pad_type = b_35_pad_type_0, strides = b_35_strides_0, weight = model_model_layers_17_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_355_cast_fp16)[name = string("b_35_cast_fp16")]; - string var_14051_mode_0 = const()[name = string("op_14051_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_14051_cast_fp16 = gelu(mode = var_14051_mode_0, x = x_287_cast_fp16)[name = string("op_14051_cast_fp16")]; - tensor input_357_cast_fp16 = mul(x = var_14051_cast_fp16, y = b_35_cast_fp16)[name = string("input_357_cast_fp16")]; - string e_35_pad_type_0 = const()[name = string("e_35_pad_type_0"), val = string("valid")]; - tensor e_35_strides_0 = const()[name = string("e_35_strides_0"), val = tensor([1, 1])]; - tensor e_35_pad_0 = const()[name = string("e_35_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_35_dilations_0 = const()[name = string("e_35_dilations_0"), val = tensor([1, 1])]; - int32 e_35_groups_0 = const()[name = string("e_35_groups_0"), val = int32(1)]; - tensor model_model_layers_17_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(389195136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395167168))))[name = string("model_model_layers_17_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_35_cast_fp16 = conv(dilations = e_35_dilations_0, groups = e_35_groups_0, pad = e_35_pad_0, pad_type = e_35_pad_type_0, strides = e_35_strides_0, weight = model_model_layers_17_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_357_cast_fp16)[name = string("e_35_cast_fp16")]; - tensor var_14059_axes_0 = const()[name = string("op_14059_axes_0"), val = tensor([2])]; - tensor var_14059_cast_fp16 = squeeze(axes = var_14059_axes_0, x = e_35_cast_fp16)[name = string("op_14059_cast_fp16")]; - tensor var_14060 = const()[name = string("op_14060"), val = tensor([0, 2, 1])]; - int32 var_14071 = const()[name = string("op_14071"), val = int32(-1)]; - fp16 const_770_promoted_to_fp16 = const()[name = string("const_770_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_285_cast_fp16 = transpose(perm = var_14060, x = var_14059_cast_fp16)[name = string("transpose_72")]; - tensor var_14073_cast_fp16 = mul(x = hidden_states_285_cast_fp16, y = const_770_promoted_to_fp16)[name = string("op_14073_cast_fp16")]; - bool input_359_interleave_0 = const()[name = string("input_359_interleave_0"), val = bool(false)]; - tensor input_359_cast_fp16 = concat(axis = var_14071, interleave = input_359_interleave_0, values = (hidden_states_285_cast_fp16, var_14073_cast_fp16))[name = string("input_359_cast_fp16")]; - tensor normed_429_axes_0 = const()[name = string("normed_429_axes_0"), val = tensor([-1])]; - fp16 var_14068_to_fp16 = const()[name = string("op_14068_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_429_cast_fp16 = layer_norm(axes = normed_429_axes_0, epsilon = var_14068_to_fp16, x = input_359_cast_fp16)[name = string("normed_429_cast_fp16")]; - tensor normed_431_begin_0 = const()[name = string("normed_431_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_431_end_0 = const()[name = string("normed_431_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_431_end_mask_0 = const()[name = string("normed_431_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_431_cast_fp16 = slice_by_index(begin = normed_431_begin_0, end = normed_431_end_0, end_mask = normed_431_end_mask_0, x = normed_429_cast_fp16)[name = string("normed_431_cast_fp16")]; - tensor var_14087_to_fp16 = const()[name = string("op_14087_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395185664)))]; - tensor hidden_states_287_cast_fp16 = mul(x = normed_431_cast_fp16, y = var_14087_to_fp16)[name = string("hidden_states_287_cast_fp16")]; - tensor hidden_states_289_cast_fp16 = add(x = hidden_states_283_cast_fp16, y = hidden_states_287_cast_fp16)[name = string("hidden_states_289_cast_fp16")]; - int32 var_14141 = const()[name = string("op_14141"), val = int32(-1)]; - fp16 const_775_promoted_to_fp16 = const()[name = string("const_775_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_14143_cast_fp16 = mul(x = hidden_states_289_cast_fp16, y = const_775_promoted_to_fp16)[name = string("op_14143_cast_fp16")]; - bool input_361_interleave_0 = const()[name = string("input_361_interleave_0"), val = bool(false)]; - tensor input_361_cast_fp16 = concat(axis = var_14141, interleave = input_361_interleave_0, values = (hidden_states_289_cast_fp16, var_14143_cast_fp16))[name = string("input_361_cast_fp16")]; - tensor normed_433_axes_0 = const()[name = string("normed_433_axes_0"), val = tensor([-1])]; - fp16 var_14138_to_fp16 = const()[name = string("op_14138_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_433_cast_fp16 = layer_norm(axes = normed_433_axes_0, epsilon = var_14138_to_fp16, x = input_361_cast_fp16)[name = string("normed_433_cast_fp16")]; - tensor normed_435_begin_0 = const()[name = string("normed_435_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_435_end_0 = const()[name = string("normed_435_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_435_end_mask_0 = const()[name = string("normed_435_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_435_cast_fp16 = slice_by_index(begin = normed_435_begin_0, end = normed_435_end_0, end_mask = normed_435_end_mask_0, x = normed_433_cast_fp16)[name = string("normed_435_cast_fp16")]; - tensor var_14157_to_fp16 = const()[name = string("op_14157_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395188032)))]; - tensor hidden_states_291_cast_fp16 = mul(x = normed_435_cast_fp16, y = var_14157_to_fp16)[name = string("hidden_states_291_cast_fp16")]; - tensor var_14168 = const()[name = string("op_14168"), val = tensor([0, 2, 1])]; - tensor var_14171_axes_0 = const()[name = string("op_14171_axes_0"), val = tensor([2])]; - tensor var_14169_cast_fp16 = transpose(perm = var_14168, x = hidden_states_291_cast_fp16)[name = string("transpose_71")]; - tensor var_14171_cast_fp16 = expand_dims(axes = var_14171_axes_0, x = var_14169_cast_fp16)[name = string("op_14171_cast_fp16")]; - string query_states_145_pad_type_0 = const()[name = string("query_states_145_pad_type_0"), val = string("valid")]; - tensor query_states_145_strides_0 = const()[name = string("query_states_145_strides_0"), val = tensor([1, 1])]; - tensor query_states_145_pad_0 = const()[name = string("query_states_145_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_145_dilations_0 = const()[name = string("query_states_145_dilations_0"), val = tensor([1, 1])]; - int32 query_states_145_groups_0 = const()[name = string("query_states_145_groups_0"), val = int32(1)]; - tensor query_states_145 = conv(dilations = query_states_145_dilations_0, groups = query_states_145_groups_0, pad = query_states_145_pad_0, pad_type = query_states_145_pad_type_0, strides = query_states_145_strides_0, weight = model_model_layers_18_self_attn_q_proj_weight_palettized, x = var_14171_cast_fp16)[name = string("query_states_145")]; - string key_states_181_pad_type_0 = const()[name = string("key_states_181_pad_type_0"), val = string("valid")]; - tensor key_states_181_strides_0 = const()[name = string("key_states_181_strides_0"), val = tensor([1, 1])]; - tensor key_states_181_pad_0 = const()[name = string("key_states_181_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_181_dilations_0 = const()[name = string("key_states_181_dilations_0"), val = tensor([1, 1])]; - int32 key_states_181_groups_0 = const()[name = string("key_states_181_groups_0"), val = int32(1)]; - tensor key_states_181 = conv(dilations = key_states_181_dilations_0, groups = key_states_181_groups_0, pad = key_states_181_pad_0, pad_type = key_states_181_pad_type_0, strides = key_states_181_strides_0, weight = model_model_layers_18_self_attn_k_proj_weight_palettized, x = var_14171_cast_fp16)[name = string("key_states_181")]; - string value_states_145_pad_type_0 = const()[name = string("value_states_145_pad_type_0"), val = string("valid")]; - tensor value_states_145_strides_0 = const()[name = string("value_states_145_strides_0"), val = tensor([1, 1])]; - tensor value_states_145_pad_0 = const()[name = string("value_states_145_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_145_dilations_0 = const()[name = string("value_states_145_dilations_0"), val = tensor([1, 1])]; - int32 value_states_145_groups_0 = const()[name = string("value_states_145_groups_0"), val = int32(1)]; - tensor value_states_145 = conv(dilations = value_states_145_dilations_0, groups = value_states_145_groups_0, pad = value_states_145_pad_0, pad_type = value_states_145_pad_type_0, strides = value_states_145_strides_0, weight = model_model_layers_18_self_attn_v_proj_weight_palettized, x = var_14171_cast_fp16)[name = string("value_states_145")]; - tensor var_14213 = const()[name = string("op_14213"), val = tensor([1, 4, 256, 64])]; - tensor var_14214 = reshape(shape = var_14213, x = query_states_145)[name = string("op_14214")]; - tensor var_14219 = const()[name = string("op_14219"), val = tensor([0, 1, 3, 2])]; - tensor var_14224 = const()[name = string("op_14224"), val = tensor([1, 1, 256, 64])]; - tensor var_14225 = reshape(shape = var_14224, x = key_states_181)[name = string("op_14225")]; - tensor var_14230 = const()[name = string("op_14230"), val = tensor([0, 1, 3, 2])]; - tensor var_14235 = const()[name = string("op_14235"), val = tensor([1, 1, 256, 64])]; - tensor var_14236 = reshape(shape = var_14235, x = value_states_145)[name = string("op_14236")]; - tensor var_14241 = const()[name = string("op_14241"), val = tensor([0, 1, 3, 2])]; - int32 var_14252 = const()[name = string("op_14252"), val = int32(-1)]; - fp16 const_780_promoted = const()[name = string("const_780_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_293 = transpose(perm = var_14219, x = var_14214)[name = string("transpose_70")]; - tensor var_14254 = mul(x = hidden_states_293, y = const_780_promoted)[name = string("op_14254")]; - bool input_365_interleave_0 = const()[name = string("input_365_interleave_0"), val = bool(false)]; - tensor input_365 = concat(axis = var_14252, interleave = input_365_interleave_0, values = (hidden_states_293, var_14254))[name = string("input_365")]; - tensor normed_437_axes_0 = const()[name = string("normed_437_axes_0"), val = tensor([-1])]; - fp16 var_14249_to_fp16 = const()[name = string("op_14249_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_437_cast_fp16 = layer_norm(axes = normed_437_axes_0, epsilon = var_14249_to_fp16, x = input_365)[name = string("normed_437_cast_fp16")]; - tensor normed_439_begin_0 = const()[name = string("normed_439_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_439_end_0 = const()[name = string("normed_439_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_439_end_mask_0 = const()[name = string("normed_439_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_439 = slice_by_index(begin = normed_439_begin_0, end = normed_439_end_0, end_mask = normed_439_end_mask_0, x = normed_437_cast_fp16)[name = string("normed_439")]; - tensor var_14268_to_fp16 = const()[name = string("op_14268_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395190400)))]; - tensor q_37_cast_fp16 = mul(x = normed_439, y = var_14268_to_fp16)[name = string("q_37_cast_fp16")]; - int32 var_14279 = const()[name = string("op_14279"), val = int32(-1)]; - fp16 const_784_promoted = const()[name = string("const_784_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_295 = transpose(perm = var_14230, x = var_14225)[name = string("transpose_69")]; - tensor var_14281 = mul(x = hidden_states_295, y = const_784_promoted)[name = string("op_14281")]; - bool input_367_interleave_0 = const()[name = string("input_367_interleave_0"), val = bool(false)]; - tensor input_367 = concat(axis = var_14279, interleave = input_367_interleave_0, values = (hidden_states_295, var_14281))[name = string("input_367")]; - tensor normed_441_axes_0 = const()[name = string("normed_441_axes_0"), val = tensor([-1])]; - fp16 var_14276_to_fp16 = const()[name = string("op_14276_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_441_cast_fp16 = layer_norm(axes = normed_441_axes_0, epsilon = var_14276_to_fp16, x = input_367)[name = string("normed_441_cast_fp16")]; - tensor normed_443_begin_0 = const()[name = string("normed_443_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_443_end_0 = const()[name = string("normed_443_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_443_end_mask_0 = const()[name = string("normed_443_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_443 = slice_by_index(begin = normed_443_begin_0, end = normed_443_end_0, end_mask = normed_443_end_mask_0, x = normed_441_cast_fp16)[name = string("normed_443")]; - tensor var_14295_to_fp16 = const()[name = string("op_14295_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395190976)))]; - tensor k_37_cast_fp16 = mul(x = normed_443, y = var_14295_to_fp16)[name = string("k_37_cast_fp16")]; - tensor var_14309_cast_fp16 = mul(x = q_37_cast_fp16, y = cos_5)[name = string("op_14309_cast_fp16")]; - tensor x1_73_begin_0 = const()[name = string("x1_73_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_73_end_0 = const()[name = string("x1_73_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_73_end_mask_0 = const()[name = string("x1_73_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_73_cast_fp16 = slice_by_index(begin = x1_73_begin_0, end = x1_73_end_0, end_mask = x1_73_end_mask_0, x = q_37_cast_fp16)[name = string("x1_73_cast_fp16")]; - tensor x2_73_begin_0 = const()[name = string("x2_73_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_73_end_0 = const()[name = string("x2_73_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_73_end_mask_0 = const()[name = string("x2_73_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_73_cast_fp16 = slice_by_index(begin = x2_73_begin_0, end = x2_73_end_0, end_mask = x2_73_end_mask_0, x = q_37_cast_fp16)[name = string("x2_73_cast_fp16")]; - fp16 const_790_promoted_to_fp16 = const()[name = string("const_790_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_14330_cast_fp16 = mul(x = x2_73_cast_fp16, y = const_790_promoted_to_fp16)[name = string("op_14330_cast_fp16")]; - int32 var_14332 = const()[name = string("op_14332"), val = int32(-1)]; - bool var_14333_interleave_0 = const()[name = string("op_14333_interleave_0"), val = bool(false)]; - tensor var_14333_cast_fp16 = concat(axis = var_14332, interleave = var_14333_interleave_0, values = (var_14330_cast_fp16, x1_73_cast_fp16))[name = string("op_14333_cast_fp16")]; - tensor var_14334_cast_fp16 = mul(x = var_14333_cast_fp16, y = sin_5)[name = string("op_14334_cast_fp16")]; - tensor query_states_147_cast_fp16 = add(x = var_14309_cast_fp16, y = var_14334_cast_fp16)[name = string("query_states_147_cast_fp16")]; - tensor var_14337_cast_fp16 = mul(x = k_37_cast_fp16, y = cos_5)[name = string("op_14337_cast_fp16")]; - tensor x1_75_begin_0 = const()[name = string("x1_75_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_75_end_0 = const()[name = string("x1_75_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_75_end_mask_0 = const()[name = string("x1_75_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_75_cast_fp16 = slice_by_index(begin = x1_75_begin_0, end = x1_75_end_0, end_mask = x1_75_end_mask_0, x = k_37_cast_fp16)[name = string("x1_75_cast_fp16")]; - tensor x2_75_begin_0 = const()[name = string("x2_75_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_75_end_0 = const()[name = string("x2_75_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_75_end_mask_0 = const()[name = string("x2_75_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_75_cast_fp16 = slice_by_index(begin = x2_75_begin_0, end = x2_75_end_0, end_mask = x2_75_end_mask_0, x = k_37_cast_fp16)[name = string("x2_75_cast_fp16")]; - fp16 const_793_promoted_to_fp16 = const()[name = string("const_793_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_14358_cast_fp16 = mul(x = x2_75_cast_fp16, y = const_793_promoted_to_fp16)[name = string("op_14358_cast_fp16")]; - int32 var_14360 = const()[name = string("op_14360"), val = int32(-1)]; - bool var_14361_interleave_0 = const()[name = string("op_14361_interleave_0"), val = bool(false)]; - tensor var_14361_cast_fp16 = concat(axis = var_14360, interleave = var_14361_interleave_0, values = (var_14358_cast_fp16, x1_75_cast_fp16))[name = string("op_14361_cast_fp16")]; - tensor var_14362_cast_fp16 = mul(x = var_14361_cast_fp16, y = sin_5)[name = string("op_14362_cast_fp16")]; - tensor key_states_183_cast_fp16 = add(x = var_14337_cast_fp16, y = var_14362_cast_fp16)[name = string("key_states_183_cast_fp16")]; - tensor expand_dims_216 = const()[name = string("expand_dims_216"), val = tensor([15])]; - tensor expand_dims_217 = const()[name = string("expand_dims_217"), val = tensor([0])]; - tensor expand_dims_219 = const()[name = string("expand_dims_219"), val = tensor([0])]; - tensor expand_dims_220 = const()[name = string("expand_dims_220"), val = tensor([16])]; - int32 concat_326_axis_0 = const()[name = string("concat_326_axis_0"), val = int32(0)]; - bool concat_326_interleave_0 = const()[name = string("concat_326_interleave_0"), val = bool(false)]; - tensor concat_326 = concat(axis = concat_326_axis_0, interleave = concat_326_interleave_0, values = (expand_dims_216, expand_dims_217, current_pos, expand_dims_219))[name = string("concat_326")]; - tensor concat_327_values1_0 = const()[name = string("concat_327_values1_0"), val = tensor([0])]; - tensor concat_327_values3_0 = const()[name = string("concat_327_values3_0"), val = tensor([0])]; - int32 concat_327_axis_0 = const()[name = string("concat_327_axis_0"), val = int32(0)]; - bool concat_327_interleave_0 = const()[name = string("concat_327_interleave_0"), val = bool(false)]; - tensor concat_327 = concat(axis = concat_327_axis_0, interleave = concat_327_interleave_0, values = (expand_dims_220, concat_327_values1_0, end_pos_1, concat_327_values3_0))[name = string("concat_327")]; - tensor model_model_kv_cache_local_internal_tensor_assign_31_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_31_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_31_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_31_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_31_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_31_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_31_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_31_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_31_cast_fp16 = slice_update(begin = concat_326, begin_mask = model_model_kv_cache_local_internal_tensor_assign_31_begin_mask_0, end = concat_327, end_mask = model_model_kv_cache_local_internal_tensor_assign_31_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_31_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_31_stride_0, update = key_states_183_cast_fp16, x = coreml_update_state_85)[name = string("model_model_kv_cache_local_internal_tensor_assign_31_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_31_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_140_write_state")]; - tensor coreml_update_state_88 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_140")]; - tensor expand_dims_222 = const()[name = string("expand_dims_222"), val = tensor([37])]; - tensor expand_dims_223 = const()[name = string("expand_dims_223"), val = tensor([0])]; - tensor expand_dims_225 = const()[name = string("expand_dims_225"), val = tensor([0])]; - tensor expand_dims_226 = const()[name = string("expand_dims_226"), val = tensor([38])]; - int32 concat_330_axis_0 = const()[name = string("concat_330_axis_0"), val = int32(0)]; - bool concat_330_interleave_0 = const()[name = string("concat_330_interleave_0"), val = bool(false)]; - tensor concat_330 = concat(axis = concat_330_axis_0, interleave = concat_330_interleave_0, values = (expand_dims_222, expand_dims_223, current_pos, expand_dims_225))[name = string("concat_330")]; - tensor concat_331_values1_0 = const()[name = string("concat_331_values1_0"), val = tensor([0])]; - tensor concat_331_values3_0 = const()[name = string("concat_331_values3_0"), val = tensor([0])]; - int32 concat_331_axis_0 = const()[name = string("concat_331_axis_0"), val = int32(0)]; - bool concat_331_interleave_0 = const()[name = string("concat_331_interleave_0"), val = bool(false)]; - tensor concat_331 = concat(axis = concat_331_axis_0, interleave = concat_331_interleave_0, values = (expand_dims_226, concat_331_values1_0, end_pos_1, concat_331_values3_0))[name = string("concat_331")]; - tensor model_model_kv_cache_local_internal_tensor_assign_32_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_32_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_32_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_32_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_32_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_32_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_32_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_32_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor value_states_147 = transpose(perm = var_14241, x = var_14236)[name = string("transpose_68")]; - tensor model_model_kv_cache_local_internal_tensor_assign_32_cast_fp16 = slice_update(begin = concat_330, begin_mask = model_model_kv_cache_local_internal_tensor_assign_32_begin_mask_0, end = concat_331, end_mask = model_model_kv_cache_local_internal_tensor_assign_32_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_32_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_32_stride_0, update = value_states_147, x = coreml_update_state_88)[name = string("model_model_kv_cache_local_internal_tensor_assign_32_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_32_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_141_write_state")]; - tensor coreml_update_state_89 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_141")]; - tensor var_14461_begin_0 = const()[name = string("op_14461_begin_0"), val = tensor([15, 0, 0, 0])]; - tensor var_14461_end_0 = const()[name = string("op_14461_end_0"), val = tensor([16, 1, 512, 256])]; - tensor var_14461_end_mask_0 = const()[name = string("op_14461_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_14461_cast_fp16 = slice_by_index(begin = var_14461_begin_0, end = var_14461_end_0, end_mask = var_14461_end_mask_0, x = coreml_update_state_89)[name = string("op_14461_cast_fp16")]; - tensor var_14468_begin_0 = const()[name = string("op_14468_begin_0"), val = tensor([37, 0, 0, 0])]; - tensor var_14468_end_0 = const()[name = string("op_14468_end_0"), val = tensor([38, 1, 512, 256])]; - tensor var_14468_end_mask_0 = const()[name = string("op_14468_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_14468_cast_fp16 = slice_by_index(begin = var_14468_begin_0, end = var_14468_end_0, end_mask = var_14468_end_mask_0, x = coreml_update_state_89)[name = string("op_14468_cast_fp16")]; - tensor var_14507 = const()[name = string("op_14507"), val = tensor([1, 4, 1, 1])]; - tensor x_293_cast_fp16 = tile(reps = var_14507, x = var_14461_cast_fp16)[name = string("x_293_cast_fp16")]; - tensor var_14527 = const()[name = string("op_14527"), val = tensor([1, 4, 1, 1])]; - tensor x_299_cast_fp16 = tile(reps = var_14527, x = var_14468_cast_fp16)[name = string("x_299_cast_fp16")]; - bool var_14554_transpose_x_0 = const()[name = string("op_14554_transpose_x_0"), val = bool(false)]; - bool var_14554_transpose_y_0 = const()[name = string("op_14554_transpose_y_0"), val = bool(true)]; - tensor var_14554 = matmul(transpose_x = var_14554_transpose_x_0, transpose_y = var_14554_transpose_y_0, x = query_states_147_cast_fp16, y = x_293_cast_fp16)[name = string("op_14554")]; - fp16 var_14555_to_fp16 = const()[name = string("op_14555_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_73_cast_fp16 = mul(x = var_14554, y = var_14555_to_fp16)[name = string("attn_weights_73_cast_fp16")]; - tensor attn_weights_75_cast_fp16 = add(x = attn_weights_73_cast_fp16, y = mask_slice_1)[name = string("attn_weights_75_cast_fp16")]; - int32 var_14590 = const()[name = string("op_14590"), val = int32(-1)]; - tensor var_14592_cast_fp16 = softmax(axis = var_14590, x = attn_weights_75_cast_fp16)[name = string("op_14592_cast_fp16")]; - tensor concat_336 = const()[name = string("concat_336"), val = tensor([4, 64, 512])]; - tensor reshape_54_cast_fp16 = reshape(shape = concat_336, x = var_14592_cast_fp16)[name = string("reshape_54_cast_fp16")]; - tensor concat_337 = const()[name = string("concat_337"), val = tensor([4, 512, 256])]; - tensor reshape_55_cast_fp16 = reshape(shape = concat_337, x = x_299_cast_fp16)[name = string("reshape_55_cast_fp16")]; - bool matmul_18_transpose_x_0 = const()[name = string("matmul_18_transpose_x_0"), val = bool(false)]; - bool matmul_18_transpose_y_0 = const()[name = string("matmul_18_transpose_y_0"), val = bool(false)]; - tensor matmul_18_cast_fp16 = matmul(transpose_x = matmul_18_transpose_x_0, transpose_y = matmul_18_transpose_y_0, x = reshape_54_cast_fp16, y = reshape_55_cast_fp16)[name = string("matmul_18_cast_fp16")]; - tensor concat_341 = const()[name = string("concat_341"), val = tensor([1, 4, 64, 256])]; - tensor reshape_56_cast_fp16 = reshape(shape = concat_341, x = matmul_18_cast_fp16)[name = string("reshape_56_cast_fp16")]; - tensor var_14604_perm_0 = const()[name = string("op_14604_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_14623 = const()[name = string("op_14623"), val = tensor([1, 64, 1024])]; - tensor var_14604_cast_fp16 = transpose(perm = var_14604_perm_0, x = reshape_56_cast_fp16)[name = string("transpose_67")]; - tensor attn_output_185_cast_fp16 = reshape(shape = var_14623, x = var_14604_cast_fp16)[name = string("attn_output_185_cast_fp16")]; - tensor var_14628 = const()[name = string("op_14628"), val = tensor([0, 2, 1])]; - string var_14644_pad_type_0 = const()[name = string("op_14644_pad_type_0"), val = string("valid")]; - int32 var_14644_groups_0 = const()[name = string("op_14644_groups_0"), val = int32(1)]; - tensor var_14644_strides_0 = const()[name = string("op_14644_strides_0"), val = tensor([1])]; - tensor var_14644_pad_0 = const()[name = string("op_14644_pad_0"), val = tensor([0, 0])]; - tensor var_14644_dilations_0 = const()[name = string("op_14644_dilations_0"), val = tensor([1])]; - tensor squeeze_18_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395191552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396076352))))[name = string("squeeze_18_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_14629_cast_fp16 = transpose(perm = var_14628, x = attn_output_185_cast_fp16)[name = string("transpose_66")]; - tensor var_14644_cast_fp16 = conv(dilations = var_14644_dilations_0, groups = var_14644_groups_0, pad = var_14644_pad_0, pad_type = var_14644_pad_type_0, strides = var_14644_strides_0, weight = squeeze_18_cast_fp16_to_fp32_to_fp16_palettized, x = var_14629_cast_fp16)[name = string("op_14644_cast_fp16")]; - tensor var_14648 = const()[name = string("op_14648"), val = tensor([0, 2, 1])]; - int32 var_14659 = const()[name = string("op_14659"), val = int32(-1)]; - fp16 const_805_promoted_to_fp16 = const()[name = string("const_805_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_297_cast_fp16 = transpose(perm = var_14648, x = var_14644_cast_fp16)[name = string("transpose_65")]; - tensor var_14661_cast_fp16 = mul(x = hidden_states_297_cast_fp16, y = const_805_promoted_to_fp16)[name = string("op_14661_cast_fp16")]; - bool input_371_interleave_0 = const()[name = string("input_371_interleave_0"), val = bool(false)]; - tensor input_371_cast_fp16 = concat(axis = var_14659, interleave = input_371_interleave_0, values = (hidden_states_297_cast_fp16, var_14661_cast_fp16))[name = string("input_371_cast_fp16")]; - tensor normed_445_axes_0 = const()[name = string("normed_445_axes_0"), val = tensor([-1])]; - fp16 var_14656_to_fp16 = const()[name = string("op_14656_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_445_cast_fp16 = layer_norm(axes = normed_445_axes_0, epsilon = var_14656_to_fp16, x = input_371_cast_fp16)[name = string("normed_445_cast_fp16")]; - tensor normed_447_begin_0 = const()[name = string("normed_447_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_447_end_0 = const()[name = string("normed_447_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_447_end_mask_0 = const()[name = string("normed_447_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_447_cast_fp16 = slice_by_index(begin = normed_447_begin_0, end = normed_447_end_0, end_mask = normed_447_end_mask_0, x = normed_445_cast_fp16)[name = string("normed_447_cast_fp16")]; - tensor var_14675_to_fp16 = const()[name = string("op_14675_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396094848)))]; - tensor attn_output_189_cast_fp16 = mul(x = normed_447_cast_fp16, y = var_14675_to_fp16)[name = string("attn_output_189_cast_fp16")]; - tensor hidden_states_299_cast_fp16 = add(x = hidden_states_289_cast_fp16, y = attn_output_189_cast_fp16)[name = string("hidden_states_299_cast_fp16")]; - int32 var_14688 = const()[name = string("op_14688"), val = int32(-1)]; - fp16 const_809_promoted_to_fp16 = const()[name = string("const_809_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_14690_cast_fp16 = mul(x = hidden_states_299_cast_fp16, y = const_809_promoted_to_fp16)[name = string("op_14690_cast_fp16")]; - bool input_373_interleave_0 = const()[name = string("input_373_interleave_0"), val = bool(false)]; - tensor input_373_cast_fp16 = concat(axis = var_14688, interleave = input_373_interleave_0, values = (hidden_states_299_cast_fp16, var_14690_cast_fp16))[name = string("input_373_cast_fp16")]; - tensor normed_449_axes_0 = const()[name = string("normed_449_axes_0"), val = tensor([-1])]; - fp16 var_14685_to_fp16 = const()[name = string("op_14685_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_449_cast_fp16 = layer_norm(axes = normed_449_axes_0, epsilon = var_14685_to_fp16, x = input_373_cast_fp16)[name = string("normed_449_cast_fp16")]; - tensor normed_451_begin_0 = const()[name = string("normed_451_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_451_end_0 = const()[name = string("normed_451_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_451_end_mask_0 = const()[name = string("normed_451_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_451_cast_fp16 = slice_by_index(begin = normed_451_begin_0, end = normed_451_end_0, end_mask = normed_451_end_mask_0, x = normed_449_cast_fp16)[name = string("normed_451_cast_fp16")]; - tensor var_14704_to_fp16 = const()[name = string("op_14704_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396097216)))]; - tensor x_301_cast_fp16 = mul(x = normed_451_cast_fp16, y = var_14704_to_fp16)[name = string("x_301_cast_fp16")]; - tensor var_14716 = const()[name = string("op_14716"), val = tensor([0, 2, 1])]; - tensor input_375_axes_0 = const()[name = string("input_375_axes_0"), val = tensor([2])]; - tensor var_14717_cast_fp16 = transpose(perm = var_14716, x = x_301_cast_fp16)[name = string("transpose_64")]; - tensor input_375_cast_fp16 = expand_dims(axes = input_375_axes_0, x = var_14717_cast_fp16)[name = string("input_375_cast_fp16")]; - string x_303_pad_type_0 = const()[name = string("x_303_pad_type_0"), val = string("valid")]; - tensor x_303_strides_0 = const()[name = string("x_303_strides_0"), val = tensor([1, 1])]; - tensor x_303_pad_0 = const()[name = string("x_303_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_303_dilations_0 = const()[name = string("x_303_dilations_0"), val = tensor([1, 1])]; - int32 x_303_groups_0 = const()[name = string("x_303_groups_0"), val = int32(1)]; - tensor model_model_layers_18_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(779945088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1097736704))))[name = string("model_model_layers_18_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_303_cast_fp16 = conv(dilations = x_303_dilations_0, groups = x_303_groups_0, pad = x_303_pad_0, pad_type = x_303_pad_type_0, strides = x_303_strides_0, weight = model_model_layers_18_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_375_cast_fp16)[name = string("x_303_cast_fp16")]; - string b_37_pad_type_0 = const()[name = string("b_37_pad_type_0"), val = string("valid")]; - tensor b_37_strides_0 = const()[name = string("b_37_strides_0"), val = tensor([1, 1])]; - tensor b_37_pad_0 = const()[name = string("b_37_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_37_dilations_0 = const()[name = string("b_37_dilations_0"), val = tensor([1, 1])]; - int32 b_37_groups_0 = const()[name = string("b_37_groups_0"), val = int32(1)]; - tensor model_model_layers_18_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1097847360))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1103819392))))[name = string("model_model_layers_18_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_37_cast_fp16 = conv(dilations = b_37_dilations_0, groups = b_37_groups_0, pad = b_37_pad_0, pad_type = b_37_pad_type_0, strides = b_37_strides_0, weight = model_model_layers_18_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_375_cast_fp16)[name = string("b_37_cast_fp16")]; - string var_14742_mode_0 = const()[name = string("op_14742_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_14742_cast_fp16 = gelu(mode = var_14742_mode_0, x = x_303_cast_fp16)[name = string("op_14742_cast_fp16")]; - tensor input_377_cast_fp16 = mul(x = var_14742_cast_fp16, y = b_37_cast_fp16)[name = string("input_377_cast_fp16")]; - string e_37_pad_type_0 = const()[name = string("e_37_pad_type_0"), val = string("valid")]; - tensor e_37_strides_0 = const()[name = string("e_37_strides_0"), val = tensor([1, 1])]; - tensor e_37_pad_0 = const()[name = string("e_37_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_37_dilations_0 = const()[name = string("e_37_dilations_0"), val = tensor([1, 1])]; - int32 e_37_groups_0 = const()[name = string("e_37_groups_0"), val = int32(1)]; - tensor model_model_layers_18_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408264960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414236992))))[name = string("model_model_layers_18_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_37_cast_fp16 = conv(dilations = e_37_dilations_0, groups = e_37_groups_0, pad = e_37_pad_0, pad_type = e_37_pad_type_0, strides = e_37_strides_0, weight = model_model_layers_18_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_377_cast_fp16)[name = string("e_37_cast_fp16")]; - tensor var_14750_axes_0 = const()[name = string("op_14750_axes_0"), val = tensor([2])]; - tensor var_14750_cast_fp16 = squeeze(axes = var_14750_axes_0, x = e_37_cast_fp16)[name = string("op_14750_cast_fp16")]; - tensor var_14751 = const()[name = string("op_14751"), val = tensor([0, 2, 1])]; - int32 var_14762 = const()[name = string("op_14762"), val = int32(-1)]; - fp16 const_813_promoted_to_fp16 = const()[name = string("const_813_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_301_cast_fp16 = transpose(perm = var_14751, x = var_14750_cast_fp16)[name = string("transpose_63")]; - tensor var_14764_cast_fp16 = mul(x = hidden_states_301_cast_fp16, y = const_813_promoted_to_fp16)[name = string("op_14764_cast_fp16")]; - bool input_379_interleave_0 = const()[name = string("input_379_interleave_0"), val = bool(false)]; - tensor input_379_cast_fp16 = concat(axis = var_14762, interleave = input_379_interleave_0, values = (hidden_states_301_cast_fp16, var_14764_cast_fp16))[name = string("input_379_cast_fp16")]; - tensor normed_453_axes_0 = const()[name = string("normed_453_axes_0"), val = tensor([-1])]; - fp16 var_14759_to_fp16 = const()[name = string("op_14759_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_453_cast_fp16 = layer_norm(axes = normed_453_axes_0, epsilon = var_14759_to_fp16, x = input_379_cast_fp16)[name = string("normed_453_cast_fp16")]; - tensor normed_455_begin_0 = const()[name = string("normed_455_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_455_end_0 = const()[name = string("normed_455_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_455_end_mask_0 = const()[name = string("normed_455_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_455_cast_fp16 = slice_by_index(begin = normed_455_begin_0, end = normed_455_end_0, end_mask = normed_455_end_mask_0, x = normed_453_cast_fp16)[name = string("normed_455_cast_fp16")]; - tensor var_14778_to_fp16 = const()[name = string("op_14778_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414255488)))]; - tensor hidden_states_303_cast_fp16 = mul(x = normed_455_cast_fp16, y = var_14778_to_fp16)[name = string("hidden_states_303_cast_fp16")]; - tensor hidden_states_305_cast_fp16 = add(x = hidden_states_299_cast_fp16, y = hidden_states_303_cast_fp16)[name = string("hidden_states_305_cast_fp16")]; - int32 var_14832 = const()[name = string("op_14832"), val = int32(-1)]; - fp16 const_818_promoted_to_fp16 = const()[name = string("const_818_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_14834_cast_fp16 = mul(x = hidden_states_305_cast_fp16, y = const_818_promoted_to_fp16)[name = string("op_14834_cast_fp16")]; - bool input_381_interleave_0 = const()[name = string("input_381_interleave_0"), val = bool(false)]; - tensor input_381_cast_fp16 = concat(axis = var_14832, interleave = input_381_interleave_0, values = (hidden_states_305_cast_fp16, var_14834_cast_fp16))[name = string("input_381_cast_fp16")]; - tensor normed_457_axes_0 = const()[name = string("normed_457_axes_0"), val = tensor([-1])]; - fp16 var_14829_to_fp16 = const()[name = string("op_14829_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_457_cast_fp16 = layer_norm(axes = normed_457_axes_0, epsilon = var_14829_to_fp16, x = input_381_cast_fp16)[name = string("normed_457_cast_fp16")]; - tensor normed_459_begin_0 = const()[name = string("normed_459_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_459_end_0 = const()[name = string("normed_459_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_459_end_mask_0 = const()[name = string("normed_459_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_459_cast_fp16 = slice_by_index(begin = normed_459_begin_0, end = normed_459_end_0, end_mask = normed_459_end_mask_0, x = normed_457_cast_fp16)[name = string("normed_459_cast_fp16")]; - tensor var_14848_to_fp16 = const()[name = string("op_14848_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414257856)))]; - tensor hidden_states_307_cast_fp16 = mul(x = normed_459_cast_fp16, y = var_14848_to_fp16)[name = string("hidden_states_307_cast_fp16")]; - tensor var_14859 = const()[name = string("op_14859"), val = tensor([0, 2, 1])]; - tensor var_14862_axes_0 = const()[name = string("op_14862_axes_0"), val = tensor([2])]; - tensor var_14860_cast_fp16 = transpose(perm = var_14859, x = hidden_states_307_cast_fp16)[name = string("transpose_62")]; - tensor var_14862_cast_fp16 = expand_dims(axes = var_14862_axes_0, x = var_14860_cast_fp16)[name = string("op_14862_cast_fp16")]; - string query_states_153_pad_type_0 = const()[name = string("query_states_153_pad_type_0"), val = string("valid")]; - tensor query_states_153_strides_0 = const()[name = string("query_states_153_strides_0"), val = tensor([1, 1])]; - tensor query_states_153_pad_0 = const()[name = string("query_states_153_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_153_dilations_0 = const()[name = string("query_states_153_dilations_0"), val = tensor([1, 1])]; - int32 query_states_153_groups_0 = const()[name = string("query_states_153_groups_0"), val = int32(1)]; - tensor query_states_153 = conv(dilations = query_states_153_dilations_0, groups = query_states_153_groups_0, pad = query_states_153_pad_0, pad_type = query_states_153_pad_type_0, strides = query_states_153_strides_0, weight = model_model_layers_19_self_attn_q_proj_weight_palettized, x = var_14862_cast_fp16)[name = string("query_states_153")]; - string key_states_191_pad_type_0 = const()[name = string("key_states_191_pad_type_0"), val = string("valid")]; - tensor key_states_191_strides_0 = const()[name = string("key_states_191_strides_0"), val = tensor([1, 1])]; - tensor key_states_191_pad_0 = const()[name = string("key_states_191_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_191_dilations_0 = const()[name = string("key_states_191_dilations_0"), val = tensor([1, 1])]; - int32 key_states_191_groups_0 = const()[name = string("key_states_191_groups_0"), val = int32(1)]; - tensor key_states_191 = conv(dilations = key_states_191_dilations_0, groups = key_states_191_groups_0, pad = key_states_191_pad_0, pad_type = key_states_191_pad_type_0, strides = key_states_191_strides_0, weight = model_model_layers_19_self_attn_k_proj_weight_palettized, x = var_14862_cast_fp16)[name = string("key_states_191")]; - string value_states_153_pad_type_0 = const()[name = string("value_states_153_pad_type_0"), val = string("valid")]; - tensor value_states_153_strides_0 = const()[name = string("value_states_153_strides_0"), val = tensor([1, 1])]; - tensor value_states_153_pad_0 = const()[name = string("value_states_153_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_153_dilations_0 = const()[name = string("value_states_153_dilations_0"), val = tensor([1, 1])]; - int32 value_states_153_groups_0 = const()[name = string("value_states_153_groups_0"), val = int32(1)]; - tensor value_states_153 = conv(dilations = value_states_153_dilations_0, groups = value_states_153_groups_0, pad = value_states_153_pad_0, pad_type = value_states_153_pad_type_0, strides = value_states_153_strides_0, weight = model_model_layers_19_self_attn_v_proj_weight_palettized, x = var_14862_cast_fp16)[name = string("value_states_153")]; - tensor var_14904 = const()[name = string("op_14904"), val = tensor([1, 4, 256, 64])]; - tensor var_14905 = reshape(shape = var_14904, x = query_states_153)[name = string("op_14905")]; - tensor var_14910 = const()[name = string("op_14910"), val = tensor([0, 1, 3, 2])]; - tensor var_14915 = const()[name = string("op_14915"), val = tensor([1, 1, 256, 64])]; - tensor var_14916 = reshape(shape = var_14915, x = key_states_191)[name = string("op_14916")]; - tensor var_14921 = const()[name = string("op_14921"), val = tensor([0, 1, 3, 2])]; - tensor var_14926 = const()[name = string("op_14926"), val = tensor([1, 1, 256, 64])]; - tensor var_14927 = reshape(shape = var_14926, x = value_states_153)[name = string("op_14927")]; - tensor var_14932 = const()[name = string("op_14932"), val = tensor([0, 1, 3, 2])]; - int32 var_14943 = const()[name = string("op_14943"), val = int32(-1)]; - fp16 const_823_promoted = const()[name = string("const_823_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_309 = transpose(perm = var_14910, x = var_14905)[name = string("transpose_61")]; - tensor var_14945 = mul(x = hidden_states_309, y = const_823_promoted)[name = string("op_14945")]; - bool input_385_interleave_0 = const()[name = string("input_385_interleave_0"), val = bool(false)]; - tensor input_385 = concat(axis = var_14943, interleave = input_385_interleave_0, values = (hidden_states_309, var_14945))[name = string("input_385")]; - tensor normed_461_axes_0 = const()[name = string("normed_461_axes_0"), val = tensor([-1])]; - fp16 var_14940_to_fp16 = const()[name = string("op_14940_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_461_cast_fp16 = layer_norm(axes = normed_461_axes_0, epsilon = var_14940_to_fp16, x = input_385)[name = string("normed_461_cast_fp16")]; - tensor normed_463_begin_0 = const()[name = string("normed_463_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_463_end_0 = const()[name = string("normed_463_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_463_end_mask_0 = const()[name = string("normed_463_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_463 = slice_by_index(begin = normed_463_begin_0, end = normed_463_end_0, end_mask = normed_463_end_mask_0, x = normed_461_cast_fp16)[name = string("normed_463")]; - tensor var_14959_to_fp16 = const()[name = string("op_14959_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414260224)))]; - tensor q_39_cast_fp16 = mul(x = normed_463, y = var_14959_to_fp16)[name = string("q_39_cast_fp16")]; - int32 var_14970 = const()[name = string("op_14970"), val = int32(-1)]; - fp16 const_827_promoted = const()[name = string("const_827_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_311 = transpose(perm = var_14921, x = var_14916)[name = string("transpose_60")]; - tensor var_14972 = mul(x = hidden_states_311, y = const_827_promoted)[name = string("op_14972")]; - bool input_387_interleave_0 = const()[name = string("input_387_interleave_0"), val = bool(false)]; - tensor input_387 = concat(axis = var_14970, interleave = input_387_interleave_0, values = (hidden_states_311, var_14972))[name = string("input_387")]; - tensor normed_465_axes_0 = const()[name = string("normed_465_axes_0"), val = tensor([-1])]; - fp16 var_14967_to_fp16 = const()[name = string("op_14967_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_465_cast_fp16 = layer_norm(axes = normed_465_axes_0, epsilon = var_14967_to_fp16, x = input_387)[name = string("normed_465_cast_fp16")]; - tensor normed_467_begin_0 = const()[name = string("normed_467_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_467_end_0 = const()[name = string("normed_467_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_467_end_mask_0 = const()[name = string("normed_467_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_467 = slice_by_index(begin = normed_467_begin_0, end = normed_467_end_0, end_mask = normed_467_end_mask_0, x = normed_465_cast_fp16)[name = string("normed_467")]; - tensor var_14986_to_fp16 = const()[name = string("op_14986_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414260800)))]; - tensor k_39_cast_fp16 = mul(x = normed_467, y = var_14986_to_fp16)[name = string("k_39_cast_fp16")]; - tensor var_15000_cast_fp16 = mul(x = q_39_cast_fp16, y = cos_5)[name = string("op_15000_cast_fp16")]; - tensor x1_77_begin_0 = const()[name = string("x1_77_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_77_end_0 = const()[name = string("x1_77_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_77_end_mask_0 = const()[name = string("x1_77_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_77_cast_fp16 = slice_by_index(begin = x1_77_begin_0, end = x1_77_end_0, end_mask = x1_77_end_mask_0, x = q_39_cast_fp16)[name = string("x1_77_cast_fp16")]; - tensor x2_77_begin_0 = const()[name = string("x2_77_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_77_end_0 = const()[name = string("x2_77_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_77_end_mask_0 = const()[name = string("x2_77_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_77_cast_fp16 = slice_by_index(begin = x2_77_begin_0, end = x2_77_end_0, end_mask = x2_77_end_mask_0, x = q_39_cast_fp16)[name = string("x2_77_cast_fp16")]; - fp16 const_833_promoted_to_fp16 = const()[name = string("const_833_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_15021_cast_fp16 = mul(x = x2_77_cast_fp16, y = const_833_promoted_to_fp16)[name = string("op_15021_cast_fp16")]; - int32 var_15023 = const()[name = string("op_15023"), val = int32(-1)]; - bool var_15024_interleave_0 = const()[name = string("op_15024_interleave_0"), val = bool(false)]; - tensor var_15024_cast_fp16 = concat(axis = var_15023, interleave = var_15024_interleave_0, values = (var_15021_cast_fp16, x1_77_cast_fp16))[name = string("op_15024_cast_fp16")]; - tensor var_15025_cast_fp16 = mul(x = var_15024_cast_fp16, y = sin_5)[name = string("op_15025_cast_fp16")]; - tensor query_states_155_cast_fp16 = add(x = var_15000_cast_fp16, y = var_15025_cast_fp16)[name = string("query_states_155_cast_fp16")]; - tensor var_15028_cast_fp16 = mul(x = k_39_cast_fp16, y = cos_5)[name = string("op_15028_cast_fp16")]; - tensor x1_79_begin_0 = const()[name = string("x1_79_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_79_end_0 = const()[name = string("x1_79_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_79_end_mask_0 = const()[name = string("x1_79_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_79_cast_fp16 = slice_by_index(begin = x1_79_begin_0, end = x1_79_end_0, end_mask = x1_79_end_mask_0, x = k_39_cast_fp16)[name = string("x1_79_cast_fp16")]; - tensor x2_79_begin_0 = const()[name = string("x2_79_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_79_end_0 = const()[name = string("x2_79_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_79_end_mask_0 = const()[name = string("x2_79_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_79_cast_fp16 = slice_by_index(begin = x2_79_begin_0, end = x2_79_end_0, end_mask = x2_79_end_mask_0, x = k_39_cast_fp16)[name = string("x2_79_cast_fp16")]; - fp16 const_836_promoted_to_fp16 = const()[name = string("const_836_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_15049_cast_fp16 = mul(x = x2_79_cast_fp16, y = const_836_promoted_to_fp16)[name = string("op_15049_cast_fp16")]; - int32 var_15051 = const()[name = string("op_15051"), val = int32(-1)]; - bool var_15052_interleave_0 = const()[name = string("op_15052_interleave_0"), val = bool(false)]; - tensor var_15052_cast_fp16 = concat(axis = var_15051, interleave = var_15052_interleave_0, values = (var_15049_cast_fp16, x1_79_cast_fp16))[name = string("op_15052_cast_fp16")]; - tensor var_15053_cast_fp16 = mul(x = var_15052_cast_fp16, y = sin_5)[name = string("op_15053_cast_fp16")]; - tensor key_states_193_cast_fp16 = add(x = var_15028_cast_fp16, y = var_15053_cast_fp16)[name = string("key_states_193_cast_fp16")]; - tensor expand_dims_228 = const()[name = string("expand_dims_228"), val = tensor([16])]; - tensor expand_dims_229 = const()[name = string("expand_dims_229"), val = tensor([0])]; - tensor expand_dims_231 = const()[name = string("expand_dims_231"), val = tensor([0])]; - tensor expand_dims_232 = const()[name = string("expand_dims_232"), val = tensor([17])]; - int32 concat_344_axis_0 = const()[name = string("concat_344_axis_0"), val = int32(0)]; - bool concat_344_interleave_0 = const()[name = string("concat_344_interleave_0"), val = bool(false)]; - tensor concat_344 = concat(axis = concat_344_axis_0, interleave = concat_344_interleave_0, values = (expand_dims_228, expand_dims_229, current_pos, expand_dims_231))[name = string("concat_344")]; - tensor concat_345_values1_0 = const()[name = string("concat_345_values1_0"), val = tensor([0])]; - tensor concat_345_values3_0 = const()[name = string("concat_345_values3_0"), val = tensor([0])]; - int32 concat_345_axis_0 = const()[name = string("concat_345_axis_0"), val = int32(0)]; - bool concat_345_interleave_0 = const()[name = string("concat_345_interleave_0"), val = bool(false)]; - tensor concat_345 = concat(axis = concat_345_axis_0, interleave = concat_345_interleave_0, values = (expand_dims_232, concat_345_values1_0, end_pos_1, concat_345_values3_0))[name = string("concat_345")]; - tensor model_model_kv_cache_local_internal_tensor_assign_33_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_33_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_33_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_33_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_33_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_33_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_33_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_33_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_33_cast_fp16 = slice_update(begin = concat_344, begin_mask = model_model_kv_cache_local_internal_tensor_assign_33_begin_mask_0, end = concat_345, end_mask = model_model_kv_cache_local_internal_tensor_assign_33_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_33_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_33_stride_0, update = key_states_193_cast_fp16, x = coreml_update_state_89)[name = string("model_model_kv_cache_local_internal_tensor_assign_33_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_33_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_142_write_state")]; - tensor coreml_update_state_90 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_142")]; - tensor expand_dims_234 = const()[name = string("expand_dims_234"), val = tensor([38])]; - tensor expand_dims_235 = const()[name = string("expand_dims_235"), val = tensor([0])]; - tensor expand_dims_237 = const()[name = string("expand_dims_237"), val = tensor([0])]; - tensor expand_dims_238 = const()[name = string("expand_dims_238"), val = tensor([39])]; - int32 concat_348_axis_0 = const()[name = string("concat_348_axis_0"), val = int32(0)]; - bool concat_348_interleave_0 = const()[name = string("concat_348_interleave_0"), val = bool(false)]; - tensor concat_348 = concat(axis = concat_348_axis_0, interleave = concat_348_interleave_0, values = (expand_dims_234, expand_dims_235, current_pos, expand_dims_237))[name = string("concat_348")]; - tensor concat_349_values1_0 = const()[name = string("concat_349_values1_0"), val = tensor([0])]; - tensor concat_349_values3_0 = const()[name = string("concat_349_values3_0"), val = tensor([0])]; - int32 concat_349_axis_0 = const()[name = string("concat_349_axis_0"), val = int32(0)]; - bool concat_349_interleave_0 = const()[name = string("concat_349_interleave_0"), val = bool(false)]; - tensor concat_349 = concat(axis = concat_349_axis_0, interleave = concat_349_interleave_0, values = (expand_dims_238, concat_349_values1_0, end_pos_1, concat_349_values3_0))[name = string("concat_349")]; - tensor model_model_kv_cache_local_internal_tensor_assign_34_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_34_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_34_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_34_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_34_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_34_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_34_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_34_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor value_states_155 = transpose(perm = var_14932, x = var_14927)[name = string("transpose_59")]; - tensor model_model_kv_cache_local_internal_tensor_assign_34_cast_fp16 = slice_update(begin = concat_348, begin_mask = model_model_kv_cache_local_internal_tensor_assign_34_begin_mask_0, end = concat_349, end_mask = model_model_kv_cache_local_internal_tensor_assign_34_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_34_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_34_stride_0, update = value_states_155, x = coreml_update_state_90)[name = string("model_model_kv_cache_local_internal_tensor_assign_34_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_34_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_143_write_state")]; - tensor coreml_update_state_91 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_143")]; - tensor var_15152_begin_0 = const()[name = string("op_15152_begin_0"), val = tensor([16, 0, 0, 0])]; - tensor var_15152_end_0 = const()[name = string("op_15152_end_0"), val = tensor([17, 1, 512, 256])]; - tensor var_15152_end_mask_0 = const()[name = string("op_15152_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_15152_cast_fp16 = slice_by_index(begin = var_15152_begin_0, end = var_15152_end_0, end_mask = var_15152_end_mask_0, x = coreml_update_state_91)[name = string("op_15152_cast_fp16")]; - tensor var_15159_begin_0 = const()[name = string("op_15159_begin_0"), val = tensor([38, 0, 0, 0])]; - tensor var_15159_end_0 = const()[name = string("op_15159_end_0"), val = tensor([39, 1, 512, 256])]; - tensor var_15159_end_mask_0 = const()[name = string("op_15159_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_15159_cast_fp16 = slice_by_index(begin = var_15159_begin_0, end = var_15159_end_0, end_mask = var_15159_end_mask_0, x = coreml_update_state_91)[name = string("op_15159_cast_fp16")]; - tensor var_15198 = const()[name = string("op_15198"), val = tensor([1, 4, 1, 1])]; - tensor x_309_cast_fp16 = tile(reps = var_15198, x = var_15152_cast_fp16)[name = string("x_309_cast_fp16")]; - tensor var_15218 = const()[name = string("op_15218"), val = tensor([1, 4, 1, 1])]; - tensor x_315_cast_fp16 = tile(reps = var_15218, x = var_15159_cast_fp16)[name = string("x_315_cast_fp16")]; - bool var_15245_transpose_x_0 = const()[name = string("op_15245_transpose_x_0"), val = bool(false)]; - bool var_15245_transpose_y_0 = const()[name = string("op_15245_transpose_y_0"), val = bool(true)]; - tensor var_15245 = matmul(transpose_x = var_15245_transpose_x_0, transpose_y = var_15245_transpose_y_0, x = query_states_155_cast_fp16, y = x_309_cast_fp16)[name = string("op_15245")]; - fp16 var_15246_to_fp16 = const()[name = string("op_15246_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_77_cast_fp16 = mul(x = var_15245, y = var_15246_to_fp16)[name = string("attn_weights_77_cast_fp16")]; - tensor attn_weights_79_cast_fp16 = add(x = attn_weights_77_cast_fp16, y = mask_slice_1)[name = string("attn_weights_79_cast_fp16")]; - int32 var_15281 = const()[name = string("op_15281"), val = int32(-1)]; - tensor var_15283_cast_fp16 = softmax(axis = var_15281, x = attn_weights_79_cast_fp16)[name = string("op_15283_cast_fp16")]; - tensor concat_354 = const()[name = string("concat_354"), val = tensor([4, 64, 512])]; - tensor reshape_57_cast_fp16 = reshape(shape = concat_354, x = var_15283_cast_fp16)[name = string("reshape_57_cast_fp16")]; - tensor concat_355 = const()[name = string("concat_355"), val = tensor([4, 512, 256])]; - tensor reshape_58_cast_fp16 = reshape(shape = concat_355, x = x_315_cast_fp16)[name = string("reshape_58_cast_fp16")]; - bool matmul_19_transpose_x_0 = const()[name = string("matmul_19_transpose_x_0"), val = bool(false)]; - bool matmul_19_transpose_y_0 = const()[name = string("matmul_19_transpose_y_0"), val = bool(false)]; - tensor matmul_19_cast_fp16 = matmul(transpose_x = matmul_19_transpose_x_0, transpose_y = matmul_19_transpose_y_0, x = reshape_57_cast_fp16, y = reshape_58_cast_fp16)[name = string("matmul_19_cast_fp16")]; - tensor concat_359 = const()[name = string("concat_359"), val = tensor([1, 4, 64, 256])]; - tensor reshape_59_cast_fp16 = reshape(shape = concat_359, x = matmul_19_cast_fp16)[name = string("reshape_59_cast_fp16")]; - tensor var_15295_perm_0 = const()[name = string("op_15295_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_15314 = const()[name = string("op_15314"), val = tensor([1, 64, 1024])]; - tensor var_15295_cast_fp16 = transpose(perm = var_15295_perm_0, x = reshape_59_cast_fp16)[name = string("transpose_58")]; - tensor attn_output_195_cast_fp16 = reshape(shape = var_15314, x = var_15295_cast_fp16)[name = string("attn_output_195_cast_fp16")]; - tensor var_15319 = const()[name = string("op_15319"), val = tensor([0, 2, 1])]; - string var_15335_pad_type_0 = const()[name = string("op_15335_pad_type_0"), val = string("valid")]; - int32 var_15335_groups_0 = const()[name = string("op_15335_groups_0"), val = int32(1)]; - tensor var_15335_strides_0 = const()[name = string("op_15335_strides_0"), val = tensor([1])]; - tensor var_15335_pad_0 = const()[name = string("op_15335_pad_0"), val = tensor([0, 0])]; - tensor var_15335_dilations_0 = const()[name = string("op_15335_dilations_0"), val = tensor([1])]; - tensor squeeze_19_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414261376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415146176))))[name = string("squeeze_19_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_15320_cast_fp16 = transpose(perm = var_15319, x = attn_output_195_cast_fp16)[name = string("transpose_57")]; - tensor var_15335_cast_fp16 = conv(dilations = var_15335_dilations_0, groups = var_15335_groups_0, pad = var_15335_pad_0, pad_type = var_15335_pad_type_0, strides = var_15335_strides_0, weight = squeeze_19_cast_fp16_to_fp32_to_fp16_palettized, x = var_15320_cast_fp16)[name = string("op_15335_cast_fp16")]; - tensor var_15339 = const()[name = string("op_15339"), val = tensor([0, 2, 1])]; - int32 var_15350 = const()[name = string("op_15350"), val = int32(-1)]; - fp16 const_848_promoted_to_fp16 = const()[name = string("const_848_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_313_cast_fp16 = transpose(perm = var_15339, x = var_15335_cast_fp16)[name = string("transpose_56")]; - tensor var_15352_cast_fp16 = mul(x = hidden_states_313_cast_fp16, y = const_848_promoted_to_fp16)[name = string("op_15352_cast_fp16")]; - bool input_391_interleave_0 = const()[name = string("input_391_interleave_0"), val = bool(false)]; - tensor input_391_cast_fp16 = concat(axis = var_15350, interleave = input_391_interleave_0, values = (hidden_states_313_cast_fp16, var_15352_cast_fp16))[name = string("input_391_cast_fp16")]; - tensor normed_469_axes_0 = const()[name = string("normed_469_axes_0"), val = tensor([-1])]; - fp16 var_15347_to_fp16 = const()[name = string("op_15347_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_469_cast_fp16 = layer_norm(axes = normed_469_axes_0, epsilon = var_15347_to_fp16, x = input_391_cast_fp16)[name = string("normed_469_cast_fp16")]; - tensor normed_471_begin_0 = const()[name = string("normed_471_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_471_end_0 = const()[name = string("normed_471_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_471_end_mask_0 = const()[name = string("normed_471_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_471_cast_fp16 = slice_by_index(begin = normed_471_begin_0, end = normed_471_end_0, end_mask = normed_471_end_mask_0, x = normed_469_cast_fp16)[name = string("normed_471_cast_fp16")]; - tensor var_15366_to_fp16 = const()[name = string("op_15366_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415164672)))]; - tensor attn_output_199_cast_fp16 = mul(x = normed_471_cast_fp16, y = var_15366_to_fp16)[name = string("attn_output_199_cast_fp16")]; - tensor hidden_states_315_cast_fp16 = add(x = hidden_states_305_cast_fp16, y = attn_output_199_cast_fp16)[name = string("hidden_states_315_cast_fp16")]; - int32 var_15379 = const()[name = string("op_15379"), val = int32(-1)]; - fp16 const_852_promoted_to_fp16 = const()[name = string("const_852_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_15381_cast_fp16 = mul(x = hidden_states_315_cast_fp16, y = const_852_promoted_to_fp16)[name = string("op_15381_cast_fp16")]; - bool input_393_interleave_0 = const()[name = string("input_393_interleave_0"), val = bool(false)]; - tensor input_393_cast_fp16 = concat(axis = var_15379, interleave = input_393_interleave_0, values = (hidden_states_315_cast_fp16, var_15381_cast_fp16))[name = string("input_393_cast_fp16")]; - tensor normed_473_axes_0 = const()[name = string("normed_473_axes_0"), val = tensor([-1])]; - fp16 var_15376_to_fp16 = const()[name = string("op_15376_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_473_cast_fp16 = layer_norm(axes = normed_473_axes_0, epsilon = var_15376_to_fp16, x = input_393_cast_fp16)[name = string("normed_473_cast_fp16")]; - tensor normed_475_begin_0 = const()[name = string("normed_475_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_475_end_0 = const()[name = string("normed_475_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_475_end_mask_0 = const()[name = string("normed_475_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_475_cast_fp16 = slice_by_index(begin = normed_475_begin_0, end = normed_475_end_0, end_mask = normed_475_end_mask_0, x = normed_473_cast_fp16)[name = string("normed_475_cast_fp16")]; - tensor var_15395_to_fp16 = const()[name = string("op_15395_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415167040)))]; - tensor x_317_cast_fp16 = mul(x = normed_475_cast_fp16, y = var_15395_to_fp16)[name = string("x_317_cast_fp16")]; - tensor var_15407 = const()[name = string("op_15407"), val = tensor([0, 2, 1])]; - tensor input_395_axes_0 = const()[name = string("input_395_axes_0"), val = tensor([2])]; - tensor var_15408_cast_fp16 = transpose(perm = var_15407, x = x_317_cast_fp16)[name = string("transpose_55")]; - tensor input_395_cast_fp16 = expand_dims(axes = input_395_axes_0, x = var_15408_cast_fp16)[name = string("input_395_cast_fp16")]; - string x_319_pad_type_0 = const()[name = string("x_319_pad_type_0"), val = string("valid")]; - tensor x_319_strides_0 = const()[name = string("x_319_strides_0"), val = tensor([1, 1])]; - tensor x_319_pad_0 = const()[name = string("x_319_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_319_dilations_0 = const()[name = string("x_319_dilations_0"), val = tensor([1, 1])]; - int32 x_319_groups_0 = const()[name = string("x_319_groups_0"), val = int32(1)]; - tensor model_model_layers_19_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1103930048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1109902080))))[name = string("model_model_layers_19_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_319_cast_fp16 = conv(dilations = x_319_dilations_0, groups = x_319_groups_0, pad = x_319_pad_0, pad_type = x_319_pad_type_0, strides = x_319_strides_0, weight = model_model_layers_19_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_395_cast_fp16)[name = string("x_319_cast_fp16")]; - string b_39_pad_type_0 = const()[name = string("b_39_pad_type_0"), val = string("valid")]; - tensor b_39_strides_0 = const()[name = string("b_39_strides_0"), val = tensor([1, 1])]; - tensor b_39_pad_0 = const()[name = string("b_39_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_39_dilations_0 = const()[name = string("b_39_dilations_0"), val = tensor([1, 1])]; - int32 b_39_groups_0 = const()[name = string("b_39_groups_0"), val = int32(1)]; - tensor model_model_layers_19_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1110012736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1115984768))))[name = string("model_model_layers_19_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_39_cast_fp16 = conv(dilations = b_39_dilations_0, groups = b_39_groups_0, pad = b_39_pad_0, pad_type = b_39_pad_type_0, strides = b_39_strides_0, weight = model_model_layers_19_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_395_cast_fp16)[name = string("b_39_cast_fp16")]; - string var_15433_mode_0 = const()[name = string("op_15433_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_15433_cast_fp16 = gelu(mode = var_15433_mode_0, x = x_319_cast_fp16)[name = string("op_15433_cast_fp16")]; - tensor input_397_cast_fp16 = mul(x = var_15433_cast_fp16, y = b_39_cast_fp16)[name = string("input_397_cast_fp16")]; - string e_39_pad_type_0 = const()[name = string("e_39_pad_type_0"), val = string("valid")]; - tensor e_39_strides_0 = const()[name = string("e_39_strides_0"), val = tensor([1, 1])]; - tensor e_39_pad_0 = const()[name = string("e_39_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_39_dilations_0 = const()[name = string("e_39_dilations_0"), val = tensor([1, 1])]; - int32 e_39_groups_0 = const()[name = string("e_39_groups_0"), val = int32(1)]; - tensor model_model_layers_19_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427334784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433306816))))[name = string("model_model_layers_19_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_39_cast_fp16 = conv(dilations = e_39_dilations_0, groups = e_39_groups_0, pad = e_39_pad_0, pad_type = e_39_pad_type_0, strides = e_39_strides_0, weight = model_model_layers_19_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_397_cast_fp16)[name = string("e_39_cast_fp16")]; - tensor var_15441_axes_0 = const()[name = string("op_15441_axes_0"), val = tensor([2])]; - tensor var_15441_cast_fp16 = squeeze(axes = var_15441_axes_0, x = e_39_cast_fp16)[name = string("op_15441_cast_fp16")]; - tensor var_15442 = const()[name = string("op_15442"), val = tensor([0, 2, 1])]; - int32 var_15453 = const()[name = string("op_15453"), val = int32(-1)]; - fp16 const_856_promoted_to_fp16 = const()[name = string("const_856_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_317_cast_fp16 = transpose(perm = var_15442, x = var_15441_cast_fp16)[name = string("transpose_54")]; - tensor var_15455_cast_fp16 = mul(x = hidden_states_317_cast_fp16, y = const_856_promoted_to_fp16)[name = string("op_15455_cast_fp16")]; - bool input_399_interleave_0 = const()[name = string("input_399_interleave_0"), val = bool(false)]; - tensor input_399_cast_fp16 = concat(axis = var_15453, interleave = input_399_interleave_0, values = (hidden_states_317_cast_fp16, var_15455_cast_fp16))[name = string("input_399_cast_fp16")]; - tensor normed_477_axes_0 = const()[name = string("normed_477_axes_0"), val = tensor([-1])]; - fp16 var_15450_to_fp16 = const()[name = string("op_15450_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_477_cast_fp16 = layer_norm(axes = normed_477_axes_0, epsilon = var_15450_to_fp16, x = input_399_cast_fp16)[name = string("normed_477_cast_fp16")]; - tensor normed_479_begin_0 = const()[name = string("normed_479_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_479_end_0 = const()[name = string("normed_479_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_479_end_mask_0 = const()[name = string("normed_479_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_479_cast_fp16 = slice_by_index(begin = normed_479_begin_0, end = normed_479_end_0, end_mask = normed_479_end_mask_0, x = normed_477_cast_fp16)[name = string("normed_479_cast_fp16")]; - tensor var_15469_to_fp16 = const()[name = string("op_15469_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433325312)))]; - tensor hidden_states_319_cast_fp16 = mul(x = normed_479_cast_fp16, y = var_15469_to_fp16)[name = string("hidden_states_319_cast_fp16")]; - tensor hidden_states_321_cast_fp16 = add(x = hidden_states_315_cast_fp16, y = hidden_states_319_cast_fp16)[name = string("hidden_states_321_cast_fp16")]; - int32 var_15523 = const()[name = string("op_15523"), val = int32(-1)]; - fp16 const_861_promoted_to_fp16 = const()[name = string("const_861_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_15525_cast_fp16 = mul(x = hidden_states_321_cast_fp16, y = const_861_promoted_to_fp16)[name = string("op_15525_cast_fp16")]; - bool input_401_interleave_0 = const()[name = string("input_401_interleave_0"), val = bool(false)]; - tensor input_401_cast_fp16 = concat(axis = var_15523, interleave = input_401_interleave_0, values = (hidden_states_321_cast_fp16, var_15525_cast_fp16))[name = string("input_401_cast_fp16")]; - tensor normed_481_axes_0 = const()[name = string("normed_481_axes_0"), val = tensor([-1])]; - fp16 var_15520_to_fp16 = const()[name = string("op_15520_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_481_cast_fp16 = layer_norm(axes = normed_481_axes_0, epsilon = var_15520_to_fp16, x = input_401_cast_fp16)[name = string("normed_481_cast_fp16")]; - tensor normed_483_begin_0 = const()[name = string("normed_483_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_483_end_0 = const()[name = string("normed_483_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_483_end_mask_0 = const()[name = string("normed_483_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_483_cast_fp16 = slice_by_index(begin = normed_483_begin_0, end = normed_483_end_0, end_mask = normed_483_end_mask_0, x = normed_481_cast_fp16)[name = string("normed_483_cast_fp16")]; - tensor var_15539_to_fp16 = const()[name = string("op_15539_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433327680)))]; - tensor hidden_states_323_cast_fp16 = mul(x = normed_483_cast_fp16, y = var_15539_to_fp16)[name = string("hidden_states_323_cast_fp16")]; - tensor var_15550 = const()[name = string("op_15550"), val = tensor([0, 2, 1])]; - tensor var_15553_axes_0 = const()[name = string("op_15553_axes_0"), val = tensor([2])]; - tensor var_15551_cast_fp16 = transpose(perm = var_15550, x = hidden_states_323_cast_fp16)[name = string("transpose_53")]; - tensor var_15553_cast_fp16 = expand_dims(axes = var_15553_axes_0, x = var_15551_cast_fp16)[name = string("op_15553_cast_fp16")]; - string query_states_161_pad_type_0 = const()[name = string("query_states_161_pad_type_0"), val = string("valid")]; - tensor query_states_161_strides_0 = const()[name = string("query_states_161_strides_0"), val = tensor([1, 1])]; - tensor query_states_161_pad_0 = const()[name = string("query_states_161_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_161_dilations_0 = const()[name = string("query_states_161_dilations_0"), val = tensor([1, 1])]; - int32 query_states_161_groups_0 = const()[name = string("query_states_161_groups_0"), val = int32(1)]; - tensor query_states_161 = conv(dilations = query_states_161_dilations_0, groups = query_states_161_groups_0, pad = query_states_161_pad_0, pad_type = query_states_161_pad_type_0, strides = query_states_161_strides_0, weight = model_model_layers_20_self_attn_q_proj_weight_palettized, x = var_15553_cast_fp16)[name = string("query_states_161")]; - string key_states_201_pad_type_0 = const()[name = string("key_states_201_pad_type_0"), val = string("valid")]; - tensor key_states_201_strides_0 = const()[name = string("key_states_201_strides_0"), val = tensor([1, 1])]; - tensor key_states_201_pad_0 = const()[name = string("key_states_201_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_201_dilations_0 = const()[name = string("key_states_201_dilations_0"), val = tensor([1, 1])]; - int32 key_states_201_groups_0 = const()[name = string("key_states_201_groups_0"), val = int32(1)]; - tensor key_states_201 = conv(dilations = key_states_201_dilations_0, groups = key_states_201_groups_0, pad = key_states_201_pad_0, pad_type = key_states_201_pad_type_0, strides = key_states_201_strides_0, weight = model_model_layers_20_self_attn_k_proj_weight_palettized, x = var_15553_cast_fp16)[name = string("key_states_201")]; - string value_states_161_pad_type_0 = const()[name = string("value_states_161_pad_type_0"), val = string("valid")]; - tensor value_states_161_strides_0 = const()[name = string("value_states_161_strides_0"), val = tensor([1, 1])]; - tensor value_states_161_pad_0 = const()[name = string("value_states_161_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_161_dilations_0 = const()[name = string("value_states_161_dilations_0"), val = tensor([1, 1])]; - int32 value_states_161_groups_0 = const()[name = string("value_states_161_groups_0"), val = int32(1)]; - tensor value_states_161 = conv(dilations = value_states_161_dilations_0, groups = value_states_161_groups_0, pad = value_states_161_pad_0, pad_type = value_states_161_pad_type_0, strides = value_states_161_strides_0, weight = model_model_layers_20_self_attn_v_proj_weight_palettized, x = var_15553_cast_fp16)[name = string("value_states_161")]; - tensor var_15595 = const()[name = string("op_15595"), val = tensor([1, 4, 256, 64])]; - tensor var_15596 = reshape(shape = var_15595, x = query_states_161)[name = string("op_15596")]; - tensor var_15601 = const()[name = string("op_15601"), val = tensor([0, 1, 3, 2])]; - tensor var_15606 = const()[name = string("op_15606"), val = tensor([1, 1, 256, 64])]; - tensor var_15607 = reshape(shape = var_15606, x = key_states_201)[name = string("op_15607")]; - tensor var_15612 = const()[name = string("op_15612"), val = tensor([0, 1, 3, 2])]; - tensor var_15617 = const()[name = string("op_15617"), val = tensor([1, 1, 256, 64])]; - tensor var_15618 = reshape(shape = var_15617, x = value_states_161)[name = string("op_15618")]; - tensor var_15623 = const()[name = string("op_15623"), val = tensor([0, 1, 3, 2])]; - int32 var_15634 = const()[name = string("op_15634"), val = int32(-1)]; - fp16 const_866_promoted = const()[name = string("const_866_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_325 = transpose(perm = var_15601, x = var_15596)[name = string("transpose_52")]; - tensor var_15636 = mul(x = hidden_states_325, y = const_866_promoted)[name = string("op_15636")]; - bool input_405_interleave_0 = const()[name = string("input_405_interleave_0"), val = bool(false)]; - tensor input_405 = concat(axis = var_15634, interleave = input_405_interleave_0, values = (hidden_states_325, var_15636))[name = string("input_405")]; - tensor normed_485_axes_0 = const()[name = string("normed_485_axes_0"), val = tensor([-1])]; - fp16 var_15631_to_fp16 = const()[name = string("op_15631_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_485_cast_fp16 = layer_norm(axes = normed_485_axes_0, epsilon = var_15631_to_fp16, x = input_405)[name = string("normed_485_cast_fp16")]; - tensor normed_487_begin_0 = const()[name = string("normed_487_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_487_end_0 = const()[name = string("normed_487_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_487_end_mask_0 = const()[name = string("normed_487_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_487 = slice_by_index(begin = normed_487_begin_0, end = normed_487_end_0, end_mask = normed_487_end_mask_0, x = normed_485_cast_fp16)[name = string("normed_487")]; - tensor var_15650_to_fp16 = const()[name = string("op_15650_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433330048)))]; - tensor q_41_cast_fp16 = mul(x = normed_487, y = var_15650_to_fp16)[name = string("q_41_cast_fp16")]; - int32 var_15661 = const()[name = string("op_15661"), val = int32(-1)]; - fp16 const_870_promoted = const()[name = string("const_870_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_327 = transpose(perm = var_15612, x = var_15607)[name = string("transpose_51")]; - tensor var_15663 = mul(x = hidden_states_327, y = const_870_promoted)[name = string("op_15663")]; - bool input_407_interleave_0 = const()[name = string("input_407_interleave_0"), val = bool(false)]; - tensor input_407 = concat(axis = var_15661, interleave = input_407_interleave_0, values = (hidden_states_327, var_15663))[name = string("input_407")]; - tensor normed_489_axes_0 = const()[name = string("normed_489_axes_0"), val = tensor([-1])]; - fp16 var_15658_to_fp16 = const()[name = string("op_15658_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_489_cast_fp16 = layer_norm(axes = normed_489_axes_0, epsilon = var_15658_to_fp16, x = input_407)[name = string("normed_489_cast_fp16")]; - tensor normed_491_begin_0 = const()[name = string("normed_491_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_491_end_0 = const()[name = string("normed_491_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_491_end_mask_0 = const()[name = string("normed_491_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_491 = slice_by_index(begin = normed_491_begin_0, end = normed_491_end_0, end_mask = normed_491_end_mask_0, x = normed_489_cast_fp16)[name = string("normed_491")]; - tensor var_15677_to_fp16 = const()[name = string("op_15677_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433330624)))]; - tensor k_41_cast_fp16 = mul(x = normed_491, y = var_15677_to_fp16)[name = string("k_41_cast_fp16")]; - tensor var_15691_cast_fp16 = mul(x = q_41_cast_fp16, y = cos_5)[name = string("op_15691_cast_fp16")]; - tensor x1_81_begin_0 = const()[name = string("x1_81_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_81_end_0 = const()[name = string("x1_81_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_81_end_mask_0 = const()[name = string("x1_81_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_81_cast_fp16 = slice_by_index(begin = x1_81_begin_0, end = x1_81_end_0, end_mask = x1_81_end_mask_0, x = q_41_cast_fp16)[name = string("x1_81_cast_fp16")]; - tensor x2_81_begin_0 = const()[name = string("x2_81_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_81_end_0 = const()[name = string("x2_81_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_81_end_mask_0 = const()[name = string("x2_81_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_81_cast_fp16 = slice_by_index(begin = x2_81_begin_0, end = x2_81_end_0, end_mask = x2_81_end_mask_0, x = q_41_cast_fp16)[name = string("x2_81_cast_fp16")]; - fp16 const_876_promoted_to_fp16 = const()[name = string("const_876_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_15712_cast_fp16 = mul(x = x2_81_cast_fp16, y = const_876_promoted_to_fp16)[name = string("op_15712_cast_fp16")]; - int32 var_15714 = const()[name = string("op_15714"), val = int32(-1)]; - bool var_15715_interleave_0 = const()[name = string("op_15715_interleave_0"), val = bool(false)]; - tensor var_15715_cast_fp16 = concat(axis = var_15714, interleave = var_15715_interleave_0, values = (var_15712_cast_fp16, x1_81_cast_fp16))[name = string("op_15715_cast_fp16")]; - tensor var_15716_cast_fp16 = mul(x = var_15715_cast_fp16, y = sin_5)[name = string("op_15716_cast_fp16")]; - tensor query_states_163_cast_fp16 = add(x = var_15691_cast_fp16, y = var_15716_cast_fp16)[name = string("query_states_163_cast_fp16")]; - tensor var_15719_cast_fp16 = mul(x = k_41_cast_fp16, y = cos_5)[name = string("op_15719_cast_fp16")]; - tensor x1_83_begin_0 = const()[name = string("x1_83_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_83_end_0 = const()[name = string("x1_83_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_83_end_mask_0 = const()[name = string("x1_83_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_83_cast_fp16 = slice_by_index(begin = x1_83_begin_0, end = x1_83_end_0, end_mask = x1_83_end_mask_0, x = k_41_cast_fp16)[name = string("x1_83_cast_fp16")]; - tensor x2_83_begin_0 = const()[name = string("x2_83_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_83_end_0 = const()[name = string("x2_83_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_83_end_mask_0 = const()[name = string("x2_83_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_83_cast_fp16 = slice_by_index(begin = x2_83_begin_0, end = x2_83_end_0, end_mask = x2_83_end_mask_0, x = k_41_cast_fp16)[name = string("x2_83_cast_fp16")]; - fp16 const_879_promoted_to_fp16 = const()[name = string("const_879_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_15740_cast_fp16 = mul(x = x2_83_cast_fp16, y = const_879_promoted_to_fp16)[name = string("op_15740_cast_fp16")]; - int32 var_15742 = const()[name = string("op_15742"), val = int32(-1)]; - bool var_15743_interleave_0 = const()[name = string("op_15743_interleave_0"), val = bool(false)]; - tensor var_15743_cast_fp16 = concat(axis = var_15742, interleave = var_15743_interleave_0, values = (var_15740_cast_fp16, x1_83_cast_fp16))[name = string("op_15743_cast_fp16")]; - tensor var_15744_cast_fp16 = mul(x = var_15743_cast_fp16, y = sin_5)[name = string("op_15744_cast_fp16")]; - tensor key_states_203_cast_fp16 = add(x = var_15719_cast_fp16, y = var_15744_cast_fp16)[name = string("key_states_203_cast_fp16")]; - tensor expand_dims_240 = const()[name = string("expand_dims_240"), val = tensor([17])]; - tensor expand_dims_241 = const()[name = string("expand_dims_241"), val = tensor([0])]; - tensor expand_dims_243 = const()[name = string("expand_dims_243"), val = tensor([0])]; - tensor expand_dims_244 = const()[name = string("expand_dims_244"), val = tensor([18])]; - int32 concat_362_axis_0 = const()[name = string("concat_362_axis_0"), val = int32(0)]; - bool concat_362_interleave_0 = const()[name = string("concat_362_interleave_0"), val = bool(false)]; - tensor concat_362 = concat(axis = concat_362_axis_0, interleave = concat_362_interleave_0, values = (expand_dims_240, expand_dims_241, current_pos, expand_dims_243))[name = string("concat_362")]; - tensor concat_363_values1_0 = const()[name = string("concat_363_values1_0"), val = tensor([0])]; - tensor concat_363_values3_0 = const()[name = string("concat_363_values3_0"), val = tensor([0])]; - int32 concat_363_axis_0 = const()[name = string("concat_363_axis_0"), val = int32(0)]; - bool concat_363_interleave_0 = const()[name = string("concat_363_interleave_0"), val = bool(false)]; - tensor concat_363 = concat(axis = concat_363_axis_0, interleave = concat_363_interleave_0, values = (expand_dims_244, concat_363_values1_0, end_pos_1, concat_363_values3_0))[name = string("concat_363")]; - tensor model_model_kv_cache_local_internal_tensor_assign_35_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_35_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_35_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_35_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_35_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_35_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_35_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_35_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_35_cast_fp16 = slice_update(begin = concat_362, begin_mask = model_model_kv_cache_local_internal_tensor_assign_35_begin_mask_0, end = concat_363, end_mask = model_model_kv_cache_local_internal_tensor_assign_35_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_35_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_35_stride_0, update = key_states_203_cast_fp16, x = coreml_update_state_91)[name = string("model_model_kv_cache_local_internal_tensor_assign_35_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_35_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_144_write_state")]; - tensor coreml_update_state_92 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_144")]; - tensor expand_dims_246 = const()[name = string("expand_dims_246"), val = tensor([39])]; - tensor expand_dims_247 = const()[name = string("expand_dims_247"), val = tensor([0])]; - tensor expand_dims_249 = const()[name = string("expand_dims_249"), val = tensor([0])]; - tensor expand_dims_250 = const()[name = string("expand_dims_250"), val = tensor([40])]; - int32 concat_366_axis_0 = const()[name = string("concat_366_axis_0"), val = int32(0)]; - bool concat_366_interleave_0 = const()[name = string("concat_366_interleave_0"), val = bool(false)]; - tensor concat_366 = concat(axis = concat_366_axis_0, interleave = concat_366_interleave_0, values = (expand_dims_246, expand_dims_247, current_pos, expand_dims_249))[name = string("concat_366")]; - tensor concat_367_values1_0 = const()[name = string("concat_367_values1_0"), val = tensor([0])]; - tensor concat_367_values3_0 = const()[name = string("concat_367_values3_0"), val = tensor([0])]; - int32 concat_367_axis_0 = const()[name = string("concat_367_axis_0"), val = int32(0)]; - bool concat_367_interleave_0 = const()[name = string("concat_367_interleave_0"), val = bool(false)]; - tensor concat_367 = concat(axis = concat_367_axis_0, interleave = concat_367_interleave_0, values = (expand_dims_250, concat_367_values1_0, end_pos_1, concat_367_values3_0))[name = string("concat_367")]; - tensor model_model_kv_cache_local_internal_tensor_assign_36_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_36_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_36_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_36_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_36_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_36_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_36_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_36_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor value_states_163 = transpose(perm = var_15623, x = var_15618)[name = string("transpose_50")]; - tensor model_model_kv_cache_local_internal_tensor_assign_36_cast_fp16 = slice_update(begin = concat_366, begin_mask = model_model_kv_cache_local_internal_tensor_assign_36_begin_mask_0, end = concat_367, end_mask = model_model_kv_cache_local_internal_tensor_assign_36_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_36_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_36_stride_0, update = value_states_163, x = coreml_update_state_92)[name = string("model_model_kv_cache_local_internal_tensor_assign_36_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_36_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_145_write_state")]; - tensor coreml_update_state_93 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_145")]; - tensor var_15843_begin_0 = const()[name = string("op_15843_begin_0"), val = tensor([17, 0, 0, 0])]; - tensor var_15843_end_0 = const()[name = string("op_15843_end_0"), val = tensor([18, 1, 512, 256])]; - tensor var_15843_end_mask_0 = const()[name = string("op_15843_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_15843_cast_fp16 = slice_by_index(begin = var_15843_begin_0, end = var_15843_end_0, end_mask = var_15843_end_mask_0, x = coreml_update_state_93)[name = string("op_15843_cast_fp16")]; - tensor var_15850_begin_0 = const()[name = string("op_15850_begin_0"), val = tensor([39, 0, 0, 0])]; - tensor var_15850_end_0 = const()[name = string("op_15850_end_0"), val = tensor([40, 1, 512, 256])]; - tensor var_15850_end_mask_0 = const()[name = string("op_15850_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_15850_cast_fp16 = slice_by_index(begin = var_15850_begin_0, end = var_15850_end_0, end_mask = var_15850_end_mask_0, x = coreml_update_state_93)[name = string("op_15850_cast_fp16")]; - tensor var_15889 = const()[name = string("op_15889"), val = tensor([1, 4, 1, 1])]; - tensor x_325_cast_fp16 = tile(reps = var_15889, x = var_15843_cast_fp16)[name = string("x_325_cast_fp16")]; - tensor var_15909 = const()[name = string("op_15909"), val = tensor([1, 4, 1, 1])]; - tensor x_331_cast_fp16 = tile(reps = var_15909, x = var_15850_cast_fp16)[name = string("x_331_cast_fp16")]; - bool var_15936_transpose_x_0 = const()[name = string("op_15936_transpose_x_0"), val = bool(false)]; - bool var_15936_transpose_y_0 = const()[name = string("op_15936_transpose_y_0"), val = bool(true)]; - tensor var_15936 = matmul(transpose_x = var_15936_transpose_x_0, transpose_y = var_15936_transpose_y_0, x = query_states_163_cast_fp16, y = x_325_cast_fp16)[name = string("op_15936")]; - fp16 var_15937_to_fp16 = const()[name = string("op_15937_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_81_cast_fp16 = mul(x = var_15936, y = var_15937_to_fp16)[name = string("attn_weights_81_cast_fp16")]; - tensor attn_weights_83_cast_fp16 = add(x = attn_weights_81_cast_fp16, y = mask_slice_1)[name = string("attn_weights_83_cast_fp16")]; - int32 var_15972 = const()[name = string("op_15972"), val = int32(-1)]; - tensor var_15974_cast_fp16 = softmax(axis = var_15972, x = attn_weights_83_cast_fp16)[name = string("op_15974_cast_fp16")]; - tensor concat_372 = const()[name = string("concat_372"), val = tensor([4, 64, 512])]; - tensor reshape_60_cast_fp16 = reshape(shape = concat_372, x = var_15974_cast_fp16)[name = string("reshape_60_cast_fp16")]; - tensor concat_373 = const()[name = string("concat_373"), val = tensor([4, 512, 256])]; - tensor reshape_61_cast_fp16 = reshape(shape = concat_373, x = x_331_cast_fp16)[name = string("reshape_61_cast_fp16")]; - bool matmul_20_transpose_x_0 = const()[name = string("matmul_20_transpose_x_0"), val = bool(false)]; - bool matmul_20_transpose_y_0 = const()[name = string("matmul_20_transpose_y_0"), val = bool(false)]; - tensor matmul_20_cast_fp16 = matmul(transpose_x = matmul_20_transpose_x_0, transpose_y = matmul_20_transpose_y_0, x = reshape_60_cast_fp16, y = reshape_61_cast_fp16)[name = string("matmul_20_cast_fp16")]; - tensor concat_377 = const()[name = string("concat_377"), val = tensor([1, 4, 64, 256])]; - tensor reshape_62_cast_fp16 = reshape(shape = concat_377, x = matmul_20_cast_fp16)[name = string("reshape_62_cast_fp16")]; - tensor var_15986_perm_0 = const()[name = string("op_15986_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_16005 = const()[name = string("op_16005"), val = tensor([1, 64, 1024])]; - tensor var_15986_cast_fp16 = transpose(perm = var_15986_perm_0, x = reshape_62_cast_fp16)[name = string("transpose_49")]; - tensor attn_output_205_cast_fp16 = reshape(shape = var_16005, x = var_15986_cast_fp16)[name = string("attn_output_205_cast_fp16")]; - tensor var_16010 = const()[name = string("op_16010"), val = tensor([0, 2, 1])]; - string var_16026_pad_type_0 = const()[name = string("op_16026_pad_type_0"), val = string("valid")]; - int32 var_16026_groups_0 = const()[name = string("op_16026_groups_0"), val = int32(1)]; - tensor var_16026_strides_0 = const()[name = string("op_16026_strides_0"), val = tensor([1])]; - tensor var_16026_pad_0 = const()[name = string("op_16026_pad_0"), val = tensor([0, 0])]; - tensor var_16026_dilations_0 = const()[name = string("op_16026_dilations_0"), val = tensor([1])]; - tensor squeeze_20_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433331200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(434216000))))[name = string("squeeze_20_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_16011_cast_fp16 = transpose(perm = var_16010, x = attn_output_205_cast_fp16)[name = string("transpose_48")]; - tensor var_16026_cast_fp16 = conv(dilations = var_16026_dilations_0, groups = var_16026_groups_0, pad = var_16026_pad_0, pad_type = var_16026_pad_type_0, strides = var_16026_strides_0, weight = squeeze_20_cast_fp16_to_fp32_to_fp16_palettized, x = var_16011_cast_fp16)[name = string("op_16026_cast_fp16")]; - tensor var_16030 = const()[name = string("op_16030"), val = tensor([0, 2, 1])]; - int32 var_16041 = const()[name = string("op_16041"), val = int32(-1)]; - fp16 const_891_promoted_to_fp16 = const()[name = string("const_891_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_329_cast_fp16 = transpose(perm = var_16030, x = var_16026_cast_fp16)[name = string("transpose_47")]; - tensor var_16043_cast_fp16 = mul(x = hidden_states_329_cast_fp16, y = const_891_promoted_to_fp16)[name = string("op_16043_cast_fp16")]; - bool input_411_interleave_0 = const()[name = string("input_411_interleave_0"), val = bool(false)]; - tensor input_411_cast_fp16 = concat(axis = var_16041, interleave = input_411_interleave_0, values = (hidden_states_329_cast_fp16, var_16043_cast_fp16))[name = string("input_411_cast_fp16")]; - tensor normed_493_axes_0 = const()[name = string("normed_493_axes_0"), val = tensor([-1])]; - fp16 var_16038_to_fp16 = const()[name = string("op_16038_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_493_cast_fp16 = layer_norm(axes = normed_493_axes_0, epsilon = var_16038_to_fp16, x = input_411_cast_fp16)[name = string("normed_493_cast_fp16")]; - tensor normed_495_begin_0 = const()[name = string("normed_495_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_495_end_0 = const()[name = string("normed_495_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_495_end_mask_0 = const()[name = string("normed_495_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_495_cast_fp16 = slice_by_index(begin = normed_495_begin_0, end = normed_495_end_0, end_mask = normed_495_end_mask_0, x = normed_493_cast_fp16)[name = string("normed_495_cast_fp16")]; - tensor var_16057_to_fp16 = const()[name = string("op_16057_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(434234496)))]; - tensor attn_output_209_cast_fp16 = mul(x = normed_495_cast_fp16, y = var_16057_to_fp16)[name = string("attn_output_209_cast_fp16")]; - tensor hidden_states_331_cast_fp16 = add(x = hidden_states_321_cast_fp16, y = attn_output_209_cast_fp16)[name = string("hidden_states_331_cast_fp16")]; - int32 var_16070 = const()[name = string("op_16070"), val = int32(-1)]; - fp16 const_895_promoted_to_fp16 = const()[name = string("const_895_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_16072_cast_fp16 = mul(x = hidden_states_331_cast_fp16, y = const_895_promoted_to_fp16)[name = string("op_16072_cast_fp16")]; - bool input_413_interleave_0 = const()[name = string("input_413_interleave_0"), val = bool(false)]; - tensor input_413_cast_fp16 = concat(axis = var_16070, interleave = input_413_interleave_0, values = (hidden_states_331_cast_fp16, var_16072_cast_fp16))[name = string("input_413_cast_fp16")]; - tensor normed_497_axes_0 = const()[name = string("normed_497_axes_0"), val = tensor([-1])]; - fp16 var_16067_to_fp16 = const()[name = string("op_16067_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_497_cast_fp16 = layer_norm(axes = normed_497_axes_0, epsilon = var_16067_to_fp16, x = input_413_cast_fp16)[name = string("normed_497_cast_fp16")]; - tensor normed_499_begin_0 = const()[name = string("normed_499_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_499_end_0 = const()[name = string("normed_499_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_499_end_mask_0 = const()[name = string("normed_499_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_499_cast_fp16 = slice_by_index(begin = normed_499_begin_0, end = normed_499_end_0, end_mask = normed_499_end_mask_0, x = normed_497_cast_fp16)[name = string("normed_499_cast_fp16")]; - tensor var_16086_to_fp16 = const()[name = string("op_16086_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(434236864)))]; - tensor x_333_cast_fp16 = mul(x = normed_499_cast_fp16, y = var_16086_to_fp16)[name = string("x_333_cast_fp16")]; - tensor var_16098 = const()[name = string("op_16098"), val = tensor([0, 2, 1])]; - tensor input_415_axes_0 = const()[name = string("input_415_axes_0"), val = tensor([2])]; - tensor var_16099_cast_fp16 = transpose(perm = var_16098, x = x_333_cast_fp16)[name = string("transpose_46")]; - tensor input_415_cast_fp16 = expand_dims(axes = input_415_axes_0, x = var_16099_cast_fp16)[name = string("input_415_cast_fp16")]; - string x_335_pad_type_0 = const()[name = string("x_335_pad_type_0"), val = string("valid")]; - tensor x_335_strides_0 = const()[name = string("x_335_strides_0"), val = tensor([1, 1])]; - tensor x_335_pad_0 = const()[name = string("x_335_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_335_dilations_0 = const()[name = string("x_335_dilations_0"), val = tensor([1, 1])]; - int32 x_335_groups_0 = const()[name = string("x_335_groups_0"), val = int32(1)]; - tensor model_model_layers_20_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1116095424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1122067456))))[name = string("model_model_layers_20_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_335_cast_fp16 = conv(dilations = x_335_dilations_0, groups = x_335_groups_0, pad = x_335_pad_0, pad_type = x_335_pad_type_0, strides = x_335_strides_0, weight = model_model_layers_20_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_415_cast_fp16)[name = string("x_335_cast_fp16")]; - string b_41_pad_type_0 = const()[name = string("b_41_pad_type_0"), val = string("valid")]; - tensor b_41_strides_0 = const()[name = string("b_41_strides_0"), val = tensor([1, 1])]; - tensor b_41_pad_0 = const()[name = string("b_41_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_41_dilations_0 = const()[name = string("b_41_dilations_0"), val = tensor([1, 1])]; - int32 b_41_groups_0 = const()[name = string("b_41_groups_0"), val = int32(1)]; - tensor model_model_layers_20_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1122178112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1128150144))))[name = string("model_model_layers_20_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_41_cast_fp16 = conv(dilations = b_41_dilations_0, groups = b_41_groups_0, pad = b_41_pad_0, pad_type = b_41_pad_type_0, strides = b_41_strides_0, weight = model_model_layers_20_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_415_cast_fp16)[name = string("b_41_cast_fp16")]; - string var_16124_mode_0 = const()[name = string("op_16124_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_16124_cast_fp16 = gelu(mode = var_16124_mode_0, x = x_335_cast_fp16)[name = string("op_16124_cast_fp16")]; - tensor input_417_cast_fp16 = mul(x = var_16124_cast_fp16, y = b_41_cast_fp16)[name = string("input_417_cast_fp16")]; - string e_41_pad_type_0 = const()[name = string("e_41_pad_type_0"), val = string("valid")]; - tensor e_41_strides_0 = const()[name = string("e_41_strides_0"), val = tensor([1, 1])]; - tensor e_41_pad_0 = const()[name = string("e_41_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_41_dilations_0 = const()[name = string("e_41_dilations_0"), val = tensor([1, 1])]; - int32 e_41_groups_0 = const()[name = string("e_41_groups_0"), val = int32(1)]; - tensor model_model_layers_20_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(446404608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(452376640))))[name = string("model_model_layers_20_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_41_cast_fp16 = conv(dilations = e_41_dilations_0, groups = e_41_groups_0, pad = e_41_pad_0, pad_type = e_41_pad_type_0, strides = e_41_strides_0, weight = model_model_layers_20_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_417_cast_fp16)[name = string("e_41_cast_fp16")]; - tensor var_16132_axes_0 = const()[name = string("op_16132_axes_0"), val = tensor([2])]; - tensor var_16132_cast_fp16 = squeeze(axes = var_16132_axes_0, x = e_41_cast_fp16)[name = string("op_16132_cast_fp16")]; - tensor var_16133 = const()[name = string("op_16133"), val = tensor([0, 2, 1])]; - int32 var_16144 = const()[name = string("op_16144"), val = int32(-1)]; - fp16 const_899_promoted_to_fp16 = const()[name = string("const_899_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_333_cast_fp16 = transpose(perm = var_16133, x = var_16132_cast_fp16)[name = string("transpose_45")]; - tensor var_16146_cast_fp16 = mul(x = hidden_states_333_cast_fp16, y = const_899_promoted_to_fp16)[name = string("op_16146_cast_fp16")]; - bool input_419_interleave_0 = const()[name = string("input_419_interleave_0"), val = bool(false)]; - tensor input_419_cast_fp16 = concat(axis = var_16144, interleave = input_419_interleave_0, values = (hidden_states_333_cast_fp16, var_16146_cast_fp16))[name = string("input_419_cast_fp16")]; - tensor normed_501_axes_0 = const()[name = string("normed_501_axes_0"), val = tensor([-1])]; - fp16 var_16141_to_fp16 = const()[name = string("op_16141_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_501_cast_fp16 = layer_norm(axes = normed_501_axes_0, epsilon = var_16141_to_fp16, x = input_419_cast_fp16)[name = string("normed_501_cast_fp16")]; - tensor normed_503_begin_0 = const()[name = string("normed_503_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_503_end_0 = const()[name = string("normed_503_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_503_end_mask_0 = const()[name = string("normed_503_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_503_cast_fp16 = slice_by_index(begin = normed_503_begin_0, end = normed_503_end_0, end_mask = normed_503_end_mask_0, x = normed_501_cast_fp16)[name = string("normed_503_cast_fp16")]; - tensor var_16160_to_fp16 = const()[name = string("op_16160_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(452395136)))]; - tensor hidden_states_335_cast_fp16 = mul(x = normed_503_cast_fp16, y = var_16160_to_fp16)[name = string("hidden_states_335_cast_fp16")]; - tensor hidden_states_337_cast_fp16 = add(x = hidden_states_331_cast_fp16, y = hidden_states_335_cast_fp16)[name = string("hidden_states_337_cast_fp16")]; - int32 var_16214 = const()[name = string("op_16214"), val = int32(-1)]; - fp16 const_904_promoted_to_fp16 = const()[name = string("const_904_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_16216_cast_fp16 = mul(x = hidden_states_337_cast_fp16, y = const_904_promoted_to_fp16)[name = string("op_16216_cast_fp16")]; - bool input_421_interleave_0 = const()[name = string("input_421_interleave_0"), val = bool(false)]; - tensor input_421_cast_fp16 = concat(axis = var_16214, interleave = input_421_interleave_0, values = (hidden_states_337_cast_fp16, var_16216_cast_fp16))[name = string("input_421_cast_fp16")]; - tensor normed_505_axes_0 = const()[name = string("normed_505_axes_0"), val = tensor([-1])]; - fp16 var_16211_to_fp16 = const()[name = string("op_16211_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_505_cast_fp16 = layer_norm(axes = normed_505_axes_0, epsilon = var_16211_to_fp16, x = input_421_cast_fp16)[name = string("normed_505_cast_fp16")]; - tensor normed_507_begin_0 = const()[name = string("normed_507_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_507_end_0 = const()[name = string("normed_507_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_507_end_mask_0 = const()[name = string("normed_507_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_507_cast_fp16 = slice_by_index(begin = normed_507_begin_0, end = normed_507_end_0, end_mask = normed_507_end_mask_0, x = normed_505_cast_fp16)[name = string("normed_507_cast_fp16")]; - tensor var_16230_to_fp16 = const()[name = string("op_16230_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(452397504)))]; - tensor hidden_states_339_cast_fp16 = mul(x = normed_507_cast_fp16, y = var_16230_to_fp16)[name = string("hidden_states_339_cast_fp16")]; - tensor var_16241 = const()[name = string("op_16241"), val = tensor([0, 2, 1])]; - tensor var_16244_axes_0 = const()[name = string("op_16244_axes_0"), val = tensor([2])]; - tensor var_16242_cast_fp16 = transpose(perm = var_16241, x = hidden_states_339_cast_fp16)[name = string("transpose_44")]; - tensor var_16244_cast_fp16 = expand_dims(axes = var_16244_axes_0, x = var_16242_cast_fp16)[name = string("op_16244_cast_fp16")]; - string query_states_169_pad_type_0 = const()[name = string("query_states_169_pad_type_0"), val = string("valid")]; - tensor query_states_169_strides_0 = const()[name = string("query_states_169_strides_0"), val = tensor([1, 1])]; - tensor query_states_169_pad_0 = const()[name = string("query_states_169_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_169_dilations_0 = const()[name = string("query_states_169_dilations_0"), val = tensor([1, 1])]; - int32 query_states_169_groups_0 = const()[name = string("query_states_169_groups_0"), val = int32(1)]; - tensor query_states_169 = conv(dilations = query_states_169_dilations_0, groups = query_states_169_groups_0, pad = query_states_169_pad_0, pad_type = query_states_169_pad_type_0, strides = query_states_169_strides_0, weight = model_model_layers_21_self_attn_q_proj_weight_palettized, x = var_16244_cast_fp16)[name = string("query_states_169")]; - string key_states_211_pad_type_0 = const()[name = string("key_states_211_pad_type_0"), val = string("valid")]; - tensor key_states_211_strides_0 = const()[name = string("key_states_211_strides_0"), val = tensor([1, 1])]; - tensor key_states_211_pad_0 = const()[name = string("key_states_211_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_211_dilations_0 = const()[name = string("key_states_211_dilations_0"), val = tensor([1, 1])]; - int32 key_states_211_groups_0 = const()[name = string("key_states_211_groups_0"), val = int32(1)]; - tensor key_states_211 = conv(dilations = key_states_211_dilations_0, groups = key_states_211_groups_0, pad = key_states_211_pad_0, pad_type = key_states_211_pad_type_0, strides = key_states_211_strides_0, weight = model_model_layers_21_self_attn_k_proj_weight_palettized, x = var_16244_cast_fp16)[name = string("key_states_211")]; - string value_states_169_pad_type_0 = const()[name = string("value_states_169_pad_type_0"), val = string("valid")]; - tensor value_states_169_strides_0 = const()[name = string("value_states_169_strides_0"), val = tensor([1, 1])]; - tensor value_states_169_pad_0 = const()[name = string("value_states_169_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_169_dilations_0 = const()[name = string("value_states_169_dilations_0"), val = tensor([1, 1])]; - int32 value_states_169_groups_0 = const()[name = string("value_states_169_groups_0"), val = int32(1)]; - tensor value_states_169 = conv(dilations = value_states_169_dilations_0, groups = value_states_169_groups_0, pad = value_states_169_pad_0, pad_type = value_states_169_pad_type_0, strides = value_states_169_strides_0, weight = model_model_layers_21_self_attn_v_proj_weight_palettized, x = var_16244_cast_fp16)[name = string("value_states_169")]; - tensor var_16286 = const()[name = string("op_16286"), val = tensor([1, 4, 256, 64])]; - tensor var_16287 = reshape(shape = var_16286, x = query_states_169)[name = string("op_16287")]; - tensor var_16292 = const()[name = string("op_16292"), val = tensor([0, 1, 3, 2])]; - tensor var_16297 = const()[name = string("op_16297"), val = tensor([1, 1, 256, 64])]; - tensor var_16298 = reshape(shape = var_16297, x = key_states_211)[name = string("op_16298")]; - tensor var_16303 = const()[name = string("op_16303"), val = tensor([0, 1, 3, 2])]; - tensor var_16308 = const()[name = string("op_16308"), val = tensor([1, 1, 256, 64])]; - tensor var_16309 = reshape(shape = var_16308, x = value_states_169)[name = string("op_16309")]; - tensor var_16314 = const()[name = string("op_16314"), val = tensor([0, 1, 3, 2])]; - int32 var_16325 = const()[name = string("op_16325"), val = int32(-1)]; - fp16 const_909_promoted = const()[name = string("const_909_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_341 = transpose(perm = var_16292, x = var_16287)[name = string("transpose_43")]; - tensor var_16327 = mul(x = hidden_states_341, y = const_909_promoted)[name = string("op_16327")]; - bool input_425_interleave_0 = const()[name = string("input_425_interleave_0"), val = bool(false)]; - tensor input_425 = concat(axis = var_16325, interleave = input_425_interleave_0, values = (hidden_states_341, var_16327))[name = string("input_425")]; - tensor normed_509_axes_0 = const()[name = string("normed_509_axes_0"), val = tensor([-1])]; - fp16 var_16322_to_fp16 = const()[name = string("op_16322_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_509_cast_fp16 = layer_norm(axes = normed_509_axes_0, epsilon = var_16322_to_fp16, x = input_425)[name = string("normed_509_cast_fp16")]; - tensor normed_511_begin_0 = const()[name = string("normed_511_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_511_end_0 = const()[name = string("normed_511_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_511_end_mask_0 = const()[name = string("normed_511_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_511 = slice_by_index(begin = normed_511_begin_0, end = normed_511_end_0, end_mask = normed_511_end_mask_0, x = normed_509_cast_fp16)[name = string("normed_511")]; - tensor var_16341_to_fp16 = const()[name = string("op_16341_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(452399872)))]; - tensor q_43_cast_fp16 = mul(x = normed_511, y = var_16341_to_fp16)[name = string("q_43_cast_fp16")]; - int32 var_16352 = const()[name = string("op_16352"), val = int32(-1)]; - fp16 const_913_promoted = const()[name = string("const_913_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_343 = transpose(perm = var_16303, x = var_16298)[name = string("transpose_42")]; - tensor var_16354 = mul(x = hidden_states_343, y = const_913_promoted)[name = string("op_16354")]; - bool input_427_interleave_0 = const()[name = string("input_427_interleave_0"), val = bool(false)]; - tensor input_427 = concat(axis = var_16352, interleave = input_427_interleave_0, values = (hidden_states_343, var_16354))[name = string("input_427")]; - tensor normed_513_axes_0 = const()[name = string("normed_513_axes_0"), val = tensor([-1])]; - fp16 var_16349_to_fp16 = const()[name = string("op_16349_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_513_cast_fp16 = layer_norm(axes = normed_513_axes_0, epsilon = var_16349_to_fp16, x = input_427)[name = string("normed_513_cast_fp16")]; - tensor normed_515_begin_0 = const()[name = string("normed_515_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_515_end_0 = const()[name = string("normed_515_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_515_end_mask_0 = const()[name = string("normed_515_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_515 = slice_by_index(begin = normed_515_begin_0, end = normed_515_end_0, end_mask = normed_515_end_mask_0, x = normed_513_cast_fp16)[name = string("normed_515")]; - tensor var_16368_to_fp16 = const()[name = string("op_16368_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(452400448)))]; - tensor k_43_cast_fp16 = mul(x = normed_515, y = var_16368_to_fp16)[name = string("k_43_cast_fp16")]; - tensor var_16382_cast_fp16 = mul(x = q_43_cast_fp16, y = cos_5)[name = string("op_16382_cast_fp16")]; - tensor x1_85_begin_0 = const()[name = string("x1_85_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_85_end_0 = const()[name = string("x1_85_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_85_end_mask_0 = const()[name = string("x1_85_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_85_cast_fp16 = slice_by_index(begin = x1_85_begin_0, end = x1_85_end_0, end_mask = x1_85_end_mask_0, x = q_43_cast_fp16)[name = string("x1_85_cast_fp16")]; - tensor x2_85_begin_0 = const()[name = string("x2_85_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_85_end_0 = const()[name = string("x2_85_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_85_end_mask_0 = const()[name = string("x2_85_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_85_cast_fp16 = slice_by_index(begin = x2_85_begin_0, end = x2_85_end_0, end_mask = x2_85_end_mask_0, x = q_43_cast_fp16)[name = string("x2_85_cast_fp16")]; - fp16 const_919_promoted_to_fp16 = const()[name = string("const_919_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_16403_cast_fp16 = mul(x = x2_85_cast_fp16, y = const_919_promoted_to_fp16)[name = string("op_16403_cast_fp16")]; - int32 var_16405 = const()[name = string("op_16405"), val = int32(-1)]; - bool var_16406_interleave_0 = const()[name = string("op_16406_interleave_0"), val = bool(false)]; - tensor var_16406_cast_fp16 = concat(axis = var_16405, interleave = var_16406_interleave_0, values = (var_16403_cast_fp16, x1_85_cast_fp16))[name = string("op_16406_cast_fp16")]; - tensor var_16407_cast_fp16 = mul(x = var_16406_cast_fp16, y = sin_5)[name = string("op_16407_cast_fp16")]; - tensor query_states_171_cast_fp16 = add(x = var_16382_cast_fp16, y = var_16407_cast_fp16)[name = string("query_states_171_cast_fp16")]; - tensor var_16410_cast_fp16 = mul(x = k_43_cast_fp16, y = cos_5)[name = string("op_16410_cast_fp16")]; - tensor x1_87_begin_0 = const()[name = string("x1_87_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_87_end_0 = const()[name = string("x1_87_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_87_end_mask_0 = const()[name = string("x1_87_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_87_cast_fp16 = slice_by_index(begin = x1_87_begin_0, end = x1_87_end_0, end_mask = x1_87_end_mask_0, x = k_43_cast_fp16)[name = string("x1_87_cast_fp16")]; - tensor x2_87_begin_0 = const()[name = string("x2_87_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_87_end_0 = const()[name = string("x2_87_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_87_end_mask_0 = const()[name = string("x2_87_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_87_cast_fp16 = slice_by_index(begin = x2_87_begin_0, end = x2_87_end_0, end_mask = x2_87_end_mask_0, x = k_43_cast_fp16)[name = string("x2_87_cast_fp16")]; - fp16 const_922_promoted_to_fp16 = const()[name = string("const_922_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_16431_cast_fp16 = mul(x = x2_87_cast_fp16, y = const_922_promoted_to_fp16)[name = string("op_16431_cast_fp16")]; - int32 var_16433 = const()[name = string("op_16433"), val = int32(-1)]; - bool var_16434_interleave_0 = const()[name = string("op_16434_interleave_0"), val = bool(false)]; - tensor var_16434_cast_fp16 = concat(axis = var_16433, interleave = var_16434_interleave_0, values = (var_16431_cast_fp16, x1_87_cast_fp16))[name = string("op_16434_cast_fp16")]; - tensor var_16435_cast_fp16 = mul(x = var_16434_cast_fp16, y = sin_5)[name = string("op_16435_cast_fp16")]; - tensor key_states_213_cast_fp16 = add(x = var_16410_cast_fp16, y = var_16435_cast_fp16)[name = string("key_states_213_cast_fp16")]; - tensor expand_dims_252 = const()[name = string("expand_dims_252"), val = tensor([18])]; - tensor expand_dims_253 = const()[name = string("expand_dims_253"), val = tensor([0])]; - tensor expand_dims_255 = const()[name = string("expand_dims_255"), val = tensor([0])]; - tensor expand_dims_256 = const()[name = string("expand_dims_256"), val = tensor([19])]; - int32 concat_380_axis_0 = const()[name = string("concat_380_axis_0"), val = int32(0)]; - bool concat_380_interleave_0 = const()[name = string("concat_380_interleave_0"), val = bool(false)]; - tensor concat_380 = concat(axis = concat_380_axis_0, interleave = concat_380_interleave_0, values = (expand_dims_252, expand_dims_253, current_pos, expand_dims_255))[name = string("concat_380")]; - tensor concat_381_values1_0 = const()[name = string("concat_381_values1_0"), val = tensor([0])]; - tensor concat_381_values3_0 = const()[name = string("concat_381_values3_0"), val = tensor([0])]; - int32 concat_381_axis_0 = const()[name = string("concat_381_axis_0"), val = int32(0)]; - bool concat_381_interleave_0 = const()[name = string("concat_381_interleave_0"), val = bool(false)]; - tensor concat_381 = concat(axis = concat_381_axis_0, interleave = concat_381_interleave_0, values = (expand_dims_256, concat_381_values1_0, end_pos_1, concat_381_values3_0))[name = string("concat_381")]; - tensor model_model_kv_cache_local_internal_tensor_assign_37_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_37_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_37_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_37_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_37_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_37_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_37_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_37_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_37_cast_fp16 = slice_update(begin = concat_380, begin_mask = model_model_kv_cache_local_internal_tensor_assign_37_begin_mask_0, end = concat_381, end_mask = model_model_kv_cache_local_internal_tensor_assign_37_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_37_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_37_stride_0, update = key_states_213_cast_fp16, x = coreml_update_state_93)[name = string("model_model_kv_cache_local_internal_tensor_assign_37_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_37_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_146_write_state")]; - tensor coreml_update_state_94 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_146")]; - tensor expand_dims_258 = const()[name = string("expand_dims_258"), val = tensor([40])]; - tensor expand_dims_259 = const()[name = string("expand_dims_259"), val = tensor([0])]; - tensor expand_dims_261 = const()[name = string("expand_dims_261"), val = tensor([0])]; - tensor expand_dims_262 = const()[name = string("expand_dims_262"), val = tensor([41])]; - int32 concat_384_axis_0 = const()[name = string("concat_384_axis_0"), val = int32(0)]; - bool concat_384_interleave_0 = const()[name = string("concat_384_interleave_0"), val = bool(false)]; - tensor concat_384 = concat(axis = concat_384_axis_0, interleave = concat_384_interleave_0, values = (expand_dims_258, expand_dims_259, current_pos, expand_dims_261))[name = string("concat_384")]; - tensor concat_385_values1_0 = const()[name = string("concat_385_values1_0"), val = tensor([0])]; - tensor concat_385_values3_0 = const()[name = string("concat_385_values3_0"), val = tensor([0])]; - int32 concat_385_axis_0 = const()[name = string("concat_385_axis_0"), val = int32(0)]; - bool concat_385_interleave_0 = const()[name = string("concat_385_interleave_0"), val = bool(false)]; - tensor concat_385 = concat(axis = concat_385_axis_0, interleave = concat_385_interleave_0, values = (expand_dims_262, concat_385_values1_0, end_pos_1, concat_385_values3_0))[name = string("concat_385")]; - tensor model_model_kv_cache_local_internal_tensor_assign_38_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_38_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_38_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_38_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_38_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_38_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_38_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_38_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor value_states_171 = transpose(perm = var_16314, x = var_16309)[name = string("transpose_41")]; - tensor model_model_kv_cache_local_internal_tensor_assign_38_cast_fp16 = slice_update(begin = concat_384, begin_mask = model_model_kv_cache_local_internal_tensor_assign_38_begin_mask_0, end = concat_385, end_mask = model_model_kv_cache_local_internal_tensor_assign_38_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_38_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_38_stride_0, update = value_states_171, x = coreml_update_state_94)[name = string("model_model_kv_cache_local_internal_tensor_assign_38_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_38_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_147_write_state")]; - tensor coreml_update_state_95 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_147")]; - tensor var_16534_begin_0 = const()[name = string("op_16534_begin_0"), val = tensor([18, 0, 0, 0])]; - tensor var_16534_end_0 = const()[name = string("op_16534_end_0"), val = tensor([19, 1, 512, 256])]; - tensor var_16534_end_mask_0 = const()[name = string("op_16534_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_16534_cast_fp16 = slice_by_index(begin = var_16534_begin_0, end = var_16534_end_0, end_mask = var_16534_end_mask_0, x = coreml_update_state_95)[name = string("op_16534_cast_fp16")]; - tensor var_16541_begin_0 = const()[name = string("op_16541_begin_0"), val = tensor([40, 0, 0, 0])]; - tensor var_16541_end_0 = const()[name = string("op_16541_end_0"), val = tensor([41, 1, 512, 256])]; - tensor var_16541_end_mask_0 = const()[name = string("op_16541_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_16541_cast_fp16 = slice_by_index(begin = var_16541_begin_0, end = var_16541_end_0, end_mask = var_16541_end_mask_0, x = coreml_update_state_95)[name = string("op_16541_cast_fp16")]; - tensor var_16580 = const()[name = string("op_16580"), val = tensor([1, 4, 1, 1])]; - tensor x_341_cast_fp16 = tile(reps = var_16580, x = var_16534_cast_fp16)[name = string("x_341_cast_fp16")]; - tensor var_16600 = const()[name = string("op_16600"), val = tensor([1, 4, 1, 1])]; - tensor x_347_cast_fp16 = tile(reps = var_16600, x = var_16541_cast_fp16)[name = string("x_347_cast_fp16")]; - bool var_16627_transpose_x_0 = const()[name = string("op_16627_transpose_x_0"), val = bool(false)]; - bool var_16627_transpose_y_0 = const()[name = string("op_16627_transpose_y_0"), val = bool(true)]; - tensor var_16627 = matmul(transpose_x = var_16627_transpose_x_0, transpose_y = var_16627_transpose_y_0, x = query_states_171_cast_fp16, y = x_341_cast_fp16)[name = string("op_16627")]; - fp16 var_16628_to_fp16 = const()[name = string("op_16628_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_85_cast_fp16 = mul(x = var_16627, y = var_16628_to_fp16)[name = string("attn_weights_85_cast_fp16")]; - tensor attn_weights_87_cast_fp16 = add(x = attn_weights_85_cast_fp16, y = mask_slice_1)[name = string("attn_weights_87_cast_fp16")]; - int32 var_16663 = const()[name = string("op_16663"), val = int32(-1)]; - tensor var_16665_cast_fp16 = softmax(axis = var_16663, x = attn_weights_87_cast_fp16)[name = string("op_16665_cast_fp16")]; - tensor concat_390 = const()[name = string("concat_390"), val = tensor([4, 64, 512])]; - tensor reshape_63_cast_fp16 = reshape(shape = concat_390, x = var_16665_cast_fp16)[name = string("reshape_63_cast_fp16")]; - tensor concat_391 = const()[name = string("concat_391"), val = tensor([4, 512, 256])]; - tensor reshape_64_cast_fp16 = reshape(shape = concat_391, x = x_347_cast_fp16)[name = string("reshape_64_cast_fp16")]; - bool matmul_21_transpose_x_0 = const()[name = string("matmul_21_transpose_x_0"), val = bool(false)]; - bool matmul_21_transpose_y_0 = const()[name = string("matmul_21_transpose_y_0"), val = bool(false)]; - tensor matmul_21_cast_fp16 = matmul(transpose_x = matmul_21_transpose_x_0, transpose_y = matmul_21_transpose_y_0, x = reshape_63_cast_fp16, y = reshape_64_cast_fp16)[name = string("matmul_21_cast_fp16")]; - tensor concat_395 = const()[name = string("concat_395"), val = tensor([1, 4, 64, 256])]; - tensor reshape_65_cast_fp16 = reshape(shape = concat_395, x = matmul_21_cast_fp16)[name = string("reshape_65_cast_fp16")]; - tensor var_16677_perm_0 = const()[name = string("op_16677_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_16696 = const()[name = string("op_16696"), val = tensor([1, 64, 1024])]; - tensor var_16677_cast_fp16 = transpose(perm = var_16677_perm_0, x = reshape_65_cast_fp16)[name = string("transpose_40")]; - tensor attn_output_215_cast_fp16 = reshape(shape = var_16696, x = var_16677_cast_fp16)[name = string("attn_output_215_cast_fp16")]; - tensor var_16701 = const()[name = string("op_16701"), val = tensor([0, 2, 1])]; - string var_16717_pad_type_0 = const()[name = string("op_16717_pad_type_0"), val = string("valid")]; - int32 var_16717_groups_0 = const()[name = string("op_16717_groups_0"), val = int32(1)]; - tensor var_16717_strides_0 = const()[name = string("op_16717_strides_0"), val = tensor([1])]; - tensor var_16717_pad_0 = const()[name = string("op_16717_pad_0"), val = tensor([0, 0])]; - tensor var_16717_dilations_0 = const()[name = string("op_16717_dilations_0"), val = tensor([1])]; - tensor squeeze_21_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(452401024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453285824))))[name = string("squeeze_21_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_16702_cast_fp16 = transpose(perm = var_16701, x = attn_output_215_cast_fp16)[name = string("transpose_39")]; - tensor var_16717_cast_fp16 = conv(dilations = var_16717_dilations_0, groups = var_16717_groups_0, pad = var_16717_pad_0, pad_type = var_16717_pad_type_0, strides = var_16717_strides_0, weight = squeeze_21_cast_fp16_to_fp32_to_fp16_palettized, x = var_16702_cast_fp16)[name = string("op_16717_cast_fp16")]; - tensor var_16721 = const()[name = string("op_16721"), val = tensor([0, 2, 1])]; - int32 var_16732 = const()[name = string("op_16732"), val = int32(-1)]; - fp16 const_934_promoted_to_fp16 = const()[name = string("const_934_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_345_cast_fp16 = transpose(perm = var_16721, x = var_16717_cast_fp16)[name = string("transpose_38")]; - tensor var_16734_cast_fp16 = mul(x = hidden_states_345_cast_fp16, y = const_934_promoted_to_fp16)[name = string("op_16734_cast_fp16")]; - bool input_431_interleave_0 = const()[name = string("input_431_interleave_0"), val = bool(false)]; - tensor input_431_cast_fp16 = concat(axis = var_16732, interleave = input_431_interleave_0, values = (hidden_states_345_cast_fp16, var_16734_cast_fp16))[name = string("input_431_cast_fp16")]; - tensor normed_517_axes_0 = const()[name = string("normed_517_axes_0"), val = tensor([-1])]; - fp16 var_16729_to_fp16 = const()[name = string("op_16729_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_517_cast_fp16 = layer_norm(axes = normed_517_axes_0, epsilon = var_16729_to_fp16, x = input_431_cast_fp16)[name = string("normed_517_cast_fp16")]; - tensor normed_519_begin_0 = const()[name = string("normed_519_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_519_end_0 = const()[name = string("normed_519_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_519_end_mask_0 = const()[name = string("normed_519_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_519_cast_fp16 = slice_by_index(begin = normed_519_begin_0, end = normed_519_end_0, end_mask = normed_519_end_mask_0, x = normed_517_cast_fp16)[name = string("normed_519_cast_fp16")]; - tensor var_16748_to_fp16 = const()[name = string("op_16748_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453304320)))]; - tensor attn_output_219_cast_fp16 = mul(x = normed_519_cast_fp16, y = var_16748_to_fp16)[name = string("attn_output_219_cast_fp16")]; - tensor hidden_states_347_cast_fp16 = add(x = hidden_states_337_cast_fp16, y = attn_output_219_cast_fp16)[name = string("hidden_states_347_cast_fp16")]; - int32 var_16761 = const()[name = string("op_16761"), val = int32(-1)]; - fp16 const_938_promoted_to_fp16 = const()[name = string("const_938_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_16763_cast_fp16 = mul(x = hidden_states_347_cast_fp16, y = const_938_promoted_to_fp16)[name = string("op_16763_cast_fp16")]; - bool input_433_interleave_0 = const()[name = string("input_433_interleave_0"), val = bool(false)]; - tensor input_433_cast_fp16 = concat(axis = var_16761, interleave = input_433_interleave_0, values = (hidden_states_347_cast_fp16, var_16763_cast_fp16))[name = string("input_433_cast_fp16")]; - tensor normed_521_axes_0 = const()[name = string("normed_521_axes_0"), val = tensor([-1])]; - fp16 var_16758_to_fp16 = const()[name = string("op_16758_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_521_cast_fp16 = layer_norm(axes = normed_521_axes_0, epsilon = var_16758_to_fp16, x = input_433_cast_fp16)[name = string("normed_521_cast_fp16")]; - tensor normed_523_begin_0 = const()[name = string("normed_523_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_523_end_0 = const()[name = string("normed_523_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_523_end_mask_0 = const()[name = string("normed_523_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_523_cast_fp16 = slice_by_index(begin = normed_523_begin_0, end = normed_523_end_0, end_mask = normed_523_end_mask_0, x = normed_521_cast_fp16)[name = string("normed_523_cast_fp16")]; - tensor var_16777_to_fp16 = const()[name = string("op_16777_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453306688)))]; - tensor x_349_cast_fp16 = mul(x = normed_523_cast_fp16, y = var_16777_to_fp16)[name = string("x_349_cast_fp16")]; - tensor var_16789 = const()[name = string("op_16789"), val = tensor([0, 2, 1])]; - tensor input_435_axes_0 = const()[name = string("input_435_axes_0"), val = tensor([2])]; - tensor var_16790_cast_fp16 = transpose(perm = var_16789, x = x_349_cast_fp16)[name = string("transpose_37")]; - tensor input_435_cast_fp16 = expand_dims(axes = input_435_axes_0, x = var_16790_cast_fp16)[name = string("input_435_cast_fp16")]; - string x_351_pad_type_0 = const()[name = string("x_351_pad_type_0"), val = string("valid")]; - tensor x_351_strides_0 = const()[name = string("x_351_strides_0"), val = tensor([1, 1])]; - tensor x_351_pad_0 = const()[name = string("x_351_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_351_dilations_0 = const()[name = string("x_351_dilations_0"), val = tensor([1, 1])]; - int32 x_351_groups_0 = const()[name = string("x_351_groups_0"), val = int32(1)]; - tensor model_model_layers_21_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1128260800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1134232832))))[name = string("model_model_layers_21_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_351_cast_fp16 = conv(dilations = x_351_dilations_0, groups = x_351_groups_0, pad = x_351_pad_0, pad_type = x_351_pad_type_0, strides = x_351_strides_0, weight = model_model_layers_21_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_435_cast_fp16)[name = string("x_351_cast_fp16")]; - string b_43_pad_type_0 = const()[name = string("b_43_pad_type_0"), val = string("valid")]; - tensor b_43_strides_0 = const()[name = string("b_43_strides_0"), val = tensor([1, 1])]; - tensor b_43_pad_0 = const()[name = string("b_43_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_43_dilations_0 = const()[name = string("b_43_dilations_0"), val = tensor([1, 1])]; - int32 b_43_groups_0 = const()[name = string("b_43_groups_0"), val = int32(1)]; - tensor model_model_layers_21_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1134343488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1140315520))))[name = string("model_model_layers_21_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_43_cast_fp16 = conv(dilations = b_43_dilations_0, groups = b_43_groups_0, pad = b_43_pad_0, pad_type = b_43_pad_type_0, strides = b_43_strides_0, weight = model_model_layers_21_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_435_cast_fp16)[name = string("b_43_cast_fp16")]; - string var_16815_mode_0 = const()[name = string("op_16815_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_16815_cast_fp16 = gelu(mode = var_16815_mode_0, x = x_351_cast_fp16)[name = string("op_16815_cast_fp16")]; - tensor input_437_cast_fp16 = mul(x = var_16815_cast_fp16, y = b_43_cast_fp16)[name = string("input_437_cast_fp16")]; - string e_43_pad_type_0 = const()[name = string("e_43_pad_type_0"), val = string("valid")]; - tensor e_43_strides_0 = const()[name = string("e_43_strides_0"), val = tensor([1, 1])]; - tensor e_43_pad_0 = const()[name = string("e_43_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_43_dilations_0 = const()[name = string("e_43_dilations_0"), val = tensor([1, 1])]; - int32 e_43_groups_0 = const()[name = string("e_43_groups_0"), val = int32(1)]; - tensor model_model_layers_21_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(465474432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471446464))))[name = string("model_model_layers_21_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_43_cast_fp16 = conv(dilations = e_43_dilations_0, groups = e_43_groups_0, pad = e_43_pad_0, pad_type = e_43_pad_type_0, strides = e_43_strides_0, weight = model_model_layers_21_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_437_cast_fp16)[name = string("e_43_cast_fp16")]; - tensor var_16823_axes_0 = const()[name = string("op_16823_axes_0"), val = tensor([2])]; - tensor var_16823_cast_fp16 = squeeze(axes = var_16823_axes_0, x = e_43_cast_fp16)[name = string("op_16823_cast_fp16")]; - tensor var_16824 = const()[name = string("op_16824"), val = tensor([0, 2, 1])]; - int32 var_16835 = const()[name = string("op_16835"), val = int32(-1)]; - fp16 const_942_promoted_to_fp16 = const()[name = string("const_942_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_349_cast_fp16 = transpose(perm = var_16824, x = var_16823_cast_fp16)[name = string("transpose_36")]; - tensor var_16837_cast_fp16 = mul(x = hidden_states_349_cast_fp16, y = const_942_promoted_to_fp16)[name = string("op_16837_cast_fp16")]; - bool input_439_interleave_0 = const()[name = string("input_439_interleave_0"), val = bool(false)]; - tensor input_439_cast_fp16 = concat(axis = var_16835, interleave = input_439_interleave_0, values = (hidden_states_349_cast_fp16, var_16837_cast_fp16))[name = string("input_439_cast_fp16")]; - tensor normed_525_axes_0 = const()[name = string("normed_525_axes_0"), val = tensor([-1])]; - fp16 var_16832_to_fp16 = const()[name = string("op_16832_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_525_cast_fp16 = layer_norm(axes = normed_525_axes_0, epsilon = var_16832_to_fp16, x = input_439_cast_fp16)[name = string("normed_525_cast_fp16")]; - tensor normed_527_begin_0 = const()[name = string("normed_527_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_527_end_0 = const()[name = string("normed_527_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_527_end_mask_0 = const()[name = string("normed_527_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_527_cast_fp16 = slice_by_index(begin = normed_527_begin_0, end = normed_527_end_0, end_mask = normed_527_end_mask_0, x = normed_525_cast_fp16)[name = string("normed_527_cast_fp16")]; - tensor var_16851_to_fp16 = const()[name = string("op_16851_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471464960)))]; - tensor hidden_states_351_cast_fp16 = mul(x = normed_527_cast_fp16, y = var_16851_to_fp16)[name = string("hidden_states_351_cast_fp16")]; - tensor hidden_states_353_cast_fp16 = add(x = hidden_states_347_cast_fp16, y = hidden_states_351_cast_fp16)[name = string("hidden_states_353_cast_fp16")]; - int32 var_16905 = const()[name = string("op_16905"), val = int32(-1)]; - fp16 const_947_promoted_to_fp16 = const()[name = string("const_947_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_16907_cast_fp16 = mul(x = hidden_states_353_cast_fp16, y = const_947_promoted_to_fp16)[name = string("op_16907_cast_fp16")]; - bool input_441_interleave_0 = const()[name = string("input_441_interleave_0"), val = bool(false)]; - tensor input_441_cast_fp16 = concat(axis = var_16905, interleave = input_441_interleave_0, values = (hidden_states_353_cast_fp16, var_16907_cast_fp16))[name = string("input_441_cast_fp16")]; - tensor normed_529_axes_0 = const()[name = string("normed_529_axes_0"), val = tensor([-1])]; - fp16 var_16902_to_fp16 = const()[name = string("op_16902_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_529_cast_fp16 = layer_norm(axes = normed_529_axes_0, epsilon = var_16902_to_fp16, x = input_441_cast_fp16)[name = string("normed_529_cast_fp16")]; - tensor normed_531_begin_0 = const()[name = string("normed_531_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_531_end_0 = const()[name = string("normed_531_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_531_end_mask_0 = const()[name = string("normed_531_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_531_cast_fp16 = slice_by_index(begin = normed_531_begin_0, end = normed_531_end_0, end_mask = normed_531_end_mask_0, x = normed_529_cast_fp16)[name = string("normed_531_cast_fp16")]; - tensor var_16921_to_fp16 = const()[name = string("op_16921_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471467328)))]; - tensor hidden_states_355_cast_fp16 = mul(x = normed_531_cast_fp16, y = var_16921_to_fp16)[name = string("hidden_states_355_cast_fp16")]; - tensor var_16932 = const()[name = string("op_16932"), val = tensor([0, 2, 1])]; - tensor var_16935_axes_0 = const()[name = string("op_16935_axes_0"), val = tensor([2])]; - tensor var_16933_cast_fp16 = transpose(perm = var_16932, x = hidden_states_355_cast_fp16)[name = string("transpose_35")]; - tensor var_16935_cast_fp16 = expand_dims(axes = var_16935_axes_0, x = var_16933_cast_fp16)[name = string("op_16935_cast_fp16")]; - string query_states_177_pad_type_0 = const()[name = string("query_states_177_pad_type_0"), val = string("valid")]; - tensor query_states_177_strides_0 = const()[name = string("query_states_177_strides_0"), val = tensor([1, 1])]; - tensor query_states_177_pad_0 = const()[name = string("query_states_177_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_177_dilations_0 = const()[name = string("query_states_177_dilations_0"), val = tensor([1, 1])]; - int32 query_states_177_groups_0 = const()[name = string("query_states_177_groups_0"), val = int32(1)]; - tensor query_states_177 = conv(dilations = query_states_177_dilations_0, groups = query_states_177_groups_0, pad = query_states_177_pad_0, pad_type = query_states_177_pad_type_0, strides = query_states_177_strides_0, weight = model_model_layers_22_self_attn_q_proj_weight_palettized, x = var_16935_cast_fp16)[name = string("query_states_177")]; - string key_states_221_pad_type_0 = const()[name = string("key_states_221_pad_type_0"), val = string("valid")]; - tensor key_states_221_strides_0 = const()[name = string("key_states_221_strides_0"), val = tensor([1, 1])]; - tensor key_states_221_pad_0 = const()[name = string("key_states_221_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_221_dilations_0 = const()[name = string("key_states_221_dilations_0"), val = tensor([1, 1])]; - int32 key_states_221_groups_0 = const()[name = string("key_states_221_groups_0"), val = int32(1)]; - tensor key_states_221 = conv(dilations = key_states_221_dilations_0, groups = key_states_221_groups_0, pad = key_states_221_pad_0, pad_type = key_states_221_pad_type_0, strides = key_states_221_strides_0, weight = model_model_layers_22_self_attn_k_proj_weight_palettized, x = var_16935_cast_fp16)[name = string("key_states_221")]; - string value_states_177_pad_type_0 = const()[name = string("value_states_177_pad_type_0"), val = string("valid")]; - tensor value_states_177_strides_0 = const()[name = string("value_states_177_strides_0"), val = tensor([1, 1])]; - tensor value_states_177_pad_0 = const()[name = string("value_states_177_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_177_dilations_0 = const()[name = string("value_states_177_dilations_0"), val = tensor([1, 1])]; - int32 value_states_177_groups_0 = const()[name = string("value_states_177_groups_0"), val = int32(1)]; - tensor value_states_177 = conv(dilations = value_states_177_dilations_0, groups = value_states_177_groups_0, pad = value_states_177_pad_0, pad_type = value_states_177_pad_type_0, strides = value_states_177_strides_0, weight = model_model_layers_22_self_attn_v_proj_weight_palettized, x = var_16935_cast_fp16)[name = string("value_states_177")]; - tensor var_16977 = const()[name = string("op_16977"), val = tensor([1, 4, 256, 64])]; - tensor var_16978 = reshape(shape = var_16977, x = query_states_177)[name = string("op_16978")]; - tensor var_16983 = const()[name = string("op_16983"), val = tensor([0, 1, 3, 2])]; - tensor var_16988 = const()[name = string("op_16988"), val = tensor([1, 1, 256, 64])]; - tensor var_16989 = reshape(shape = var_16988, x = key_states_221)[name = string("op_16989")]; - tensor var_16994 = const()[name = string("op_16994"), val = tensor([0, 1, 3, 2])]; - tensor var_16999 = const()[name = string("op_16999"), val = tensor([1, 1, 256, 64])]; - tensor var_17000 = reshape(shape = var_16999, x = value_states_177)[name = string("op_17000")]; - tensor var_17005 = const()[name = string("op_17005"), val = tensor([0, 1, 3, 2])]; - int32 var_17016 = const()[name = string("op_17016"), val = int32(-1)]; - fp16 const_952_promoted = const()[name = string("const_952_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_357 = transpose(perm = var_16983, x = var_16978)[name = string("transpose_34")]; - tensor var_17018 = mul(x = hidden_states_357, y = const_952_promoted)[name = string("op_17018")]; - bool input_445_interleave_0 = const()[name = string("input_445_interleave_0"), val = bool(false)]; - tensor input_445 = concat(axis = var_17016, interleave = input_445_interleave_0, values = (hidden_states_357, var_17018))[name = string("input_445")]; - tensor normed_533_axes_0 = const()[name = string("normed_533_axes_0"), val = tensor([-1])]; - fp16 var_17013_to_fp16 = const()[name = string("op_17013_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_533_cast_fp16 = layer_norm(axes = normed_533_axes_0, epsilon = var_17013_to_fp16, x = input_445)[name = string("normed_533_cast_fp16")]; - tensor normed_535_begin_0 = const()[name = string("normed_535_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_535_end_0 = const()[name = string("normed_535_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_535_end_mask_0 = const()[name = string("normed_535_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_535 = slice_by_index(begin = normed_535_begin_0, end = normed_535_end_0, end_mask = normed_535_end_mask_0, x = normed_533_cast_fp16)[name = string("normed_535")]; - tensor var_17032_to_fp16 = const()[name = string("op_17032_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471469696)))]; - tensor q_45_cast_fp16 = mul(x = normed_535, y = var_17032_to_fp16)[name = string("q_45_cast_fp16")]; - int32 var_17043 = const()[name = string("op_17043"), val = int32(-1)]; - fp16 const_956_promoted = const()[name = string("const_956_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_359 = transpose(perm = var_16994, x = var_16989)[name = string("transpose_33")]; - tensor var_17045 = mul(x = hidden_states_359, y = const_956_promoted)[name = string("op_17045")]; - bool input_447_interleave_0 = const()[name = string("input_447_interleave_0"), val = bool(false)]; - tensor input_447 = concat(axis = var_17043, interleave = input_447_interleave_0, values = (hidden_states_359, var_17045))[name = string("input_447")]; - tensor normed_537_axes_0 = const()[name = string("normed_537_axes_0"), val = tensor([-1])]; - fp16 var_17040_to_fp16 = const()[name = string("op_17040_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_537_cast_fp16 = layer_norm(axes = normed_537_axes_0, epsilon = var_17040_to_fp16, x = input_447)[name = string("normed_537_cast_fp16")]; - tensor normed_539_begin_0 = const()[name = string("normed_539_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_539_end_0 = const()[name = string("normed_539_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_539_end_mask_0 = const()[name = string("normed_539_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_539 = slice_by_index(begin = normed_539_begin_0, end = normed_539_end_0, end_mask = normed_539_end_mask_0, x = normed_537_cast_fp16)[name = string("normed_539")]; - tensor var_17059_to_fp16 = const()[name = string("op_17059_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471470272)))]; - tensor k_45_cast_fp16 = mul(x = normed_539, y = var_17059_to_fp16)[name = string("k_45_cast_fp16")]; - tensor var_17073_cast_fp16 = mul(x = q_45_cast_fp16, y = cos_5)[name = string("op_17073_cast_fp16")]; - tensor x1_89_begin_0 = const()[name = string("x1_89_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_89_end_0 = const()[name = string("x1_89_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_89_end_mask_0 = const()[name = string("x1_89_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_89_cast_fp16 = slice_by_index(begin = x1_89_begin_0, end = x1_89_end_0, end_mask = x1_89_end_mask_0, x = q_45_cast_fp16)[name = string("x1_89_cast_fp16")]; - tensor x2_89_begin_0 = const()[name = string("x2_89_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_89_end_0 = const()[name = string("x2_89_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_89_end_mask_0 = const()[name = string("x2_89_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_89_cast_fp16 = slice_by_index(begin = x2_89_begin_0, end = x2_89_end_0, end_mask = x2_89_end_mask_0, x = q_45_cast_fp16)[name = string("x2_89_cast_fp16")]; - fp16 const_962_promoted_to_fp16 = const()[name = string("const_962_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_17094_cast_fp16 = mul(x = x2_89_cast_fp16, y = const_962_promoted_to_fp16)[name = string("op_17094_cast_fp16")]; - int32 var_17096 = const()[name = string("op_17096"), val = int32(-1)]; - bool var_17097_interleave_0 = const()[name = string("op_17097_interleave_0"), val = bool(false)]; - tensor var_17097_cast_fp16 = concat(axis = var_17096, interleave = var_17097_interleave_0, values = (var_17094_cast_fp16, x1_89_cast_fp16))[name = string("op_17097_cast_fp16")]; - tensor var_17098_cast_fp16 = mul(x = var_17097_cast_fp16, y = sin_5)[name = string("op_17098_cast_fp16")]; - tensor query_states_179_cast_fp16 = add(x = var_17073_cast_fp16, y = var_17098_cast_fp16)[name = string("query_states_179_cast_fp16")]; - tensor var_17101_cast_fp16 = mul(x = k_45_cast_fp16, y = cos_5)[name = string("op_17101_cast_fp16")]; - tensor x1_91_begin_0 = const()[name = string("x1_91_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_91_end_0 = const()[name = string("x1_91_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_91_end_mask_0 = const()[name = string("x1_91_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_91_cast_fp16 = slice_by_index(begin = x1_91_begin_0, end = x1_91_end_0, end_mask = x1_91_end_mask_0, x = k_45_cast_fp16)[name = string("x1_91_cast_fp16")]; - tensor x2_91_begin_0 = const()[name = string("x2_91_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_91_end_0 = const()[name = string("x2_91_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_91_end_mask_0 = const()[name = string("x2_91_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_91_cast_fp16 = slice_by_index(begin = x2_91_begin_0, end = x2_91_end_0, end_mask = x2_91_end_mask_0, x = k_45_cast_fp16)[name = string("x2_91_cast_fp16")]; - fp16 const_965_promoted_to_fp16 = const()[name = string("const_965_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_17122_cast_fp16 = mul(x = x2_91_cast_fp16, y = const_965_promoted_to_fp16)[name = string("op_17122_cast_fp16")]; - int32 var_17124 = const()[name = string("op_17124"), val = int32(-1)]; - bool var_17125_interleave_0 = const()[name = string("op_17125_interleave_0"), val = bool(false)]; - tensor var_17125_cast_fp16 = concat(axis = var_17124, interleave = var_17125_interleave_0, values = (var_17122_cast_fp16, x1_91_cast_fp16))[name = string("op_17125_cast_fp16")]; - tensor var_17126_cast_fp16 = mul(x = var_17125_cast_fp16, y = sin_5)[name = string("op_17126_cast_fp16")]; - tensor key_states_223_cast_fp16 = add(x = var_17101_cast_fp16, y = var_17126_cast_fp16)[name = string("key_states_223_cast_fp16")]; - tensor expand_dims_264 = const()[name = string("expand_dims_264"), val = tensor([19])]; - tensor expand_dims_265 = const()[name = string("expand_dims_265"), val = tensor([0])]; - tensor expand_dims_267 = const()[name = string("expand_dims_267"), val = tensor([0])]; - tensor expand_dims_268 = const()[name = string("expand_dims_268"), val = tensor([20])]; - int32 concat_398_axis_0 = const()[name = string("concat_398_axis_0"), val = int32(0)]; - bool concat_398_interleave_0 = const()[name = string("concat_398_interleave_0"), val = bool(false)]; - tensor concat_398 = concat(axis = concat_398_axis_0, interleave = concat_398_interleave_0, values = (expand_dims_264, expand_dims_265, current_pos, expand_dims_267))[name = string("concat_398")]; - tensor concat_399_values1_0 = const()[name = string("concat_399_values1_0"), val = tensor([0])]; - tensor concat_399_values3_0 = const()[name = string("concat_399_values3_0"), val = tensor([0])]; - int32 concat_399_axis_0 = const()[name = string("concat_399_axis_0"), val = int32(0)]; - bool concat_399_interleave_0 = const()[name = string("concat_399_interleave_0"), val = bool(false)]; - tensor concat_399 = concat(axis = concat_399_axis_0, interleave = concat_399_interleave_0, values = (expand_dims_268, concat_399_values1_0, end_pos_1, concat_399_values3_0))[name = string("concat_399")]; - tensor model_model_kv_cache_local_internal_tensor_assign_39_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_39_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_39_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_39_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_39_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_39_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_39_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_39_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_39_cast_fp16 = slice_update(begin = concat_398, begin_mask = model_model_kv_cache_local_internal_tensor_assign_39_begin_mask_0, end = concat_399, end_mask = model_model_kv_cache_local_internal_tensor_assign_39_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_39_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_39_stride_0, update = key_states_223_cast_fp16, x = coreml_update_state_95)[name = string("model_model_kv_cache_local_internal_tensor_assign_39_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_39_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_148_write_state")]; - tensor coreml_update_state_96 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_148")]; - tensor expand_dims_270 = const()[name = string("expand_dims_270"), val = tensor([41])]; - tensor expand_dims_271 = const()[name = string("expand_dims_271"), val = tensor([0])]; - tensor expand_dims_273 = const()[name = string("expand_dims_273"), val = tensor([0])]; - tensor expand_dims_274 = const()[name = string("expand_dims_274"), val = tensor([42])]; - int32 concat_402_axis_0 = const()[name = string("concat_402_axis_0"), val = int32(0)]; - bool concat_402_interleave_0 = const()[name = string("concat_402_interleave_0"), val = bool(false)]; - tensor concat_402 = concat(axis = concat_402_axis_0, interleave = concat_402_interleave_0, values = (expand_dims_270, expand_dims_271, current_pos, expand_dims_273))[name = string("concat_402")]; - tensor concat_403_values1_0 = const()[name = string("concat_403_values1_0"), val = tensor([0])]; - tensor concat_403_values3_0 = const()[name = string("concat_403_values3_0"), val = tensor([0])]; - int32 concat_403_axis_0 = const()[name = string("concat_403_axis_0"), val = int32(0)]; - bool concat_403_interleave_0 = const()[name = string("concat_403_interleave_0"), val = bool(false)]; - tensor concat_403 = concat(axis = concat_403_axis_0, interleave = concat_403_interleave_0, values = (expand_dims_274, concat_403_values1_0, end_pos_1, concat_403_values3_0))[name = string("concat_403")]; - tensor model_model_kv_cache_local_internal_tensor_assign_40_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_40_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_40_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_40_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_40_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_40_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_40_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_40_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor value_states_179 = transpose(perm = var_17005, x = var_17000)[name = string("transpose_32")]; - tensor model_model_kv_cache_local_internal_tensor_assign_40_cast_fp16 = slice_update(begin = concat_402, begin_mask = model_model_kv_cache_local_internal_tensor_assign_40_begin_mask_0, end = concat_403, end_mask = model_model_kv_cache_local_internal_tensor_assign_40_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_40_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_40_stride_0, update = value_states_179, x = coreml_update_state_96)[name = string("model_model_kv_cache_local_internal_tensor_assign_40_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_40_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_149_write_state")]; - tensor coreml_update_state_97 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_149")]; - tensor var_17225_begin_0 = const()[name = string("op_17225_begin_0"), val = tensor([19, 0, 0, 0])]; - tensor var_17225_end_0 = const()[name = string("op_17225_end_0"), val = tensor([20, 1, 512, 256])]; - tensor var_17225_end_mask_0 = const()[name = string("op_17225_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_17225_cast_fp16 = slice_by_index(begin = var_17225_begin_0, end = var_17225_end_0, end_mask = var_17225_end_mask_0, x = coreml_update_state_97)[name = string("op_17225_cast_fp16")]; - tensor var_17232_begin_0 = const()[name = string("op_17232_begin_0"), val = tensor([41, 0, 0, 0])]; - tensor var_17232_end_0 = const()[name = string("op_17232_end_0"), val = tensor([42, 1, 512, 256])]; - tensor var_17232_end_mask_0 = const()[name = string("op_17232_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_17232_cast_fp16 = slice_by_index(begin = var_17232_begin_0, end = var_17232_end_0, end_mask = var_17232_end_mask_0, x = coreml_update_state_97)[name = string("op_17232_cast_fp16")]; - tensor var_17271 = const()[name = string("op_17271"), val = tensor([1, 4, 1, 1])]; - tensor x_357_cast_fp16 = tile(reps = var_17271, x = var_17225_cast_fp16)[name = string("x_357_cast_fp16")]; - tensor var_17291 = const()[name = string("op_17291"), val = tensor([1, 4, 1, 1])]; - tensor x_363_cast_fp16 = tile(reps = var_17291, x = var_17232_cast_fp16)[name = string("x_363_cast_fp16")]; - bool var_17318_transpose_x_0 = const()[name = string("op_17318_transpose_x_0"), val = bool(false)]; - bool var_17318_transpose_y_0 = const()[name = string("op_17318_transpose_y_0"), val = bool(true)]; - tensor var_17318 = matmul(transpose_x = var_17318_transpose_x_0, transpose_y = var_17318_transpose_y_0, x = query_states_179_cast_fp16, y = x_357_cast_fp16)[name = string("op_17318")]; - fp16 var_17319_to_fp16 = const()[name = string("op_17319_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_89_cast_fp16 = mul(x = var_17318, y = var_17319_to_fp16)[name = string("attn_weights_89_cast_fp16")]; - tensor attn_weights_91_cast_fp16 = add(x = attn_weights_89_cast_fp16, y = mask_slice_1)[name = string("attn_weights_91_cast_fp16")]; - int32 var_17354 = const()[name = string("op_17354"), val = int32(-1)]; - tensor var_17356_cast_fp16 = softmax(axis = var_17354, x = attn_weights_91_cast_fp16)[name = string("op_17356_cast_fp16")]; - tensor concat_408 = const()[name = string("concat_408"), val = tensor([4, 64, 512])]; - tensor reshape_66_cast_fp16 = reshape(shape = concat_408, x = var_17356_cast_fp16)[name = string("reshape_66_cast_fp16")]; - tensor concat_409 = const()[name = string("concat_409"), val = tensor([4, 512, 256])]; - tensor reshape_67_cast_fp16 = reshape(shape = concat_409, x = x_363_cast_fp16)[name = string("reshape_67_cast_fp16")]; - bool matmul_22_transpose_x_0 = const()[name = string("matmul_22_transpose_x_0"), val = bool(false)]; - bool matmul_22_transpose_y_0 = const()[name = string("matmul_22_transpose_y_0"), val = bool(false)]; - tensor matmul_22_cast_fp16 = matmul(transpose_x = matmul_22_transpose_x_0, transpose_y = matmul_22_transpose_y_0, x = reshape_66_cast_fp16, y = reshape_67_cast_fp16)[name = string("matmul_22_cast_fp16")]; - tensor concat_413 = const()[name = string("concat_413"), val = tensor([1, 4, 64, 256])]; - tensor reshape_68_cast_fp16 = reshape(shape = concat_413, x = matmul_22_cast_fp16)[name = string("reshape_68_cast_fp16")]; - tensor var_17368_perm_0 = const()[name = string("op_17368_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_17387 = const()[name = string("op_17387"), val = tensor([1, 64, 1024])]; - tensor var_17368_cast_fp16 = transpose(perm = var_17368_perm_0, x = reshape_68_cast_fp16)[name = string("transpose_31")]; - tensor attn_output_225_cast_fp16 = reshape(shape = var_17387, x = var_17368_cast_fp16)[name = string("attn_output_225_cast_fp16")]; - tensor var_17392 = const()[name = string("op_17392"), val = tensor([0, 2, 1])]; - string var_17408_pad_type_0 = const()[name = string("op_17408_pad_type_0"), val = string("valid")]; - int32 var_17408_groups_0 = const()[name = string("op_17408_groups_0"), val = int32(1)]; - tensor var_17408_strides_0 = const()[name = string("op_17408_strides_0"), val = tensor([1])]; - tensor var_17408_pad_0 = const()[name = string("op_17408_pad_0"), val = tensor([0, 0])]; - tensor var_17408_dilations_0 = const()[name = string("op_17408_dilations_0"), val = tensor([1])]; - tensor squeeze_22_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471470848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(472355648))))[name = string("squeeze_22_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_17393_cast_fp16 = transpose(perm = var_17392, x = attn_output_225_cast_fp16)[name = string("transpose_30")]; - tensor var_17408_cast_fp16 = conv(dilations = var_17408_dilations_0, groups = var_17408_groups_0, pad = var_17408_pad_0, pad_type = var_17408_pad_type_0, strides = var_17408_strides_0, weight = squeeze_22_cast_fp16_to_fp32_to_fp16_palettized, x = var_17393_cast_fp16)[name = string("op_17408_cast_fp16")]; - tensor var_17412 = const()[name = string("op_17412"), val = tensor([0, 2, 1])]; - int32 var_17423 = const()[name = string("op_17423"), val = int32(-1)]; - fp16 const_977_promoted_to_fp16 = const()[name = string("const_977_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_361_cast_fp16 = transpose(perm = var_17412, x = var_17408_cast_fp16)[name = string("transpose_29")]; - tensor var_17425_cast_fp16 = mul(x = hidden_states_361_cast_fp16, y = const_977_promoted_to_fp16)[name = string("op_17425_cast_fp16")]; - bool input_451_interleave_0 = const()[name = string("input_451_interleave_0"), val = bool(false)]; - tensor input_451_cast_fp16 = concat(axis = var_17423, interleave = input_451_interleave_0, values = (hidden_states_361_cast_fp16, var_17425_cast_fp16))[name = string("input_451_cast_fp16")]; - tensor normed_541_axes_0 = const()[name = string("normed_541_axes_0"), val = tensor([-1])]; - fp16 var_17420_to_fp16 = const()[name = string("op_17420_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_541_cast_fp16 = layer_norm(axes = normed_541_axes_0, epsilon = var_17420_to_fp16, x = input_451_cast_fp16)[name = string("normed_541_cast_fp16")]; - tensor normed_543_begin_0 = const()[name = string("normed_543_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_543_end_0 = const()[name = string("normed_543_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_543_end_mask_0 = const()[name = string("normed_543_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_543_cast_fp16 = slice_by_index(begin = normed_543_begin_0, end = normed_543_end_0, end_mask = normed_543_end_mask_0, x = normed_541_cast_fp16)[name = string("normed_543_cast_fp16")]; - tensor var_17439_to_fp16 = const()[name = string("op_17439_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(472374144)))]; - tensor attn_output_229_cast_fp16 = mul(x = normed_543_cast_fp16, y = var_17439_to_fp16)[name = string("attn_output_229_cast_fp16")]; - tensor hidden_states_363_cast_fp16 = add(x = hidden_states_353_cast_fp16, y = attn_output_229_cast_fp16)[name = string("hidden_states_363_cast_fp16")]; - int32 var_17452 = const()[name = string("op_17452"), val = int32(-1)]; - fp16 const_981_promoted_to_fp16 = const()[name = string("const_981_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_17454_cast_fp16 = mul(x = hidden_states_363_cast_fp16, y = const_981_promoted_to_fp16)[name = string("op_17454_cast_fp16")]; - bool input_453_interleave_0 = const()[name = string("input_453_interleave_0"), val = bool(false)]; - tensor input_453_cast_fp16 = concat(axis = var_17452, interleave = input_453_interleave_0, values = (hidden_states_363_cast_fp16, var_17454_cast_fp16))[name = string("input_453_cast_fp16")]; - tensor normed_545_axes_0 = const()[name = string("normed_545_axes_0"), val = tensor([-1])]; - fp16 var_17449_to_fp16 = const()[name = string("op_17449_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_545_cast_fp16 = layer_norm(axes = normed_545_axes_0, epsilon = var_17449_to_fp16, x = input_453_cast_fp16)[name = string("normed_545_cast_fp16")]; - tensor normed_547_begin_0 = const()[name = string("normed_547_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_547_end_0 = const()[name = string("normed_547_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_547_end_mask_0 = const()[name = string("normed_547_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_547_cast_fp16 = slice_by_index(begin = normed_547_begin_0, end = normed_547_end_0, end_mask = normed_547_end_mask_0, x = normed_545_cast_fp16)[name = string("normed_547_cast_fp16")]; - tensor var_17468_to_fp16 = const()[name = string("op_17468_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(472376512)))]; - tensor x_365_cast_fp16 = mul(x = normed_547_cast_fp16, y = var_17468_to_fp16)[name = string("x_365_cast_fp16")]; - tensor var_17480 = const()[name = string("op_17480"), val = tensor([0, 2, 1])]; - tensor input_455_axes_0 = const()[name = string("input_455_axes_0"), val = tensor([2])]; - tensor var_17481_cast_fp16 = transpose(perm = var_17480, x = x_365_cast_fp16)[name = string("transpose_28")]; - tensor input_455_cast_fp16 = expand_dims(axes = input_455_axes_0, x = var_17481_cast_fp16)[name = string("input_455_cast_fp16")]; - string x_367_pad_type_0 = const()[name = string("x_367_pad_type_0"), val = string("valid")]; - tensor x_367_strides_0 = const()[name = string("x_367_strides_0"), val = tensor([1, 1])]; - tensor x_367_pad_0 = const()[name = string("x_367_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_367_dilations_0 = const()[name = string("x_367_dilations_0"), val = tensor([1, 1])]; - int32 x_367_groups_0 = const()[name = string("x_367_groups_0"), val = int32(1)]; - tensor model_model_layers_22_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1140426176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1146398208))))[name = string("model_model_layers_22_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_367_cast_fp16 = conv(dilations = x_367_dilations_0, groups = x_367_groups_0, pad = x_367_pad_0, pad_type = x_367_pad_type_0, strides = x_367_strides_0, weight = model_model_layers_22_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_455_cast_fp16)[name = string("x_367_cast_fp16")]; - string b_45_pad_type_0 = const()[name = string("b_45_pad_type_0"), val = string("valid")]; - tensor b_45_strides_0 = const()[name = string("b_45_strides_0"), val = tensor([1, 1])]; - tensor b_45_pad_0 = const()[name = string("b_45_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_45_dilations_0 = const()[name = string("b_45_dilations_0"), val = tensor([1, 1])]; - int32 b_45_groups_0 = const()[name = string("b_45_groups_0"), val = int32(1)]; - tensor model_model_layers_22_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1146508864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1152480896))))[name = string("model_model_layers_22_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_45_cast_fp16 = conv(dilations = b_45_dilations_0, groups = b_45_groups_0, pad = b_45_pad_0, pad_type = b_45_pad_type_0, strides = b_45_strides_0, weight = model_model_layers_22_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_455_cast_fp16)[name = string("b_45_cast_fp16")]; - string var_17506_mode_0 = const()[name = string("op_17506_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_17506_cast_fp16 = gelu(mode = var_17506_mode_0, x = x_367_cast_fp16)[name = string("op_17506_cast_fp16")]; - tensor input_457_cast_fp16 = mul(x = var_17506_cast_fp16, y = b_45_cast_fp16)[name = string("input_457_cast_fp16")]; - string e_45_pad_type_0 = const()[name = string("e_45_pad_type_0"), val = string("valid")]; - tensor e_45_strides_0 = const()[name = string("e_45_strides_0"), val = tensor([1, 1])]; - tensor e_45_pad_0 = const()[name = string("e_45_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_45_dilations_0 = const()[name = string("e_45_dilations_0"), val = tensor([1, 1])]; - int32 e_45_groups_0 = const()[name = string("e_45_groups_0"), val = int32(1)]; - tensor model_model_layers_22_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(484544256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490516288))))[name = string("model_model_layers_22_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_45_cast_fp16 = conv(dilations = e_45_dilations_0, groups = e_45_groups_0, pad = e_45_pad_0, pad_type = e_45_pad_type_0, strides = e_45_strides_0, weight = model_model_layers_22_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_457_cast_fp16)[name = string("e_45_cast_fp16")]; - tensor var_17514_axes_0 = const()[name = string("op_17514_axes_0"), val = tensor([2])]; - tensor var_17514_cast_fp16 = squeeze(axes = var_17514_axes_0, x = e_45_cast_fp16)[name = string("op_17514_cast_fp16")]; - tensor var_17515 = const()[name = string("op_17515"), val = tensor([0, 2, 1])]; - int32 var_17526 = const()[name = string("op_17526"), val = int32(-1)]; - fp16 const_985_promoted_to_fp16 = const()[name = string("const_985_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_365_cast_fp16 = transpose(perm = var_17515, x = var_17514_cast_fp16)[name = string("transpose_27")]; - tensor var_17528_cast_fp16 = mul(x = hidden_states_365_cast_fp16, y = const_985_promoted_to_fp16)[name = string("op_17528_cast_fp16")]; - bool input_459_interleave_0 = const()[name = string("input_459_interleave_0"), val = bool(false)]; - tensor input_459_cast_fp16 = concat(axis = var_17526, interleave = input_459_interleave_0, values = (hidden_states_365_cast_fp16, var_17528_cast_fp16))[name = string("input_459_cast_fp16")]; - tensor normed_549_axes_0 = const()[name = string("normed_549_axes_0"), val = tensor([-1])]; - fp16 var_17523_to_fp16 = const()[name = string("op_17523_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_549_cast_fp16 = layer_norm(axes = normed_549_axes_0, epsilon = var_17523_to_fp16, x = input_459_cast_fp16)[name = string("normed_549_cast_fp16")]; - tensor normed_551_begin_0 = const()[name = string("normed_551_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_551_end_0 = const()[name = string("normed_551_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_551_end_mask_0 = const()[name = string("normed_551_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_551_cast_fp16 = slice_by_index(begin = normed_551_begin_0, end = normed_551_end_0, end_mask = normed_551_end_mask_0, x = normed_549_cast_fp16)[name = string("normed_551_cast_fp16")]; - tensor var_17542_to_fp16 = const()[name = string("op_17542_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490534784)))]; - tensor hidden_states_367_cast_fp16 = mul(x = normed_551_cast_fp16, y = var_17542_to_fp16)[name = string("hidden_states_367_cast_fp16")]; - tensor hidden_states_369_cast_fp16 = add(x = hidden_states_363_cast_fp16, y = hidden_states_367_cast_fp16)[name = string("hidden_states_369_cast_fp16")]; - int32 var_17596 = const()[name = string("op_17596"), val = int32(-1)]; - fp16 const_990_promoted_to_fp16 = const()[name = string("const_990_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_17598_cast_fp16 = mul(x = hidden_states_369_cast_fp16, y = const_990_promoted_to_fp16)[name = string("op_17598_cast_fp16")]; - bool input_461_interleave_0 = const()[name = string("input_461_interleave_0"), val = bool(false)]; - tensor input_461_cast_fp16 = concat(axis = var_17596, interleave = input_461_interleave_0, values = (hidden_states_369_cast_fp16, var_17598_cast_fp16))[name = string("input_461_cast_fp16")]; - tensor normed_553_axes_0 = const()[name = string("normed_553_axes_0"), val = tensor([-1])]; - fp16 var_17593_to_fp16 = const()[name = string("op_17593_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_553_cast_fp16 = layer_norm(axes = normed_553_axes_0, epsilon = var_17593_to_fp16, x = input_461_cast_fp16)[name = string("normed_553_cast_fp16")]; - tensor normed_555_begin_0 = const()[name = string("normed_555_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_555_end_0 = const()[name = string("normed_555_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_555_end_mask_0 = const()[name = string("normed_555_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_555_cast_fp16 = slice_by_index(begin = normed_555_begin_0, end = normed_555_end_0, end_mask = normed_555_end_mask_0, x = normed_553_cast_fp16)[name = string("normed_555_cast_fp16")]; - tensor var_17612_to_fp16 = const()[name = string("op_17612_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490537152)))]; - tensor hidden_states_371_cast_fp16 = mul(x = normed_555_cast_fp16, y = var_17612_to_fp16)[name = string("hidden_states_371_cast_fp16")]; - tensor var_17623 = const()[name = string("op_17623"), val = tensor([0, 2, 1])]; - tensor var_17626_axes_0 = const()[name = string("op_17626_axes_0"), val = tensor([2])]; - tensor var_17624_cast_fp16 = transpose(perm = var_17623, x = hidden_states_371_cast_fp16)[name = string("transpose_26")]; - tensor var_17626_cast_fp16 = expand_dims(axes = var_17626_axes_0, x = var_17624_cast_fp16)[name = string("op_17626_cast_fp16")]; - string query_states_185_pad_type_0 = const()[name = string("query_states_185_pad_type_0"), val = string("valid")]; - tensor query_states_185_strides_0 = const()[name = string("query_states_185_strides_0"), val = tensor([1, 1])]; - tensor query_states_185_pad_0 = const()[name = string("query_states_185_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_185_dilations_0 = const()[name = string("query_states_185_dilations_0"), val = tensor([1, 1])]; - int32 query_states_185_groups_0 = const()[name = string("query_states_185_groups_0"), val = int32(1)]; - tensor query_states_185 = conv(dilations = query_states_185_dilations_0, groups = query_states_185_groups_0, pad = query_states_185_pad_0, pad_type = query_states_185_pad_type_0, strides = query_states_185_strides_0, weight = model_model_layers_23_self_attn_q_proj_weight_palettized, x = var_17626_cast_fp16)[name = string("query_states_185")]; - string key_states_231_pad_type_0 = const()[name = string("key_states_231_pad_type_0"), val = string("valid")]; - tensor key_states_231_strides_0 = const()[name = string("key_states_231_strides_0"), val = tensor([1, 1])]; - tensor key_states_231_pad_0 = const()[name = string("key_states_231_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_231_dilations_0 = const()[name = string("key_states_231_dilations_0"), val = tensor([1, 1])]; - int32 key_states_231_groups_0 = const()[name = string("key_states_231_groups_0"), val = int32(1)]; - tensor key_states_231 = conv(dilations = key_states_231_dilations_0, groups = key_states_231_groups_0, pad = key_states_231_pad_0, pad_type = key_states_231_pad_type_0, strides = key_states_231_strides_0, weight = model_model_layers_23_self_attn_k_proj_weight_palettized, x = var_17626_cast_fp16)[name = string("key_states_231")]; - string value_states_185_pad_type_0 = const()[name = string("value_states_185_pad_type_0"), val = string("valid")]; - tensor value_states_185_strides_0 = const()[name = string("value_states_185_strides_0"), val = tensor([1, 1])]; - tensor value_states_185_pad_0 = const()[name = string("value_states_185_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_185_dilations_0 = const()[name = string("value_states_185_dilations_0"), val = tensor([1, 1])]; - int32 value_states_185_groups_0 = const()[name = string("value_states_185_groups_0"), val = int32(1)]; - tensor value_states_185 = conv(dilations = value_states_185_dilations_0, groups = value_states_185_groups_0, pad = value_states_185_pad_0, pad_type = value_states_185_pad_type_0, strides = value_states_185_strides_0, weight = model_model_layers_23_self_attn_v_proj_weight_palettized, x = var_17626_cast_fp16)[name = string("value_states_185")]; - tensor var_17668 = const()[name = string("op_17668"), val = tensor([1, 4, 256, 64])]; - tensor var_17669 = reshape(shape = var_17668, x = query_states_185)[name = string("op_17669")]; - tensor var_17674 = const()[name = string("op_17674"), val = tensor([0, 1, 3, 2])]; - tensor var_17679 = const()[name = string("op_17679"), val = tensor([1, 1, 256, 64])]; - tensor var_17680 = reshape(shape = var_17679, x = key_states_231)[name = string("op_17680")]; - tensor var_17685 = const()[name = string("op_17685"), val = tensor([0, 1, 3, 2])]; - tensor var_17690 = const()[name = string("op_17690"), val = tensor([1, 1, 256, 64])]; - tensor var_17691 = reshape(shape = var_17690, x = value_states_185)[name = string("op_17691")]; - tensor var_17696 = const()[name = string("op_17696"), val = tensor([0, 1, 3, 2])]; - int32 var_17707 = const()[name = string("op_17707"), val = int32(-1)]; - fp16 const_995_promoted = const()[name = string("const_995_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_373 = transpose(perm = var_17674, x = var_17669)[name = string("transpose_25")]; - tensor var_17709 = mul(x = hidden_states_373, y = const_995_promoted)[name = string("op_17709")]; - bool input_465_interleave_0 = const()[name = string("input_465_interleave_0"), val = bool(false)]; - tensor input_465 = concat(axis = var_17707, interleave = input_465_interleave_0, values = (hidden_states_373, var_17709))[name = string("input_465")]; - tensor normed_557_axes_0 = const()[name = string("normed_557_axes_0"), val = tensor([-1])]; - fp16 var_17704_to_fp16 = const()[name = string("op_17704_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_557_cast_fp16 = layer_norm(axes = normed_557_axes_0, epsilon = var_17704_to_fp16, x = input_465)[name = string("normed_557_cast_fp16")]; - tensor normed_559_begin_0 = const()[name = string("normed_559_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_559_end_0 = const()[name = string("normed_559_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_559_end_mask_0 = const()[name = string("normed_559_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_559 = slice_by_index(begin = normed_559_begin_0, end = normed_559_end_0, end_mask = normed_559_end_mask_0, x = normed_557_cast_fp16)[name = string("normed_559")]; - tensor var_17723_to_fp16 = const()[name = string("op_17723_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490539520)))]; - tensor q_47_cast_fp16 = mul(x = normed_559, y = var_17723_to_fp16)[name = string("q_47_cast_fp16")]; - int32 var_17734 = const()[name = string("op_17734"), val = int32(-1)]; - fp16 const_999_promoted = const()[name = string("const_999_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_375 = transpose(perm = var_17685, x = var_17680)[name = string("transpose_24")]; - tensor var_17736 = mul(x = hidden_states_375, y = const_999_promoted)[name = string("op_17736")]; - bool input_467_interleave_0 = const()[name = string("input_467_interleave_0"), val = bool(false)]; - tensor input_467 = concat(axis = var_17734, interleave = input_467_interleave_0, values = (hidden_states_375, var_17736))[name = string("input_467")]; - tensor normed_561_axes_0 = const()[name = string("normed_561_axes_0"), val = tensor([-1])]; - fp16 var_17731_to_fp16 = const()[name = string("op_17731_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_561_cast_fp16 = layer_norm(axes = normed_561_axes_0, epsilon = var_17731_to_fp16, x = input_467)[name = string("normed_561_cast_fp16")]; - tensor normed_563_begin_0 = const()[name = string("normed_563_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_563_end_0 = const()[name = string("normed_563_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_563_end_mask_0 = const()[name = string("normed_563_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_563 = slice_by_index(begin = normed_563_begin_0, end = normed_563_end_0, end_mask = normed_563_end_mask_0, x = normed_561_cast_fp16)[name = string("normed_563")]; - tensor var_17750_to_fp16 = const()[name = string("op_17750_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490540096)))]; - tensor k_47_cast_fp16 = mul(x = normed_563, y = var_17750_to_fp16)[name = string("k_47_cast_fp16")]; - tensor var_17764_cast_fp16 = mul(x = q_47_cast_fp16, y = cos_35)[name = string("op_17764_cast_fp16")]; - tensor x1_93_begin_0 = const()[name = string("x1_93_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_93_end_0 = const()[name = string("x1_93_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_93_end_mask_0 = const()[name = string("x1_93_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_93_cast_fp16 = slice_by_index(begin = x1_93_begin_0, end = x1_93_end_0, end_mask = x1_93_end_mask_0, x = q_47_cast_fp16)[name = string("x1_93_cast_fp16")]; - tensor x2_93_begin_0 = const()[name = string("x2_93_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_93_end_0 = const()[name = string("x2_93_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_93_end_mask_0 = const()[name = string("x2_93_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_93_cast_fp16 = slice_by_index(begin = x2_93_begin_0, end = x2_93_end_0, end_mask = x2_93_end_mask_0, x = q_47_cast_fp16)[name = string("x2_93_cast_fp16")]; - fp16 const_1005_promoted_to_fp16 = const()[name = string("const_1005_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_17785_cast_fp16 = mul(x = x2_93_cast_fp16, y = const_1005_promoted_to_fp16)[name = string("op_17785_cast_fp16")]; - int32 var_17787 = const()[name = string("op_17787"), val = int32(-1)]; - bool var_17788_interleave_0 = const()[name = string("op_17788_interleave_0"), val = bool(false)]; - tensor var_17788_cast_fp16 = concat(axis = var_17787, interleave = var_17788_interleave_0, values = (var_17785_cast_fp16, x1_93_cast_fp16))[name = string("op_17788_cast_fp16")]; - tensor var_17789_cast_fp16 = mul(x = var_17788_cast_fp16, y = sin_35)[name = string("op_17789_cast_fp16")]; - tensor query_states_187_cast_fp16 = add(x = var_17764_cast_fp16, y = var_17789_cast_fp16)[name = string("query_states_187_cast_fp16")]; - tensor var_17792_cast_fp16 = mul(x = k_47_cast_fp16, y = cos_35)[name = string("op_17792_cast_fp16")]; - tensor x1_95_begin_0 = const()[name = string("x1_95_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_95_end_0 = const()[name = string("x1_95_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_95_end_mask_0 = const()[name = string("x1_95_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_95_cast_fp16 = slice_by_index(begin = x1_95_begin_0, end = x1_95_end_0, end_mask = x1_95_end_mask_0, x = k_47_cast_fp16)[name = string("x1_95_cast_fp16")]; - tensor x2_95_begin_0 = const()[name = string("x2_95_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_95_end_0 = const()[name = string("x2_95_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_95_end_mask_0 = const()[name = string("x2_95_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_95_cast_fp16 = slice_by_index(begin = x2_95_begin_0, end = x2_95_end_0, end_mask = x2_95_end_mask_0, x = k_47_cast_fp16)[name = string("x2_95_cast_fp16")]; - fp16 const_1008_promoted_to_fp16 = const()[name = string("const_1008_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_17813_cast_fp16 = mul(x = x2_95_cast_fp16, y = const_1008_promoted_to_fp16)[name = string("op_17813_cast_fp16")]; - int32 var_17815 = const()[name = string("op_17815"), val = int32(-1)]; - bool var_17816_interleave_0 = const()[name = string("op_17816_interleave_0"), val = bool(false)]; - tensor var_17816_cast_fp16 = concat(axis = var_17815, interleave = var_17816_interleave_0, values = (var_17813_cast_fp16, x1_95_cast_fp16))[name = string("op_17816_cast_fp16")]; - tensor var_17817_cast_fp16 = mul(x = var_17816_cast_fp16, y = sin_35)[name = string("op_17817_cast_fp16")]; - tensor key_states_233_cast_fp16 = add(x = var_17792_cast_fp16, y = var_17817_cast_fp16)[name = string("key_states_233_cast_fp16")]; - tensor model_model_kv_cache_global_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_global_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_global_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_56, begin_mask = model_model_kv_cache_global_internal_tensor_assign_7_begin_mask_0, end = concat_57, end_mask = model_model_kv_cache_global_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_7_stride_0, update = key_states_233_cast_fp16, x = coreml_update_state_87)[name = string("model_model_kv_cache_global_internal_tensor_assign_7_cast_fp16")]; - write_state(data = model_model_kv_cache_global_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_150_write_state")]; - tensor coreml_update_state_98 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_150")]; - tensor model_model_kv_cache_global_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_global_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_global_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor value_states_187 = transpose(perm = var_17696, x = var_17691)[name = string("transpose_23")]; - tensor model_model_kv_cache_global_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_146, begin_mask = model_model_kv_cache_global_internal_tensor_assign_8_begin_mask_0, end = concat_147, end_mask = model_model_kv_cache_global_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_8_stride_0, update = value_states_187, x = coreml_update_state_98)[name = string("model_model_kv_cache_global_internal_tensor_assign_8_cast_fp16")]; - write_state(data = model_model_kv_cache_global_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_151_write_state")]; - tensor coreml_update_state_99 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_151")]; - tensor var_17916_begin_0 = const()[name = string("op_17916_begin_0"), val = tensor([3, 0, 0, 0])]; - tensor var_17916_end_0 = const()[name = string("op_17916_end_0"), val = tensor([4, 1, 4096, 256])]; - tensor var_17916_end_mask_0 = const()[name = string("op_17916_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_17916_cast_fp16 = slice_by_index(begin = var_17916_begin_0, end = var_17916_end_0, end_mask = var_17916_end_mask_0, x = coreml_update_state_99)[name = string("op_17916_cast_fp16")]; - tensor var_17923_begin_0 = const()[name = string("op_17923_begin_0"), val = tensor([7, 0, 0, 0])]; - tensor var_17923_end_0 = const()[name = string("op_17923_end_0"), val = tensor([1, 1, 4096, 256])]; - tensor var_17923_end_mask_0 = const()[name = string("op_17923_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_17923_cast_fp16 = slice_by_index(begin = var_17923_begin_0, end = var_17923_end_0, end_mask = var_17923_end_mask_0, x = coreml_update_state_99)[name = string("op_17923_cast_fp16")]; - tensor var_17962 = const()[name = string("op_17962"), val = tensor([1, 4, 1, 1])]; - tensor x_373_cast_fp16 = tile(reps = var_17962, x = var_17916_cast_fp16)[name = string("x_373_cast_fp16")]; - tensor var_17982 = const()[name = string("op_17982"), val = tensor([1, 4, 1, 1])]; - tensor x_379_cast_fp16 = tile(reps = var_17982, x = var_17923_cast_fp16)[name = string("x_379_cast_fp16")]; - bool var_18009_transpose_x_0 = const()[name = string("op_18009_transpose_x_0"), val = bool(false)]; - bool var_18009_transpose_y_0 = const()[name = string("op_18009_transpose_y_0"), val = bool(true)]; - tensor var_18009 = matmul(transpose_x = var_18009_transpose_x_0, transpose_y = var_18009_transpose_y_0, x = query_states_187_cast_fp16, y = x_373_cast_fp16)[name = string("op_18009")]; - fp16 var_18010_to_fp16 = const()[name = string("op_18010_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_93_cast_fp16 = mul(x = var_18009, y = var_18010_to_fp16)[name = string("attn_weights_93_cast_fp16")]; - tensor attn_weights_95_cast_fp16 = add(x = attn_weights_93_cast_fp16, y = causal_mask)[name = string("attn_weights_95_cast_fp16")]; - int32 var_18045 = const()[name = string("op_18045"), val = int32(-1)]; - tensor var_18047_cast_fp16 = softmax(axis = var_18045, x = attn_weights_95_cast_fp16)[name = string("op_18047_cast_fp16")]; - tensor concat_426 = const()[name = string("concat_426"), val = tensor([4, 64, 4096])]; - tensor reshape_69_cast_fp16 = reshape(shape = concat_426, x = var_18047_cast_fp16)[name = string("reshape_69_cast_fp16")]; - tensor concat_427 = const()[name = string("concat_427"), val = tensor([4, 4096, 256])]; - tensor reshape_70_cast_fp16 = reshape(shape = concat_427, x = x_379_cast_fp16)[name = string("reshape_70_cast_fp16")]; - bool matmul_23_transpose_x_0 = const()[name = string("matmul_23_transpose_x_0"), val = bool(false)]; - bool matmul_23_transpose_y_0 = const()[name = string("matmul_23_transpose_y_0"), val = bool(false)]; - tensor matmul_23_cast_fp16 = matmul(transpose_x = matmul_23_transpose_x_0, transpose_y = matmul_23_transpose_y_0, x = reshape_69_cast_fp16, y = reshape_70_cast_fp16)[name = string("matmul_23_cast_fp16")]; - tensor concat_431 = const()[name = string("concat_431"), val = tensor([1, 4, 64, 256])]; - tensor reshape_71_cast_fp16 = reshape(shape = concat_431, x = matmul_23_cast_fp16)[name = string("reshape_71_cast_fp16")]; - tensor var_18059_perm_0 = const()[name = string("op_18059_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_18078 = const()[name = string("op_18078"), val = tensor([1, 64, 1024])]; - tensor var_18059_cast_fp16 = transpose(perm = var_18059_perm_0, x = reshape_71_cast_fp16)[name = string("transpose_22")]; - tensor attn_output_235_cast_fp16 = reshape(shape = var_18078, x = var_18059_cast_fp16)[name = string("attn_output_235_cast_fp16")]; - tensor var_18083 = const()[name = string("op_18083"), val = tensor([0, 2, 1])]; - string var_18099_pad_type_0 = const()[name = string("op_18099_pad_type_0"), val = string("valid")]; - int32 var_18099_groups_0 = const()[name = string("op_18099_groups_0"), val = int32(1)]; - tensor var_18099_strides_0 = const()[name = string("op_18099_strides_0"), val = tensor([1])]; - tensor var_18099_pad_0 = const()[name = string("op_18099_pad_0"), val = tensor([0, 0])]; - tensor var_18099_dilations_0 = const()[name = string("op_18099_dilations_0"), val = tensor([1])]; - tensor squeeze_23_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490540672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491425472))))[name = string("squeeze_23_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_18084_cast_fp16 = transpose(perm = var_18083, x = attn_output_235_cast_fp16)[name = string("transpose_21")]; - tensor var_18099_cast_fp16 = conv(dilations = var_18099_dilations_0, groups = var_18099_groups_0, pad = var_18099_pad_0, pad_type = var_18099_pad_type_0, strides = var_18099_strides_0, weight = squeeze_23_cast_fp16_to_fp32_to_fp16_palettized, x = var_18084_cast_fp16)[name = string("op_18099_cast_fp16")]; - tensor var_18103 = const()[name = string("op_18103"), val = tensor([0, 2, 1])]; - int32 var_18114 = const()[name = string("op_18114"), val = int32(-1)]; - fp16 const_1020_promoted_to_fp16 = const()[name = string("const_1020_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_377_cast_fp16 = transpose(perm = var_18103, x = var_18099_cast_fp16)[name = string("transpose_20")]; - tensor var_18116_cast_fp16 = mul(x = hidden_states_377_cast_fp16, y = const_1020_promoted_to_fp16)[name = string("op_18116_cast_fp16")]; - bool input_471_interleave_0 = const()[name = string("input_471_interleave_0"), val = bool(false)]; - tensor input_471_cast_fp16 = concat(axis = var_18114, interleave = input_471_interleave_0, values = (hidden_states_377_cast_fp16, var_18116_cast_fp16))[name = string("input_471_cast_fp16")]; - tensor normed_565_axes_0 = const()[name = string("normed_565_axes_0"), val = tensor([-1])]; - fp16 var_18111_to_fp16 = const()[name = string("op_18111_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_565_cast_fp16 = layer_norm(axes = normed_565_axes_0, epsilon = var_18111_to_fp16, x = input_471_cast_fp16)[name = string("normed_565_cast_fp16")]; - tensor normed_567_begin_0 = const()[name = string("normed_567_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_567_end_0 = const()[name = string("normed_567_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_567_end_mask_0 = const()[name = string("normed_567_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_567_cast_fp16 = slice_by_index(begin = normed_567_begin_0, end = normed_567_end_0, end_mask = normed_567_end_mask_0, x = normed_565_cast_fp16)[name = string("normed_567_cast_fp16")]; - tensor var_18130_to_fp16 = const()[name = string("op_18130_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491443968)))]; - tensor attn_output_239_cast_fp16 = mul(x = normed_567_cast_fp16, y = var_18130_to_fp16)[name = string("attn_output_239_cast_fp16")]; - tensor hidden_states_379_cast_fp16 = add(x = hidden_states_369_cast_fp16, y = attn_output_239_cast_fp16)[name = string("hidden_states_379_cast_fp16")]; - int32 var_18143 = const()[name = string("op_18143"), val = int32(-1)]; - fp16 const_1024_promoted_to_fp16 = const()[name = string("const_1024_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_18145_cast_fp16 = mul(x = hidden_states_379_cast_fp16, y = const_1024_promoted_to_fp16)[name = string("op_18145_cast_fp16")]; - bool input_473_interleave_0 = const()[name = string("input_473_interleave_0"), val = bool(false)]; - tensor input_473_cast_fp16 = concat(axis = var_18143, interleave = input_473_interleave_0, values = (hidden_states_379_cast_fp16, var_18145_cast_fp16))[name = string("input_473_cast_fp16")]; - tensor normed_569_axes_0 = const()[name = string("normed_569_axes_0"), val = tensor([-1])]; - fp16 var_18140_to_fp16 = const()[name = string("op_18140_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_569_cast_fp16 = layer_norm(axes = normed_569_axes_0, epsilon = var_18140_to_fp16, x = input_473_cast_fp16)[name = string("normed_569_cast_fp16")]; - tensor normed_571_begin_0 = const()[name = string("normed_571_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_571_end_0 = const()[name = string("normed_571_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_571_end_mask_0 = const()[name = string("normed_571_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_571_cast_fp16 = slice_by_index(begin = normed_571_begin_0, end = normed_571_end_0, end_mask = normed_571_end_mask_0, x = normed_569_cast_fp16)[name = string("normed_571_cast_fp16")]; - tensor var_18159_to_fp16 = const()[name = string("op_18159_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491446336)))]; - tensor x_381_cast_fp16 = mul(x = normed_571_cast_fp16, y = var_18159_to_fp16)[name = string("x_381_cast_fp16")]; - tensor var_18171 = const()[name = string("op_18171"), val = tensor([0, 2, 1])]; - tensor input_475_axes_0 = const()[name = string("input_475_axes_0"), val = tensor([2])]; - tensor var_18172_cast_fp16 = transpose(perm = var_18171, x = x_381_cast_fp16)[name = string("transpose_19")]; - tensor input_475_cast_fp16 = expand_dims(axes = input_475_axes_0, x = var_18172_cast_fp16)[name = string("input_475_cast_fp16")]; - string x_383_pad_type_0 = const()[name = string("x_383_pad_type_0"), val = string("valid")]; - tensor x_383_strides_0 = const()[name = string("x_383_strides_0"), val = tensor([1, 1])]; - tensor x_383_pad_0 = const()[name = string("x_383_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_383_dilations_0 = const()[name = string("x_383_dilations_0"), val = tensor([1, 1])]; - int32 x_383_groups_0 = const()[name = string("x_383_groups_0"), val = int32(1)]; - tensor model_model_layers_23_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1152591552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1158563584))))[name = string("model_model_layers_23_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_383_cast_fp16 = conv(dilations = x_383_dilations_0, groups = x_383_groups_0, pad = x_383_pad_0, pad_type = x_383_pad_type_0, strides = x_383_strides_0, weight = model_model_layers_23_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_475_cast_fp16)[name = string("x_383_cast_fp16")]; - string b_47_pad_type_0 = const()[name = string("b_47_pad_type_0"), val = string("valid")]; - tensor b_47_strides_0 = const()[name = string("b_47_strides_0"), val = tensor([1, 1])]; - tensor b_47_pad_0 = const()[name = string("b_47_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_47_dilations_0 = const()[name = string("b_47_dilations_0"), val = tensor([1, 1])]; - int32 b_47_groups_0 = const()[name = string("b_47_groups_0"), val = int32(1)]; - tensor model_model_layers_23_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1158674240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1164646272))))[name = string("model_model_layers_23_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_47_cast_fp16 = conv(dilations = b_47_dilations_0, groups = b_47_groups_0, pad = b_47_pad_0, pad_type = b_47_pad_type_0, strides = b_47_strides_0, weight = model_model_layers_23_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_475_cast_fp16)[name = string("b_47_cast_fp16")]; - string var_18197_mode_0 = const()[name = string("op_18197_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_18197_cast_fp16 = gelu(mode = var_18197_mode_0, x = x_383_cast_fp16)[name = string("op_18197_cast_fp16")]; - tensor input_477_cast_fp16 = mul(x = var_18197_cast_fp16, y = b_47_cast_fp16)[name = string("input_477_cast_fp16")]; - string e_47_pad_type_0 = const()[name = string("e_47_pad_type_0"), val = string("valid")]; - tensor e_47_strides_0 = const()[name = string("e_47_strides_0"), val = tensor([1, 1])]; - tensor e_47_pad_0 = const()[name = string("e_47_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_47_dilations_0 = const()[name = string("e_47_dilations_0"), val = tensor([1, 1])]; - int32 e_47_groups_0 = const()[name = string("e_47_groups_0"), val = int32(1)]; - tensor model_model_layers_23_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(503614080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(509586112))))[name = string("model_model_layers_23_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_47_cast_fp16 = conv(dilations = e_47_dilations_0, groups = e_47_groups_0, pad = e_47_pad_0, pad_type = e_47_pad_type_0, strides = e_47_strides_0, weight = model_model_layers_23_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_477_cast_fp16)[name = string("e_47_cast_fp16")]; - tensor var_18205_axes_0 = const()[name = string("op_18205_axes_0"), val = tensor([2])]; - tensor var_18205_cast_fp16 = squeeze(axes = var_18205_axes_0, x = e_47_cast_fp16)[name = string("op_18205_cast_fp16")]; - tensor var_18206 = const()[name = string("op_18206"), val = tensor([0, 2, 1])]; - int32 var_18217 = const()[name = string("op_18217"), val = int32(-1)]; - fp16 const_1028_promoted_to_fp16 = const()[name = string("const_1028_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_381_cast_fp16 = transpose(perm = var_18206, x = var_18205_cast_fp16)[name = string("transpose_18")]; - tensor var_18219_cast_fp16 = mul(x = hidden_states_381_cast_fp16, y = const_1028_promoted_to_fp16)[name = string("op_18219_cast_fp16")]; - bool input_479_interleave_0 = const()[name = string("input_479_interleave_0"), val = bool(false)]; - tensor input_479_cast_fp16 = concat(axis = var_18217, interleave = input_479_interleave_0, values = (hidden_states_381_cast_fp16, var_18219_cast_fp16))[name = string("input_479_cast_fp16")]; - tensor normed_573_axes_0 = const()[name = string("normed_573_axes_0"), val = tensor([-1])]; - fp16 var_18214_to_fp16 = const()[name = string("op_18214_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_573_cast_fp16 = layer_norm(axes = normed_573_axes_0, epsilon = var_18214_to_fp16, x = input_479_cast_fp16)[name = string("normed_573_cast_fp16")]; - tensor normed_575_begin_0 = const()[name = string("normed_575_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_575_end_0 = const()[name = string("normed_575_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_575_end_mask_0 = const()[name = string("normed_575_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_575_cast_fp16 = slice_by_index(begin = normed_575_begin_0, end = normed_575_end_0, end_mask = normed_575_end_mask_0, x = normed_573_cast_fp16)[name = string("normed_575_cast_fp16")]; - tensor var_18233_to_fp16 = const()[name = string("op_18233_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(509604608)))]; - tensor hidden_states_383_cast_fp16 = mul(x = normed_575_cast_fp16, y = var_18233_to_fp16)[name = string("hidden_states_383_cast_fp16")]; - tensor hidden_states_385_cast_fp16 = add(x = hidden_states_379_cast_fp16, y = hidden_states_383_cast_fp16)[name = string("hidden_states_385_cast_fp16")]; - int32 var_18287 = const()[name = string("op_18287"), val = int32(-1)]; - fp16 const_1033_promoted_to_fp16 = const()[name = string("const_1033_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_18289_cast_fp16 = mul(x = hidden_states_385_cast_fp16, y = const_1033_promoted_to_fp16)[name = string("op_18289_cast_fp16")]; - bool input_481_interleave_0 = const()[name = string("input_481_interleave_0"), val = bool(false)]; - tensor input_481_cast_fp16 = concat(axis = var_18287, interleave = input_481_interleave_0, values = (hidden_states_385_cast_fp16, var_18289_cast_fp16))[name = string("input_481_cast_fp16")]; - tensor normed_577_axes_0 = const()[name = string("normed_577_axes_0"), val = tensor([-1])]; - fp16 var_18284_to_fp16 = const()[name = string("op_18284_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_577_cast_fp16 = layer_norm(axes = normed_577_axes_0, epsilon = var_18284_to_fp16, x = input_481_cast_fp16)[name = string("normed_577_cast_fp16")]; - tensor normed_579_begin_0 = const()[name = string("normed_579_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_579_end_0 = const()[name = string("normed_579_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_579_end_mask_0 = const()[name = string("normed_579_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_579_cast_fp16 = slice_by_index(begin = normed_579_begin_0, end = normed_579_end_0, end_mask = normed_579_end_mask_0, x = normed_577_cast_fp16)[name = string("normed_579_cast_fp16")]; - tensor var_18303_to_fp16 = const()[name = string("op_18303_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(509606976)))]; - tensor hidden_states_387_cast_fp16 = mul(x = normed_579_cast_fp16, y = var_18303_to_fp16)[name = string("hidden_states_387_cast_fp16")]; - tensor var_18314 = const()[name = string("op_18314"), val = tensor([0, 2, 1])]; - tensor var_18317_axes_0 = const()[name = string("op_18317_axes_0"), val = tensor([2])]; - tensor var_18315_cast_fp16 = transpose(perm = var_18314, x = hidden_states_387_cast_fp16)[name = string("transpose_17")]; - tensor var_18317_cast_fp16 = expand_dims(axes = var_18317_axes_0, x = var_18315_cast_fp16)[name = string("op_18317_cast_fp16")]; - string query_states_193_pad_type_0 = const()[name = string("query_states_193_pad_type_0"), val = string("valid")]; - tensor query_states_193_strides_0 = const()[name = string("query_states_193_strides_0"), val = tensor([1, 1])]; - tensor query_states_193_pad_0 = const()[name = string("query_states_193_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_193_dilations_0 = const()[name = string("query_states_193_dilations_0"), val = tensor([1, 1])]; - int32 query_states_193_groups_0 = const()[name = string("query_states_193_groups_0"), val = int32(1)]; - tensor query_states_193 = conv(dilations = query_states_193_dilations_0, groups = query_states_193_groups_0, pad = query_states_193_pad_0, pad_type = query_states_193_pad_type_0, strides = query_states_193_strides_0, weight = model_model_layers_24_self_attn_q_proj_weight_palettized, x = var_18317_cast_fp16)[name = string("query_states_193")]; - string key_states_241_pad_type_0 = const()[name = string("key_states_241_pad_type_0"), val = string("valid")]; - tensor key_states_241_strides_0 = const()[name = string("key_states_241_strides_0"), val = tensor([1, 1])]; - tensor key_states_241_pad_0 = const()[name = string("key_states_241_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_241_dilations_0 = const()[name = string("key_states_241_dilations_0"), val = tensor([1, 1])]; - int32 key_states_241_groups_0 = const()[name = string("key_states_241_groups_0"), val = int32(1)]; - tensor key_states_241 = conv(dilations = key_states_241_dilations_0, groups = key_states_241_groups_0, pad = key_states_241_pad_0, pad_type = key_states_241_pad_type_0, strides = key_states_241_strides_0, weight = model_model_layers_24_self_attn_k_proj_weight_palettized, x = var_18317_cast_fp16)[name = string("key_states_241")]; - string value_states_193_pad_type_0 = const()[name = string("value_states_193_pad_type_0"), val = string("valid")]; - tensor value_states_193_strides_0 = const()[name = string("value_states_193_strides_0"), val = tensor([1, 1])]; - tensor value_states_193_pad_0 = const()[name = string("value_states_193_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_193_dilations_0 = const()[name = string("value_states_193_dilations_0"), val = tensor([1, 1])]; - int32 value_states_193_groups_0 = const()[name = string("value_states_193_groups_0"), val = int32(1)]; - tensor value_states_193 = conv(dilations = value_states_193_dilations_0, groups = value_states_193_groups_0, pad = value_states_193_pad_0, pad_type = value_states_193_pad_type_0, strides = value_states_193_strides_0, weight = model_model_layers_24_self_attn_v_proj_weight_palettized, x = var_18317_cast_fp16)[name = string("value_states_193")]; - tensor var_18359 = const()[name = string("op_18359"), val = tensor([1, 4, 256, 64])]; - tensor var_18360 = reshape(shape = var_18359, x = query_states_193)[name = string("op_18360")]; - tensor var_18365 = const()[name = string("op_18365"), val = tensor([0, 1, 3, 2])]; - tensor var_18370 = const()[name = string("op_18370"), val = tensor([1, 1, 256, 64])]; - tensor var_18371 = reshape(shape = var_18370, x = key_states_241)[name = string("op_18371")]; - tensor var_18376 = const()[name = string("op_18376"), val = tensor([0, 1, 3, 2])]; - tensor var_18381 = const()[name = string("op_18381"), val = tensor([1, 1, 256, 64])]; - tensor var_18382 = reshape(shape = var_18381, x = value_states_193)[name = string("op_18382")]; - tensor var_18387 = const()[name = string("op_18387"), val = tensor([0, 1, 3, 2])]; - int32 var_18398 = const()[name = string("op_18398"), val = int32(-1)]; - fp16 const_1038_promoted = const()[name = string("const_1038_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_389 = transpose(perm = var_18365, x = var_18360)[name = string("transpose_16")]; - tensor var_18400 = mul(x = hidden_states_389, y = const_1038_promoted)[name = string("op_18400")]; - bool input_485_interleave_0 = const()[name = string("input_485_interleave_0"), val = bool(false)]; - tensor input_485 = concat(axis = var_18398, interleave = input_485_interleave_0, values = (hidden_states_389, var_18400))[name = string("input_485")]; - tensor normed_581_axes_0 = const()[name = string("normed_581_axes_0"), val = tensor([-1])]; - fp16 var_18395_to_fp16 = const()[name = string("op_18395_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_581_cast_fp16 = layer_norm(axes = normed_581_axes_0, epsilon = var_18395_to_fp16, x = input_485)[name = string("normed_581_cast_fp16")]; - tensor normed_583_begin_0 = const()[name = string("normed_583_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_583_end_0 = const()[name = string("normed_583_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_583_end_mask_0 = const()[name = string("normed_583_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_583 = slice_by_index(begin = normed_583_begin_0, end = normed_583_end_0, end_mask = normed_583_end_mask_0, x = normed_581_cast_fp16)[name = string("normed_583")]; - tensor var_18414_to_fp16 = const()[name = string("op_18414_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(509609344)))]; - tensor q_49_cast_fp16 = mul(x = normed_583, y = var_18414_to_fp16)[name = string("q_49_cast_fp16")]; - int32 var_18425 = const()[name = string("op_18425"), val = int32(-1)]; - fp16 const_1042_promoted = const()[name = string("const_1042_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_391 = transpose(perm = var_18376, x = var_18371)[name = string("transpose_15")]; - tensor var_18427 = mul(x = hidden_states_391, y = const_1042_promoted)[name = string("op_18427")]; - bool input_487_interleave_0 = const()[name = string("input_487_interleave_0"), val = bool(false)]; - tensor input_487 = concat(axis = var_18425, interleave = input_487_interleave_0, values = (hidden_states_391, var_18427))[name = string("input_487")]; - tensor normed_585_axes_0 = const()[name = string("normed_585_axes_0"), val = tensor([-1])]; - fp16 var_18422_to_fp16 = const()[name = string("op_18422_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_585_cast_fp16 = layer_norm(axes = normed_585_axes_0, epsilon = var_18422_to_fp16, x = input_487)[name = string("normed_585_cast_fp16")]; - tensor normed_587_begin_0 = const()[name = string("normed_587_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_587_end_0 = const()[name = string("normed_587_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_587_end_mask_0 = const()[name = string("normed_587_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_587 = slice_by_index(begin = normed_587_begin_0, end = normed_587_end_0, end_mask = normed_587_end_mask_0, x = normed_585_cast_fp16)[name = string("normed_587")]; - tensor var_18441_to_fp16 = const()[name = string("op_18441_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(509609920)))]; - tensor k_49_cast_fp16 = mul(x = normed_587, y = var_18441_to_fp16)[name = string("k_49_cast_fp16")]; - tensor var_18455_cast_fp16 = mul(x = q_49_cast_fp16, y = cos_5)[name = string("op_18455_cast_fp16")]; - tensor x1_97_begin_0 = const()[name = string("x1_97_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_97_end_0 = const()[name = string("x1_97_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_97_end_mask_0 = const()[name = string("x1_97_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_97_cast_fp16 = slice_by_index(begin = x1_97_begin_0, end = x1_97_end_0, end_mask = x1_97_end_mask_0, x = q_49_cast_fp16)[name = string("x1_97_cast_fp16")]; - tensor x2_97_begin_0 = const()[name = string("x2_97_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_97_end_0 = const()[name = string("x2_97_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_97_end_mask_0 = const()[name = string("x2_97_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_97_cast_fp16 = slice_by_index(begin = x2_97_begin_0, end = x2_97_end_0, end_mask = x2_97_end_mask_0, x = q_49_cast_fp16)[name = string("x2_97_cast_fp16")]; - fp16 const_1048_promoted_to_fp16 = const()[name = string("const_1048_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_18476_cast_fp16 = mul(x = x2_97_cast_fp16, y = const_1048_promoted_to_fp16)[name = string("op_18476_cast_fp16")]; - int32 var_18478 = const()[name = string("op_18478"), val = int32(-1)]; - bool var_18479_interleave_0 = const()[name = string("op_18479_interleave_0"), val = bool(false)]; - tensor var_18479_cast_fp16 = concat(axis = var_18478, interleave = var_18479_interleave_0, values = (var_18476_cast_fp16, x1_97_cast_fp16))[name = string("op_18479_cast_fp16")]; - tensor var_18480_cast_fp16 = mul(x = var_18479_cast_fp16, y = sin_5)[name = string("op_18480_cast_fp16")]; - tensor query_states_195_cast_fp16 = add(x = var_18455_cast_fp16, y = var_18480_cast_fp16)[name = string("query_states_195_cast_fp16")]; - tensor var_18483_cast_fp16 = mul(x = k_49_cast_fp16, y = cos_5)[name = string("op_18483_cast_fp16")]; - tensor x1_99_begin_0 = const()[name = string("x1_99_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_99_end_0 = const()[name = string("x1_99_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_99_end_mask_0 = const()[name = string("x1_99_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_99_cast_fp16 = slice_by_index(begin = x1_99_begin_0, end = x1_99_end_0, end_mask = x1_99_end_mask_0, x = k_49_cast_fp16)[name = string("x1_99_cast_fp16")]; - tensor x2_99_begin_0 = const()[name = string("x2_99_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_99_end_0 = const()[name = string("x2_99_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_99_end_mask_0 = const()[name = string("x2_99_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_99_cast_fp16 = slice_by_index(begin = x2_99_begin_0, end = x2_99_end_0, end_mask = x2_99_end_mask_0, x = k_49_cast_fp16)[name = string("x2_99_cast_fp16")]; - fp16 const_1051_promoted_to_fp16 = const()[name = string("const_1051_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_18504_cast_fp16 = mul(x = x2_99_cast_fp16, y = const_1051_promoted_to_fp16)[name = string("op_18504_cast_fp16")]; - int32 var_18506 = const()[name = string("op_18506"), val = int32(-1)]; - bool var_18507_interleave_0 = const()[name = string("op_18507_interleave_0"), val = bool(false)]; - tensor var_18507_cast_fp16 = concat(axis = var_18506, interleave = var_18507_interleave_0, values = (var_18504_cast_fp16, x1_99_cast_fp16))[name = string("op_18507_cast_fp16")]; - tensor var_18508_cast_fp16 = mul(x = var_18507_cast_fp16, y = sin_5)[name = string("op_18508_cast_fp16")]; - tensor key_states_243_cast_fp16 = add(x = var_18483_cast_fp16, y = var_18508_cast_fp16)[name = string("key_states_243_cast_fp16")]; - tensor expand_dims_288 = const()[name = string("expand_dims_288"), val = tensor([20])]; - tensor expand_dims_289 = const()[name = string("expand_dims_289"), val = tensor([0])]; - tensor expand_dims_291 = const()[name = string("expand_dims_291"), val = tensor([0])]; - tensor expand_dims_292 = const()[name = string("expand_dims_292"), val = tensor([21])]; - int32 concat_434_axis_0 = const()[name = string("concat_434_axis_0"), val = int32(0)]; - bool concat_434_interleave_0 = const()[name = string("concat_434_interleave_0"), val = bool(false)]; - tensor concat_434 = concat(axis = concat_434_axis_0, interleave = concat_434_interleave_0, values = (expand_dims_288, expand_dims_289, current_pos, expand_dims_291))[name = string("concat_434")]; - tensor concat_435_values1_0 = const()[name = string("concat_435_values1_0"), val = tensor([0])]; - tensor concat_435_values3_0 = const()[name = string("concat_435_values3_0"), val = tensor([0])]; - int32 concat_435_axis_0 = const()[name = string("concat_435_axis_0"), val = int32(0)]; - bool concat_435_interleave_0 = const()[name = string("concat_435_interleave_0"), val = bool(false)]; - tensor concat_435 = concat(axis = concat_435_axis_0, interleave = concat_435_interleave_0, values = (expand_dims_292, concat_435_values1_0, end_pos_1, concat_435_values3_0))[name = string("concat_435")]; - tensor model_model_kv_cache_local_internal_tensor_assign_41_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_41_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_41_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_41_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_41_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_41_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_41_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_41_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_41_cast_fp16 = slice_update(begin = concat_434, begin_mask = model_model_kv_cache_local_internal_tensor_assign_41_begin_mask_0, end = concat_435, end_mask = model_model_kv_cache_local_internal_tensor_assign_41_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_41_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_41_stride_0, update = key_states_243_cast_fp16, x = coreml_update_state_97)[name = string("model_model_kv_cache_local_internal_tensor_assign_41_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_41_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_152_write_state")]; - tensor coreml_update_state_100 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_152")]; - tensor expand_dims_294 = const()[name = string("expand_dims_294"), val = tensor([42])]; - tensor expand_dims_295 = const()[name = string("expand_dims_295"), val = tensor([0])]; - tensor expand_dims_297 = const()[name = string("expand_dims_297"), val = tensor([0])]; - tensor expand_dims_298 = const()[name = string("expand_dims_298"), val = tensor([43])]; - int32 concat_438_axis_0 = const()[name = string("concat_438_axis_0"), val = int32(0)]; - bool concat_438_interleave_0 = const()[name = string("concat_438_interleave_0"), val = bool(false)]; - tensor concat_438 = concat(axis = concat_438_axis_0, interleave = concat_438_interleave_0, values = (expand_dims_294, expand_dims_295, current_pos, expand_dims_297))[name = string("concat_438")]; - tensor concat_439_values1_0 = const()[name = string("concat_439_values1_0"), val = tensor([0])]; - tensor concat_439_values3_0 = const()[name = string("concat_439_values3_0"), val = tensor([0])]; - int32 concat_439_axis_0 = const()[name = string("concat_439_axis_0"), val = int32(0)]; - bool concat_439_interleave_0 = const()[name = string("concat_439_interleave_0"), val = bool(false)]; - tensor concat_439 = concat(axis = concat_439_axis_0, interleave = concat_439_interleave_0, values = (expand_dims_298, concat_439_values1_0, end_pos_1, concat_439_values3_0))[name = string("concat_439")]; - tensor model_model_kv_cache_local_internal_tensor_assign_42_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_42_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_42_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_42_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_42_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_42_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_42_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_42_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor value_states_195 = transpose(perm = var_18387, x = var_18382)[name = string("transpose_14")]; - tensor model_model_kv_cache_local_internal_tensor_assign_42_cast_fp16 = slice_update(begin = concat_438, begin_mask = model_model_kv_cache_local_internal_tensor_assign_42_begin_mask_0, end = concat_439, end_mask = model_model_kv_cache_local_internal_tensor_assign_42_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_42_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_42_stride_0, update = value_states_195, x = coreml_update_state_100)[name = string("model_model_kv_cache_local_internal_tensor_assign_42_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_42_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_153_write_state")]; - tensor coreml_update_state_101 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_153")]; - tensor var_18607_begin_0 = const()[name = string("op_18607_begin_0"), val = tensor([20, 0, 0, 0])]; - tensor var_18607_end_0 = const()[name = string("op_18607_end_0"), val = tensor([21, 1, 512, 256])]; - tensor var_18607_end_mask_0 = const()[name = string("op_18607_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_18607_cast_fp16 = slice_by_index(begin = var_18607_begin_0, end = var_18607_end_0, end_mask = var_18607_end_mask_0, x = coreml_update_state_101)[name = string("op_18607_cast_fp16")]; - tensor var_18614_begin_0 = const()[name = string("op_18614_begin_0"), val = tensor([42, 0, 0, 0])]; - tensor var_18614_end_0 = const()[name = string("op_18614_end_0"), val = tensor([43, 1, 512, 256])]; - tensor var_18614_end_mask_0 = const()[name = string("op_18614_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_18614_cast_fp16 = slice_by_index(begin = var_18614_begin_0, end = var_18614_end_0, end_mask = var_18614_end_mask_0, x = coreml_update_state_101)[name = string("op_18614_cast_fp16")]; - tensor var_18653 = const()[name = string("op_18653"), val = tensor([1, 4, 1, 1])]; - tensor x_389_cast_fp16 = tile(reps = var_18653, x = var_18607_cast_fp16)[name = string("x_389_cast_fp16")]; - tensor var_18673 = const()[name = string("op_18673"), val = tensor([1, 4, 1, 1])]; - tensor x_395_cast_fp16 = tile(reps = var_18673, x = var_18614_cast_fp16)[name = string("x_395_cast_fp16")]; - bool var_18700_transpose_x_0 = const()[name = string("op_18700_transpose_x_0"), val = bool(false)]; - bool var_18700_transpose_y_0 = const()[name = string("op_18700_transpose_y_0"), val = bool(true)]; - tensor var_18700 = matmul(transpose_x = var_18700_transpose_x_0, transpose_y = var_18700_transpose_y_0, x = query_states_195_cast_fp16, y = x_389_cast_fp16)[name = string("op_18700")]; - fp16 var_18701_to_fp16 = const()[name = string("op_18701_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_97_cast_fp16 = mul(x = var_18700, y = var_18701_to_fp16)[name = string("attn_weights_97_cast_fp16")]; - tensor attn_weights_99_cast_fp16 = add(x = attn_weights_97_cast_fp16, y = mask_slice_1)[name = string("attn_weights_99_cast_fp16")]; - int32 var_18736 = const()[name = string("op_18736"), val = int32(-1)]; - tensor var_18738_cast_fp16 = softmax(axis = var_18736, x = attn_weights_99_cast_fp16)[name = string("op_18738_cast_fp16")]; - tensor concat_444 = const()[name = string("concat_444"), val = tensor([4, 64, 512])]; - tensor reshape_72_cast_fp16 = reshape(shape = concat_444, x = var_18738_cast_fp16)[name = string("reshape_72_cast_fp16")]; - tensor concat_445 = const()[name = string("concat_445"), val = tensor([4, 512, 256])]; - tensor reshape_73_cast_fp16 = reshape(shape = concat_445, x = x_395_cast_fp16)[name = string("reshape_73_cast_fp16")]; - bool matmul_24_transpose_x_0 = const()[name = string("matmul_24_transpose_x_0"), val = bool(false)]; - bool matmul_24_transpose_y_0 = const()[name = string("matmul_24_transpose_y_0"), val = bool(false)]; - tensor matmul_24_cast_fp16 = matmul(transpose_x = matmul_24_transpose_x_0, transpose_y = matmul_24_transpose_y_0, x = reshape_72_cast_fp16, y = reshape_73_cast_fp16)[name = string("matmul_24_cast_fp16")]; - tensor concat_449 = const()[name = string("concat_449"), val = tensor([1, 4, 64, 256])]; - tensor reshape_74_cast_fp16 = reshape(shape = concat_449, x = matmul_24_cast_fp16)[name = string("reshape_74_cast_fp16")]; - tensor var_18750_perm_0 = const()[name = string("op_18750_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_18769 = const()[name = string("op_18769"), val = tensor([1, 64, 1024])]; - tensor var_18750_cast_fp16 = transpose(perm = var_18750_perm_0, x = reshape_74_cast_fp16)[name = string("transpose_13")]; - tensor attn_output_245_cast_fp16 = reshape(shape = var_18769, x = var_18750_cast_fp16)[name = string("attn_output_245_cast_fp16")]; - tensor var_18774 = const()[name = string("op_18774"), val = tensor([0, 2, 1])]; - string var_18790_pad_type_0 = const()[name = string("op_18790_pad_type_0"), val = string("valid")]; - int32 var_18790_groups_0 = const()[name = string("op_18790_groups_0"), val = int32(1)]; - tensor var_18790_strides_0 = const()[name = string("op_18790_strides_0"), val = tensor([1])]; - tensor var_18790_pad_0 = const()[name = string("op_18790_pad_0"), val = tensor([0, 0])]; - tensor var_18790_dilations_0 = const()[name = string("op_18790_dilations_0"), val = tensor([1])]; - tensor squeeze_24_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(509610496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(510495296))))[name = string("squeeze_24_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_18775_cast_fp16 = transpose(perm = var_18774, x = attn_output_245_cast_fp16)[name = string("transpose_12")]; - tensor var_18790_cast_fp16 = conv(dilations = var_18790_dilations_0, groups = var_18790_groups_0, pad = var_18790_pad_0, pad_type = var_18790_pad_type_0, strides = var_18790_strides_0, weight = squeeze_24_cast_fp16_to_fp32_to_fp16_palettized, x = var_18775_cast_fp16)[name = string("op_18790_cast_fp16")]; - tensor var_18794 = const()[name = string("op_18794"), val = tensor([0, 2, 1])]; - int32 var_18805 = const()[name = string("op_18805"), val = int32(-1)]; - fp16 const_1063_promoted_to_fp16 = const()[name = string("const_1063_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_393_cast_fp16 = transpose(perm = var_18794, x = var_18790_cast_fp16)[name = string("transpose_11")]; - tensor var_18807_cast_fp16 = mul(x = hidden_states_393_cast_fp16, y = const_1063_promoted_to_fp16)[name = string("op_18807_cast_fp16")]; - bool input_491_interleave_0 = const()[name = string("input_491_interleave_0"), val = bool(false)]; - tensor input_491_cast_fp16 = concat(axis = var_18805, interleave = input_491_interleave_0, values = (hidden_states_393_cast_fp16, var_18807_cast_fp16))[name = string("input_491_cast_fp16")]; - tensor normed_589_axes_0 = const()[name = string("normed_589_axes_0"), val = tensor([-1])]; - fp16 var_18802_to_fp16 = const()[name = string("op_18802_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_589_cast_fp16 = layer_norm(axes = normed_589_axes_0, epsilon = var_18802_to_fp16, x = input_491_cast_fp16)[name = string("normed_589_cast_fp16")]; - tensor normed_591_begin_0 = const()[name = string("normed_591_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_591_end_0 = const()[name = string("normed_591_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_591_end_mask_0 = const()[name = string("normed_591_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_591_cast_fp16 = slice_by_index(begin = normed_591_begin_0, end = normed_591_end_0, end_mask = normed_591_end_mask_0, x = normed_589_cast_fp16)[name = string("normed_591_cast_fp16")]; - tensor var_18821_to_fp16 = const()[name = string("op_18821_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(510513792)))]; - tensor attn_output_249_cast_fp16 = mul(x = normed_591_cast_fp16, y = var_18821_to_fp16)[name = string("attn_output_249_cast_fp16")]; - tensor hidden_states_395_cast_fp16 = add(x = hidden_states_385_cast_fp16, y = attn_output_249_cast_fp16)[name = string("hidden_states_395_cast_fp16")]; - int32 var_18834 = const()[name = string("op_18834"), val = int32(-1)]; - fp16 const_1067_promoted_to_fp16 = const()[name = string("const_1067_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_18836_cast_fp16 = mul(x = hidden_states_395_cast_fp16, y = const_1067_promoted_to_fp16)[name = string("op_18836_cast_fp16")]; - bool input_493_interleave_0 = const()[name = string("input_493_interleave_0"), val = bool(false)]; - tensor input_493_cast_fp16 = concat(axis = var_18834, interleave = input_493_interleave_0, values = (hidden_states_395_cast_fp16, var_18836_cast_fp16))[name = string("input_493_cast_fp16")]; - tensor normed_593_axes_0 = const()[name = string("normed_593_axes_0"), val = tensor([-1])]; - fp16 var_18831_to_fp16 = const()[name = string("op_18831_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_593_cast_fp16 = layer_norm(axes = normed_593_axes_0, epsilon = var_18831_to_fp16, x = input_493_cast_fp16)[name = string("normed_593_cast_fp16")]; - tensor normed_595_begin_0 = const()[name = string("normed_595_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_595_end_0 = const()[name = string("normed_595_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_595_end_mask_0 = const()[name = string("normed_595_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_595_cast_fp16 = slice_by_index(begin = normed_595_begin_0, end = normed_595_end_0, end_mask = normed_595_end_mask_0, x = normed_593_cast_fp16)[name = string("normed_595_cast_fp16")]; - tensor var_18850_to_fp16 = const()[name = string("op_18850_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(510516160)))]; - tensor x_397_cast_fp16 = mul(x = normed_595_cast_fp16, y = var_18850_to_fp16)[name = string("x_397_cast_fp16")]; - tensor var_18862 = const()[name = string("op_18862"), val = tensor([0, 2, 1])]; - tensor input_495_axes_0 = const()[name = string("input_495_axes_0"), val = tensor([2])]; - tensor var_18863_cast_fp16 = transpose(perm = var_18862, x = x_397_cast_fp16)[name = string("transpose_10")]; - tensor input_495_cast_fp16 = expand_dims(axes = input_495_axes_0, x = var_18863_cast_fp16)[name = string("input_495_cast_fp16")]; - string x_399_pad_type_0 = const()[name = string("x_399_pad_type_0"), val = string("valid")]; - tensor x_399_strides_0 = const()[name = string("x_399_strides_0"), val = tensor([1, 1])]; - tensor x_399_pad_0 = const()[name = string("x_399_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_399_dilations_0 = const()[name = string("x_399_dilations_0"), val = tensor([1, 1])]; - int32 x_399_groups_0 = const()[name = string("x_399_groups_0"), val = int32(1)]; - tensor model_model_layers_24_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1164756928))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1170728960))))[name = string("model_model_layers_24_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_399_cast_fp16 = conv(dilations = x_399_dilations_0, groups = x_399_groups_0, pad = x_399_pad_0, pad_type = x_399_pad_type_0, strides = x_399_strides_0, weight = model_model_layers_24_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_495_cast_fp16)[name = string("x_399_cast_fp16")]; - string b_49_pad_type_0 = const()[name = string("b_49_pad_type_0"), val = string("valid")]; - tensor b_49_strides_0 = const()[name = string("b_49_strides_0"), val = tensor([1, 1])]; - tensor b_49_pad_0 = const()[name = string("b_49_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_49_dilations_0 = const()[name = string("b_49_dilations_0"), val = tensor([1, 1])]; - int32 b_49_groups_0 = const()[name = string("b_49_groups_0"), val = int32(1)]; - tensor model_model_layers_24_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1170839616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1176811648))))[name = string("model_model_layers_24_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_49_cast_fp16 = conv(dilations = b_49_dilations_0, groups = b_49_groups_0, pad = b_49_pad_0, pad_type = b_49_pad_type_0, strides = b_49_strides_0, weight = model_model_layers_24_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_495_cast_fp16)[name = string("b_49_cast_fp16")]; - string var_18888_mode_0 = const()[name = string("op_18888_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_18888_cast_fp16 = gelu(mode = var_18888_mode_0, x = x_399_cast_fp16)[name = string("op_18888_cast_fp16")]; - tensor input_497_cast_fp16 = mul(x = var_18888_cast_fp16, y = b_49_cast_fp16)[name = string("input_497_cast_fp16")]; - string e_49_pad_type_0 = const()[name = string("e_49_pad_type_0"), val = string("valid")]; - tensor e_49_strides_0 = const()[name = string("e_49_strides_0"), val = tensor([1, 1])]; - tensor e_49_pad_0 = const()[name = string("e_49_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_49_dilations_0 = const()[name = string("e_49_dilations_0"), val = tensor([1, 1])]; - int32 e_49_groups_0 = const()[name = string("e_49_groups_0"), val = int32(1)]; - tensor model_model_layers_24_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(522683904))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528655936))))[name = string("model_model_layers_24_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_49_cast_fp16 = conv(dilations = e_49_dilations_0, groups = e_49_groups_0, pad = e_49_pad_0, pad_type = e_49_pad_type_0, strides = e_49_strides_0, weight = model_model_layers_24_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_497_cast_fp16)[name = string("e_49_cast_fp16")]; - tensor var_18896_axes_0 = const()[name = string("op_18896_axes_0"), val = tensor([2])]; - tensor var_18896_cast_fp16 = squeeze(axes = var_18896_axes_0, x = e_49_cast_fp16)[name = string("op_18896_cast_fp16")]; - tensor var_18897 = const()[name = string("op_18897"), val = tensor([0, 2, 1])]; - int32 var_18908 = const()[name = string("op_18908"), val = int32(-1)]; - fp16 const_1071_promoted_to_fp16 = const()[name = string("const_1071_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_397_cast_fp16 = transpose(perm = var_18897, x = var_18896_cast_fp16)[name = string("transpose_9")]; - tensor var_18910_cast_fp16 = mul(x = hidden_states_397_cast_fp16, y = const_1071_promoted_to_fp16)[name = string("op_18910_cast_fp16")]; - bool input_499_interleave_0 = const()[name = string("input_499_interleave_0"), val = bool(false)]; - tensor input_499_cast_fp16 = concat(axis = var_18908, interleave = input_499_interleave_0, values = (hidden_states_397_cast_fp16, var_18910_cast_fp16))[name = string("input_499_cast_fp16")]; - tensor normed_597_axes_0 = const()[name = string("normed_597_axes_0"), val = tensor([-1])]; - fp16 var_18905_to_fp16 = const()[name = string("op_18905_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_597_cast_fp16 = layer_norm(axes = normed_597_axes_0, epsilon = var_18905_to_fp16, x = input_499_cast_fp16)[name = string("normed_597_cast_fp16")]; - tensor normed_599_begin_0 = const()[name = string("normed_599_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_599_end_0 = const()[name = string("normed_599_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_599_end_mask_0 = const()[name = string("normed_599_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_599_cast_fp16 = slice_by_index(begin = normed_599_begin_0, end = normed_599_end_0, end_mask = normed_599_end_mask_0, x = normed_597_cast_fp16)[name = string("normed_599_cast_fp16")]; - tensor var_18924_to_fp16 = const()[name = string("op_18924_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528674432)))]; - tensor hidden_states_399_cast_fp16 = mul(x = normed_599_cast_fp16, y = var_18924_to_fp16)[name = string("hidden_states_399_cast_fp16")]; - tensor hidden_states_401_cast_fp16 = add(x = hidden_states_395_cast_fp16, y = hidden_states_399_cast_fp16)[name = string("hidden_states_401_cast_fp16")]; - int32 var_18978 = const()[name = string("op_18978"), val = int32(-1)]; - fp16 const_1076_promoted_to_fp16 = const()[name = string("const_1076_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_18980_cast_fp16 = mul(x = hidden_states_401_cast_fp16, y = const_1076_promoted_to_fp16)[name = string("op_18980_cast_fp16")]; - bool input_501_interleave_0 = const()[name = string("input_501_interleave_0"), val = bool(false)]; - tensor input_501_cast_fp16 = concat(axis = var_18978, interleave = input_501_interleave_0, values = (hidden_states_401_cast_fp16, var_18980_cast_fp16))[name = string("input_501_cast_fp16")]; - tensor normed_601_axes_0 = const()[name = string("normed_601_axes_0"), val = tensor([-1])]; - fp16 var_18975_to_fp16 = const()[name = string("op_18975_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_601_cast_fp16 = layer_norm(axes = normed_601_axes_0, epsilon = var_18975_to_fp16, x = input_501_cast_fp16)[name = string("normed_601_cast_fp16")]; - tensor normed_603_begin_0 = const()[name = string("normed_603_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_603_end_0 = const()[name = string("normed_603_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_603_end_mask_0 = const()[name = string("normed_603_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_603_cast_fp16 = slice_by_index(begin = normed_603_begin_0, end = normed_603_end_0, end_mask = normed_603_end_mask_0, x = normed_601_cast_fp16)[name = string("normed_603_cast_fp16")]; - tensor var_18994_to_fp16 = const()[name = string("op_18994_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528676800)))]; - tensor hidden_states_403_cast_fp16 = mul(x = normed_603_cast_fp16, y = var_18994_to_fp16)[name = string("hidden_states_403_cast_fp16")]; - tensor var_19005 = const()[name = string("op_19005"), val = tensor([0, 2, 1])]; - tensor var_19008_axes_0 = const()[name = string("op_19008_axes_0"), val = tensor([2])]; - tensor var_19006_cast_fp16 = transpose(perm = var_19005, x = hidden_states_403_cast_fp16)[name = string("transpose_8")]; - tensor var_19008_cast_fp16 = expand_dims(axes = var_19008_axes_0, x = var_19006_cast_fp16)[name = string("op_19008_cast_fp16")]; - string query_states_201_pad_type_0 = const()[name = string("query_states_201_pad_type_0"), val = string("valid")]; - tensor query_states_201_strides_0 = const()[name = string("query_states_201_strides_0"), val = tensor([1, 1])]; - tensor query_states_201_pad_0 = const()[name = string("query_states_201_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_201_dilations_0 = const()[name = string("query_states_201_dilations_0"), val = tensor([1, 1])]; - int32 query_states_201_groups_0 = const()[name = string("query_states_201_groups_0"), val = int32(1)]; - tensor query_states_201 = conv(dilations = query_states_201_dilations_0, groups = query_states_201_groups_0, pad = query_states_201_pad_0, pad_type = query_states_201_pad_type_0, strides = query_states_201_strides_0, weight = model_model_layers_25_self_attn_q_proj_weight_palettized, x = var_19008_cast_fp16)[name = string("query_states_201")]; - string key_states_251_pad_type_0 = const()[name = string("key_states_251_pad_type_0"), val = string("valid")]; - tensor key_states_251_strides_0 = const()[name = string("key_states_251_strides_0"), val = tensor([1, 1])]; - tensor key_states_251_pad_0 = const()[name = string("key_states_251_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_251_dilations_0 = const()[name = string("key_states_251_dilations_0"), val = tensor([1, 1])]; - int32 key_states_251_groups_0 = const()[name = string("key_states_251_groups_0"), val = int32(1)]; - tensor key_states_251 = conv(dilations = key_states_251_dilations_0, groups = key_states_251_groups_0, pad = key_states_251_pad_0, pad_type = key_states_251_pad_type_0, strides = key_states_251_strides_0, weight = model_model_layers_25_self_attn_k_proj_weight_palettized, x = var_19008_cast_fp16)[name = string("key_states_251")]; - string value_states_201_pad_type_0 = const()[name = string("value_states_201_pad_type_0"), val = string("valid")]; - tensor value_states_201_strides_0 = const()[name = string("value_states_201_strides_0"), val = tensor([1, 1])]; - tensor value_states_201_pad_0 = const()[name = string("value_states_201_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_201_dilations_0 = const()[name = string("value_states_201_dilations_0"), val = tensor([1, 1])]; - int32 value_states_201_groups_0 = const()[name = string("value_states_201_groups_0"), val = int32(1)]; - tensor value_states_201 = conv(dilations = value_states_201_dilations_0, groups = value_states_201_groups_0, pad = value_states_201_pad_0, pad_type = value_states_201_pad_type_0, strides = value_states_201_strides_0, weight = model_model_layers_25_self_attn_v_proj_weight_palettized, x = var_19008_cast_fp16)[name = string("value_states_201")]; - tensor var_19050 = const()[name = string("op_19050"), val = tensor([1, 4, 256, 64])]; - tensor var_19051 = reshape(shape = var_19050, x = query_states_201)[name = string("op_19051")]; - tensor var_19056 = const()[name = string("op_19056"), val = tensor([0, 1, 3, 2])]; - tensor var_19061 = const()[name = string("op_19061"), val = tensor([1, 1, 256, 64])]; - tensor var_19062 = reshape(shape = var_19061, x = key_states_251)[name = string("op_19062")]; - tensor var_19067 = const()[name = string("op_19067"), val = tensor([0, 1, 3, 2])]; - tensor var_19072 = const()[name = string("op_19072"), val = tensor([1, 1, 256, 64])]; - tensor var_19073 = reshape(shape = var_19072, x = value_states_201)[name = string("op_19073")]; - tensor var_19078 = const()[name = string("op_19078"), val = tensor([0, 1, 3, 2])]; - int32 var_19089 = const()[name = string("op_19089"), val = int32(-1)]; - fp16 const_1081_promoted = const()[name = string("const_1081_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_405 = transpose(perm = var_19056, x = var_19051)[name = string("transpose_7")]; - tensor var_19091 = mul(x = hidden_states_405, y = const_1081_promoted)[name = string("op_19091")]; - bool input_505_interleave_0 = const()[name = string("input_505_interleave_0"), val = bool(false)]; - tensor input_505 = concat(axis = var_19089, interleave = input_505_interleave_0, values = (hidden_states_405, var_19091))[name = string("input_505")]; - tensor normed_605_axes_0 = const()[name = string("normed_605_axes_0"), val = tensor([-1])]; - fp16 var_19086_to_fp16 = const()[name = string("op_19086_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_605_cast_fp16 = layer_norm(axes = normed_605_axes_0, epsilon = var_19086_to_fp16, x = input_505)[name = string("normed_605_cast_fp16")]; - tensor normed_607_begin_0 = const()[name = string("normed_607_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_607_end_0 = const()[name = string("normed_607_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_607_end_mask_0 = const()[name = string("normed_607_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_607 = slice_by_index(begin = normed_607_begin_0, end = normed_607_end_0, end_mask = normed_607_end_mask_0, x = normed_605_cast_fp16)[name = string("normed_607")]; - tensor var_19105_to_fp16 = const()[name = string("op_19105_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528679168)))]; - tensor q_cast_fp16 = mul(x = normed_607, y = var_19105_to_fp16)[name = string("q_cast_fp16")]; - int32 var_19116 = const()[name = string("op_19116"), val = int32(-1)]; - fp16 const_1085_promoted = const()[name = string("const_1085_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_407 = transpose(perm = var_19067, x = var_19062)[name = string("transpose_6")]; - tensor var_19118 = mul(x = hidden_states_407, y = const_1085_promoted)[name = string("op_19118")]; - bool input_507_interleave_0 = const()[name = string("input_507_interleave_0"), val = bool(false)]; - tensor input_507 = concat(axis = var_19116, interleave = input_507_interleave_0, values = (hidden_states_407, var_19118))[name = string("input_507")]; - tensor normed_609_axes_0 = const()[name = string("normed_609_axes_0"), val = tensor([-1])]; - fp16 var_19113_to_fp16 = const()[name = string("op_19113_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_609_cast_fp16 = layer_norm(axes = normed_609_axes_0, epsilon = var_19113_to_fp16, x = input_507)[name = string("normed_609_cast_fp16")]; - tensor normed_611_begin_0 = const()[name = string("normed_611_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_611_end_0 = const()[name = string("normed_611_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_611_end_mask_0 = const()[name = string("normed_611_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_611 = slice_by_index(begin = normed_611_begin_0, end = normed_611_end_0, end_mask = normed_611_end_mask_0, x = normed_609_cast_fp16)[name = string("normed_611")]; - tensor var_19132_to_fp16 = const()[name = string("op_19132_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528679744)))]; - tensor k_cast_fp16 = mul(x = normed_611, y = var_19132_to_fp16)[name = string("k_cast_fp16")]; - tensor var_19146_cast_fp16 = mul(x = q_cast_fp16, y = cos_5)[name = string("op_19146_cast_fp16")]; - tensor x1_101_begin_0 = const()[name = string("x1_101_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_101_end_0 = const()[name = string("x1_101_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_101_end_mask_0 = const()[name = string("x1_101_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_101_cast_fp16 = slice_by_index(begin = x1_101_begin_0, end = x1_101_end_0, end_mask = x1_101_end_mask_0, x = q_cast_fp16)[name = string("x1_101_cast_fp16")]; - tensor x2_101_begin_0 = const()[name = string("x2_101_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_101_end_0 = const()[name = string("x2_101_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_101_end_mask_0 = const()[name = string("x2_101_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_101_cast_fp16 = slice_by_index(begin = x2_101_begin_0, end = x2_101_end_0, end_mask = x2_101_end_mask_0, x = q_cast_fp16)[name = string("x2_101_cast_fp16")]; - fp16 const_1091_promoted_to_fp16 = const()[name = string("const_1091_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_19167_cast_fp16 = mul(x = x2_101_cast_fp16, y = const_1091_promoted_to_fp16)[name = string("op_19167_cast_fp16")]; - int32 var_19169 = const()[name = string("op_19169"), val = int32(-1)]; - bool var_19170_interleave_0 = const()[name = string("op_19170_interleave_0"), val = bool(false)]; - tensor var_19170_cast_fp16 = concat(axis = var_19169, interleave = var_19170_interleave_0, values = (var_19167_cast_fp16, x1_101_cast_fp16))[name = string("op_19170_cast_fp16")]; - tensor var_19171_cast_fp16 = mul(x = var_19170_cast_fp16, y = sin_5)[name = string("op_19171_cast_fp16")]; - tensor query_states_203_cast_fp16 = add(x = var_19146_cast_fp16, y = var_19171_cast_fp16)[name = string("query_states_203_cast_fp16")]; - tensor var_19174_cast_fp16 = mul(x = k_cast_fp16, y = cos_5)[name = string("op_19174_cast_fp16")]; - tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_cast_fp16 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = k_cast_fp16)[name = string("x1_cast_fp16")]; - tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_cast_fp16 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = k_cast_fp16)[name = string("x2_cast_fp16")]; - fp16 const_1094_promoted_to_fp16 = const()[name = string("const_1094_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_19195_cast_fp16 = mul(x = x2_cast_fp16, y = const_1094_promoted_to_fp16)[name = string("op_19195_cast_fp16")]; - int32 var_19197 = const()[name = string("op_19197"), val = int32(-1)]; - bool var_19198_interleave_0 = const()[name = string("op_19198_interleave_0"), val = bool(false)]; - tensor var_19198_cast_fp16 = concat(axis = var_19197, interleave = var_19198_interleave_0, values = (var_19195_cast_fp16, x1_cast_fp16))[name = string("op_19198_cast_fp16")]; - tensor var_19199_cast_fp16 = mul(x = var_19198_cast_fp16, y = sin_5)[name = string("op_19199_cast_fp16")]; - tensor key_states_253_cast_fp16 = add(x = var_19174_cast_fp16, y = var_19199_cast_fp16)[name = string("key_states_253_cast_fp16")]; - tensor expand_dims_300 = const()[name = string("expand_dims_300"), val = tensor([21])]; - tensor expand_dims_301 = const()[name = string("expand_dims_301"), val = tensor([0])]; - tensor expand_dims_303 = const()[name = string("expand_dims_303"), val = tensor([0])]; - tensor expand_dims_304 = const()[name = string("expand_dims_304"), val = tensor([22])]; - int32 concat_452_axis_0 = const()[name = string("concat_452_axis_0"), val = int32(0)]; - bool concat_452_interleave_0 = const()[name = string("concat_452_interleave_0"), val = bool(false)]; - tensor concat_452 = concat(axis = concat_452_axis_0, interleave = concat_452_interleave_0, values = (expand_dims_300, expand_dims_301, current_pos, expand_dims_303))[name = string("concat_452")]; - tensor concat_453_values1_0 = const()[name = string("concat_453_values1_0"), val = tensor([0])]; - tensor concat_453_values3_0 = const()[name = string("concat_453_values3_0"), val = tensor([0])]; - int32 concat_453_axis_0 = const()[name = string("concat_453_axis_0"), val = int32(0)]; - bool concat_453_interleave_0 = const()[name = string("concat_453_interleave_0"), val = bool(false)]; - tensor concat_453 = concat(axis = concat_453_axis_0, interleave = concat_453_interleave_0, values = (expand_dims_304, concat_453_values1_0, end_pos_1, concat_453_values3_0))[name = string("concat_453")]; - tensor model_model_kv_cache_local_internal_tensor_assign_43_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_43_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_43_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_43_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_43_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_43_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_43_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_43_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_43_cast_fp16 = slice_update(begin = concat_452, begin_mask = model_model_kv_cache_local_internal_tensor_assign_43_begin_mask_0, end = concat_453, end_mask = model_model_kv_cache_local_internal_tensor_assign_43_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_43_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_43_stride_0, update = key_states_253_cast_fp16, x = coreml_update_state_101)[name = string("model_model_kv_cache_local_internal_tensor_assign_43_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_43_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_154_write_state")]; - tensor coreml_update_state_102 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_154")]; - tensor expand_dims_306 = const()[name = string("expand_dims_306"), val = tensor([43])]; - tensor expand_dims_307 = const()[name = string("expand_dims_307"), val = tensor([0])]; - tensor expand_dims_309 = const()[name = string("expand_dims_309"), val = tensor([0])]; - tensor expand_dims_310 = const()[name = string("expand_dims_310"), val = tensor([44])]; - int32 concat_456_axis_0 = const()[name = string("concat_456_axis_0"), val = int32(0)]; - bool concat_456_interleave_0 = const()[name = string("concat_456_interleave_0"), val = bool(false)]; - tensor concat_456 = concat(axis = concat_456_axis_0, interleave = concat_456_interleave_0, values = (expand_dims_306, expand_dims_307, current_pos, expand_dims_309))[name = string("concat_456")]; - tensor concat_457_values1_0 = const()[name = string("concat_457_values1_0"), val = tensor([0])]; - tensor concat_457_values3_0 = const()[name = string("concat_457_values3_0"), val = tensor([0])]; - int32 concat_457_axis_0 = const()[name = string("concat_457_axis_0"), val = int32(0)]; - bool concat_457_interleave_0 = const()[name = string("concat_457_interleave_0"), val = bool(false)]; - tensor concat_457 = concat(axis = concat_457_axis_0, interleave = concat_457_interleave_0, values = (expand_dims_310, concat_457_values1_0, end_pos_1, concat_457_values3_0))[name = string("concat_457")]; - tensor model_model_kv_cache_local_internal_tensor_assign_44_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_44_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_44_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_44_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_44_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_44_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_44_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_44_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor value_states_203 = transpose(perm = var_19078, x = var_19073)[name = string("transpose_5")]; - tensor model_model_kv_cache_local_internal_tensor_assign_44_cast_fp16 = slice_update(begin = concat_456, begin_mask = model_model_kv_cache_local_internal_tensor_assign_44_begin_mask_0, end = concat_457, end_mask = model_model_kv_cache_local_internal_tensor_assign_44_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_44_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_44_stride_0, update = value_states_203, x = coreml_update_state_102)[name = string("model_model_kv_cache_local_internal_tensor_assign_44_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_44_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_155_write_state")]; - tensor coreml_update_state_103 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_155")]; - tensor var_19298_begin_0 = const()[name = string("op_19298_begin_0"), val = tensor([21, 0, 0, 0])]; - tensor var_19298_end_0 = const()[name = string("op_19298_end_0"), val = tensor([22, 1, 512, 256])]; - tensor var_19298_end_mask_0 = const()[name = string("op_19298_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_19298_cast_fp16 = slice_by_index(begin = var_19298_begin_0, end = var_19298_end_0, end_mask = var_19298_end_mask_0, x = coreml_update_state_103)[name = string("op_19298_cast_fp16")]; - tensor var_19305_begin_0 = const()[name = string("op_19305_begin_0"), val = tensor([43, 0, 0, 0])]; - tensor var_19305_end_0 = const()[name = string("op_19305_end_0"), val = tensor([1, 1, 512, 256])]; - tensor var_19305_end_mask_0 = const()[name = string("op_19305_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_19305_cast_fp16 = slice_by_index(begin = var_19305_begin_0, end = var_19305_end_0, end_mask = var_19305_end_mask_0, x = coreml_update_state_103)[name = string("op_19305_cast_fp16")]; - tensor var_19344 = const()[name = string("op_19344"), val = tensor([1, 4, 1, 1])]; - tensor x_405_cast_fp16 = tile(reps = var_19344, x = var_19298_cast_fp16)[name = string("x_405_cast_fp16")]; - tensor var_19364 = const()[name = string("op_19364"), val = tensor([1, 4, 1, 1])]; - tensor x_411_cast_fp16 = tile(reps = var_19364, x = var_19305_cast_fp16)[name = string("x_411_cast_fp16")]; - bool var_19391_transpose_x_0 = const()[name = string("op_19391_transpose_x_0"), val = bool(false)]; - bool var_19391_transpose_y_0 = const()[name = string("op_19391_transpose_y_0"), val = bool(true)]; - tensor var_19391 = matmul(transpose_x = var_19391_transpose_x_0, transpose_y = var_19391_transpose_y_0, x = query_states_203_cast_fp16, y = x_405_cast_fp16)[name = string("op_19391")]; - fp16 var_19392_to_fp16 = const()[name = string("op_19392_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_101_cast_fp16 = mul(x = var_19391, y = var_19392_to_fp16)[name = string("attn_weights_101_cast_fp16")]; - tensor attn_weights_cast_fp16 = add(x = attn_weights_101_cast_fp16, y = mask_slice_1)[name = string("attn_weights_cast_fp16")]; - int32 var_19427 = const()[name = string("op_19427"), val = int32(-1)]; - tensor var_19429_cast_fp16 = softmax(axis = var_19427, x = attn_weights_cast_fp16)[name = string("op_19429_cast_fp16")]; - tensor concat_462 = const()[name = string("concat_462"), val = tensor([4, 64, 512])]; - tensor reshape_75_cast_fp16 = reshape(shape = concat_462, x = var_19429_cast_fp16)[name = string("reshape_75_cast_fp16")]; - tensor concat_463 = const()[name = string("concat_463"), val = tensor([4, 512, 256])]; - tensor reshape_76_cast_fp16 = reshape(shape = concat_463, x = x_411_cast_fp16)[name = string("reshape_76_cast_fp16")]; - bool matmul_25_transpose_x_0 = const()[name = string("matmul_25_transpose_x_0"), val = bool(false)]; - bool matmul_25_transpose_y_0 = const()[name = string("matmul_25_transpose_y_0"), val = bool(false)]; - tensor matmul_25_cast_fp16 = matmul(transpose_x = matmul_25_transpose_x_0, transpose_y = matmul_25_transpose_y_0, x = reshape_75_cast_fp16, y = reshape_76_cast_fp16)[name = string("matmul_25_cast_fp16")]; - tensor concat_467 = const()[name = string("concat_467"), val = tensor([1, 4, 64, 256])]; - tensor reshape_77_cast_fp16 = reshape(shape = concat_467, x = matmul_25_cast_fp16)[name = string("reshape_77_cast_fp16")]; - tensor var_19441_perm_0 = const()[name = string("op_19441_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_19460 = const()[name = string("op_19460"), val = tensor([1, 64, 1024])]; - tensor var_19441_cast_fp16 = transpose(perm = var_19441_perm_0, x = reshape_77_cast_fp16)[name = string("transpose_4")]; - tensor attn_output_255_cast_fp16 = reshape(shape = var_19460, x = var_19441_cast_fp16)[name = string("attn_output_255_cast_fp16")]; - tensor var_19465 = const()[name = string("op_19465"), val = tensor([0, 2, 1])]; - string var_19481_pad_type_0 = const()[name = string("op_19481_pad_type_0"), val = string("valid")]; - int32 var_19481_groups_0 = const()[name = string("op_19481_groups_0"), val = int32(1)]; - tensor var_19481_strides_0 = const()[name = string("op_19481_strides_0"), val = tensor([1])]; - tensor var_19481_pad_0 = const()[name = string("op_19481_pad_0"), val = tensor([0, 0])]; - tensor var_19481_dilations_0 = const()[name = string("op_19481_dilations_0"), val = tensor([1])]; - tensor squeeze_25_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528680320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(529565120))))[name = string("squeeze_25_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_19466_cast_fp16 = transpose(perm = var_19465, x = attn_output_255_cast_fp16)[name = string("transpose_3")]; - tensor var_19481_cast_fp16 = conv(dilations = var_19481_dilations_0, groups = var_19481_groups_0, pad = var_19481_pad_0, pad_type = var_19481_pad_type_0, strides = var_19481_strides_0, weight = squeeze_25_cast_fp16_to_fp32_to_fp16_palettized, x = var_19466_cast_fp16)[name = string("op_19481_cast_fp16")]; - tensor var_19485 = const()[name = string("op_19485"), val = tensor([0, 2, 1])]; - int32 var_19496 = const()[name = string("op_19496"), val = int32(-1)]; - fp16 const_1106_promoted_to_fp16 = const()[name = string("const_1106_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_409_cast_fp16 = transpose(perm = var_19485, x = var_19481_cast_fp16)[name = string("transpose_2")]; - tensor var_19498_cast_fp16 = mul(x = hidden_states_409_cast_fp16, y = const_1106_promoted_to_fp16)[name = string("op_19498_cast_fp16")]; - bool input_511_interleave_0 = const()[name = string("input_511_interleave_0"), val = bool(false)]; - tensor input_511_cast_fp16 = concat(axis = var_19496, interleave = input_511_interleave_0, values = (hidden_states_409_cast_fp16, var_19498_cast_fp16))[name = string("input_511_cast_fp16")]; - tensor normed_613_axes_0 = const()[name = string("normed_613_axes_0"), val = tensor([-1])]; - fp16 var_19493_to_fp16 = const()[name = string("op_19493_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_613_cast_fp16 = layer_norm(axes = normed_613_axes_0, epsilon = var_19493_to_fp16, x = input_511_cast_fp16)[name = string("normed_613_cast_fp16")]; - tensor normed_615_begin_0 = const()[name = string("normed_615_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_615_end_0 = const()[name = string("normed_615_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_615_end_mask_0 = const()[name = string("normed_615_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_615_cast_fp16 = slice_by_index(begin = normed_615_begin_0, end = normed_615_end_0, end_mask = normed_615_end_mask_0, x = normed_613_cast_fp16)[name = string("normed_615_cast_fp16")]; - tensor var_19512_to_fp16 = const()[name = string("op_19512_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(529583616)))]; - tensor attn_output_cast_fp16 = mul(x = normed_615_cast_fp16, y = var_19512_to_fp16)[name = string("attn_output_cast_fp16")]; - tensor hidden_states_411_cast_fp16 = add(x = hidden_states_401_cast_fp16, y = attn_output_cast_fp16)[name = string("hidden_states_411_cast_fp16")]; - int32 var_19525 = const()[name = string("op_19525"), val = int32(-1)]; - fp16 const_1110_promoted_to_fp16 = const()[name = string("const_1110_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_19527_cast_fp16 = mul(x = hidden_states_411_cast_fp16, y = const_1110_promoted_to_fp16)[name = string("op_19527_cast_fp16")]; - bool input_513_interleave_0 = const()[name = string("input_513_interleave_0"), val = bool(false)]; - tensor input_513_cast_fp16 = concat(axis = var_19525, interleave = input_513_interleave_0, values = (hidden_states_411_cast_fp16, var_19527_cast_fp16))[name = string("input_513_cast_fp16")]; - tensor normed_617_axes_0 = const()[name = string("normed_617_axes_0"), val = tensor([-1])]; - fp16 var_19522_to_fp16 = const()[name = string("op_19522_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_617_cast_fp16 = layer_norm(axes = normed_617_axes_0, epsilon = var_19522_to_fp16, x = input_513_cast_fp16)[name = string("normed_617_cast_fp16")]; - tensor normed_619_begin_0 = const()[name = string("normed_619_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_619_end_0 = const()[name = string("normed_619_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_619_end_mask_0 = const()[name = string("normed_619_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_619_cast_fp16 = slice_by_index(begin = normed_619_begin_0, end = normed_619_end_0, end_mask = normed_619_end_mask_0, x = normed_617_cast_fp16)[name = string("normed_619_cast_fp16")]; - tensor var_19541_to_fp16 = const()[name = string("op_19541_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(529585984)))]; - tensor x_413_cast_fp16 = mul(x = normed_619_cast_fp16, y = var_19541_to_fp16)[name = string("x_413_cast_fp16")]; - tensor var_19553 = const()[name = string("op_19553"), val = tensor([0, 2, 1])]; - tensor input_515_axes_0 = const()[name = string("input_515_axes_0"), val = tensor([2])]; - tensor var_19554_cast_fp16 = transpose(perm = var_19553, x = x_413_cast_fp16)[name = string("transpose_1")]; - tensor input_515_cast_fp16 = expand_dims(axes = input_515_axes_0, x = var_19554_cast_fp16)[name = string("input_515_cast_fp16")]; - string x_pad_type_0 = const()[name = string("x_pad_type_0"), val = string("valid")]; - tensor x_strides_0 = const()[name = string("x_strides_0"), val = tensor([1, 1])]; - tensor x_pad_0 = const()[name = string("x_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_dilations_0 = const()[name = string("x_dilations_0"), val = tensor([1, 1])]; - int32 x_groups_0 = const()[name = string("x_groups_0"), val = int32(1)]; - tensor model_model_layers_25_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1176922304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1182894336))))[name = string("model_model_layers_25_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_cast_fp16 = conv(dilations = x_dilations_0, groups = x_groups_0, pad = x_pad_0, pad_type = x_pad_type_0, strides = x_strides_0, weight = model_model_layers_25_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_515_cast_fp16)[name = string("x_cast_fp16")]; - string b_pad_type_0 = const()[name = string("b_pad_type_0"), val = string("valid")]; - tensor b_strides_0 = const()[name = string("b_strides_0"), val = tensor([1, 1])]; - tensor b_pad_0 = const()[name = string("b_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_dilations_0 = const()[name = string("b_dilations_0"), val = tensor([1, 1])]; - int32 b_groups_0 = const()[name = string("b_groups_0"), val = int32(1)]; - tensor model_model_layers_25_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1183004992))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1188977024))))[name = string("model_model_layers_25_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_cast_fp16 = conv(dilations = b_dilations_0, groups = b_groups_0, pad = b_pad_0, pad_type = b_pad_type_0, strides = b_strides_0, weight = model_model_layers_25_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_515_cast_fp16)[name = string("b_cast_fp16")]; - string var_19579_mode_0 = const()[name = string("op_19579_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_19579_cast_fp16 = gelu(mode = var_19579_mode_0, x = x_cast_fp16)[name = string("op_19579_cast_fp16")]; - tensor input_517_cast_fp16 = mul(x = var_19579_cast_fp16, y = b_cast_fp16)[name = string("input_517_cast_fp16")]; - string e_pad_type_0 = const()[name = string("e_pad_type_0"), val = string("valid")]; - tensor e_strides_0 = const()[name = string("e_strides_0"), val = tensor([1, 1])]; - tensor e_pad_0 = const()[name = string("e_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_dilations_0 = const()[name = string("e_dilations_0"), val = tensor([1, 1])]; - int32 e_groups_0 = const()[name = string("e_groups_0"), val = int32(1)]; - tensor model_model_layers_25_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(541753728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547725760))))[name = string("model_model_layers_25_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_cast_fp16 = conv(dilations = e_dilations_0, groups = e_groups_0, pad = e_pad_0, pad_type = e_pad_type_0, strides = e_strides_0, weight = model_model_layers_25_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_517_cast_fp16)[name = string("e_cast_fp16")]; - tensor var_19587_axes_0 = const()[name = string("op_19587_axes_0"), val = tensor([2])]; - tensor var_19587_cast_fp16 = squeeze(axes = var_19587_axes_0, x = e_cast_fp16)[name = string("op_19587_cast_fp16")]; - tensor var_19588 = const()[name = string("op_19588"), val = tensor([0, 2, 1])]; - int32 var_19599 = const()[name = string("op_19599"), val = int32(-1)]; - fp16 const_1114_promoted_to_fp16 = const()[name = string("const_1114_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_413_cast_fp16 = transpose(perm = var_19588, x = var_19587_cast_fp16)[name = string("transpose_0")]; - tensor var_19601_cast_fp16 = mul(x = hidden_states_413_cast_fp16, y = const_1114_promoted_to_fp16)[name = string("op_19601_cast_fp16")]; - bool input_interleave_0 = const()[name = string("input_interleave_0"), val = bool(false)]; - tensor input_cast_fp16 = concat(axis = var_19599, interleave = input_interleave_0, values = (hidden_states_413_cast_fp16, var_19601_cast_fp16))[name = string("input_cast_fp16")]; - tensor normed_621_axes_0 = const()[name = string("normed_621_axes_0"), val = tensor([-1])]; - fp16 var_19596_to_fp16 = const()[name = string("op_19596_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_621_cast_fp16 = layer_norm(axes = normed_621_axes_0, epsilon = var_19596_to_fp16, x = input_cast_fp16)[name = string("normed_621_cast_fp16")]; - tensor normed_begin_0 = const()[name = string("normed_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_end_0 = const()[name = string("normed_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_end_mask_0 = const()[name = string("normed_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_cast_fp16 = slice_by_index(begin = normed_begin_0, end = normed_end_0, end_mask = normed_end_mask_0, x = normed_621_cast_fp16)[name = string("normed_cast_fp16")]; - tensor var_19615_to_fp16 = const()[name = string("op_19615_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547744256)))]; - tensor hidden_states_cast_fp16 = mul(x = normed_cast_fp16, y = var_19615_to_fp16)[name = string("hidden_states_cast_fp16")]; - tensor out_1_cast_fp16 = add(x = hidden_states_411_cast_fp16, y = hidden_states_cast_fp16)[name = string("out_1_cast_fp16")]; - tensor var_19621_begin_0 = const()[name = string("op_19621_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_19621_end_0 = const()[name = string("op_19621_end_0"), val = tensor([1, 1, 512, 256])]; - tensor var_19621_end_mask_0 = const()[name = string("op_19621_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_19621_squeeze_mask_0 = const()[name = string("op_19621_squeeze_mask_0"), val = tensor([true, false, false, false])]; - tensor var_19621_cast_fp16 = slice_by_index(begin = var_19621_begin_0, end = var_19621_end_0, end_mask = var_19621_end_mask_0, squeeze_mask = var_19621_squeeze_mask_0, x = coreml_update_state_103)[name = string("op_19621_cast_fp16")]; - tensor var_19624_begin_0 = const()[name = string("op_19624_begin_0"), val = tensor([0, 0, 0])]; - tensor var_19624_end_0 = const()[name = string("op_19624_end_0"), val = tensor([1, 512, 256])]; - tensor var_19624_end_mask_0 = const()[name = string("op_19624_end_mask_0"), val = tensor([false, true, true])]; - tensor var_19624_squeeze_mask_0 = const()[name = string("op_19624_squeeze_mask_0"), val = tensor([true, false, false])]; - tensor var_19624_cast_fp16 = slice_by_index(begin = var_19624_begin_0, end = var_19624_end_0, end_mask = var_19624_end_mask_0, squeeze_mask = var_19624_squeeze_mask_0, x = var_19621_cast_fp16)[name = string("op_19624_cast_fp16")]; - tensor var_19627_begin_0 = const()[name = string("op_19627_begin_0"), val = tensor([0, 0])]; - tensor var_19627_end_0 = const()[name = string("op_19627_end_0"), val = tensor([1, 256])]; - tensor var_19627_end_mask_0 = const()[name = string("op_19627_end_mask_0"), val = tensor([false, true])]; - tensor var_19627_squeeze_mask_0 = const()[name = string("op_19627_squeeze_mask_0"), val = tensor([true, false])]; - tensor var_19627_cast_fp16 = slice_by_index(begin = var_19627_begin_0, end = var_19627_end_0, end_mask = var_19627_end_mask_0, squeeze_mask = var_19627_squeeze_mask_0, x = var_19624_cast_fp16)[name = string("op_19627_cast_fp16")]; - tensor var_19630_begin_0 = const()[name = string("op_19630_begin_0"), val = tensor([0])]; - tensor var_19630_end_0 = const()[name = string("op_19630_end_0"), val = tensor([1])]; - tensor var_19630_end_mask_0 = const()[name = string("op_19630_end_mask_0"), val = tensor([false])]; - tensor var_19630_squeeze_mask_0 = const()[name = string("op_19630_squeeze_mask_0"), val = tensor([true])]; - fp16 var_19630_cast_fp16 = slice_by_index(begin = var_19630_begin_0, end = var_19630_end_0, end_mask = var_19630_end_mask_0, squeeze_mask = var_19630_squeeze_mask_0, x = var_19627_cast_fp16)[name = string("op_19630_cast_fp16")]; - fp16 var_19631_to_fp16 = const()[name = string("op_19631_to_fp16"), val = fp16(0x0p+0)]; - fp16 dummy_local_cast_fp16 = mul(x = var_19630_cast_fp16, y = var_19631_to_fp16)[name = string("dummy_local_cast_fp16")]; - tensor var_19635_begin_0 = const()[name = string("op_19635_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_19635_end_0 = const()[name = string("op_19635_end_0"), val = tensor([1, 1, 4096, 256])]; - tensor var_19635_end_mask_0 = const()[name = string("op_19635_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_19635_squeeze_mask_0 = const()[name = string("op_19635_squeeze_mask_0"), val = tensor([true, false, false, false])]; - tensor var_19635_cast_fp16 = slice_by_index(begin = var_19635_begin_0, end = var_19635_end_0, end_mask = var_19635_end_mask_0, squeeze_mask = var_19635_squeeze_mask_0, x = coreml_update_state_99)[name = string("op_19635_cast_fp16")]; - tensor var_19638_begin_0 = const()[name = string("op_19638_begin_0"), val = tensor([0, 0, 0])]; - tensor var_19638_end_0 = const()[name = string("op_19638_end_0"), val = tensor([1, 4096, 256])]; - tensor var_19638_end_mask_0 = const()[name = string("op_19638_end_mask_0"), val = tensor([false, true, true])]; - tensor var_19638_squeeze_mask_0 = const()[name = string("op_19638_squeeze_mask_0"), val = tensor([true, false, false])]; - tensor var_19638_cast_fp16 = slice_by_index(begin = var_19638_begin_0, end = var_19638_end_0, end_mask = var_19638_end_mask_0, squeeze_mask = var_19638_squeeze_mask_0, x = var_19635_cast_fp16)[name = string("op_19638_cast_fp16")]; - tensor var_19641_begin_0 = const()[name = string("op_19641_begin_0"), val = tensor([0, 0])]; - tensor var_19641_end_0 = const()[name = string("op_19641_end_0"), val = tensor([1, 256])]; - tensor var_19641_end_mask_0 = const()[name = string("op_19641_end_mask_0"), val = tensor([false, true])]; - tensor var_19641_squeeze_mask_0 = const()[name = string("op_19641_squeeze_mask_0"), val = tensor([true, false])]; - tensor var_19641_cast_fp16 = slice_by_index(begin = var_19641_begin_0, end = var_19641_end_0, end_mask = var_19641_end_mask_0, squeeze_mask = var_19641_squeeze_mask_0, x = var_19638_cast_fp16)[name = string("op_19641_cast_fp16")]; - tensor var_19644_begin_0 = const()[name = string("op_19644_begin_0"), val = tensor([0])]; - tensor var_19644_end_0 = const()[name = string("op_19644_end_0"), val = tensor([1])]; - tensor var_19644_end_mask_0 = const()[name = string("op_19644_end_mask_0"), val = tensor([false])]; - tensor var_19644_squeeze_mask_0 = const()[name = string("op_19644_squeeze_mask_0"), val = tensor([true])]; - fp16 var_19644_cast_fp16 = slice_by_index(begin = var_19644_begin_0, end = var_19644_end_0, end_mask = var_19644_end_mask_0, squeeze_mask = var_19644_squeeze_mask_0, x = var_19641_cast_fp16)[name = string("op_19644_cast_fp16")]; - fp16 var_19645_to_fp16 = const()[name = string("op_19645_to_fp16"), val = fp16(0x0p+0)]; - fp16 dummy_global_cast_fp16 = mul(x = var_19644_cast_fp16, y = var_19645_to_fp16)[name = string("dummy_global_cast_fp16")]; - fp16 var_19648_cast_fp16 = add(x = dummy_local_cast_fp16, y = dummy_global_cast_fp16)[name = string("op_19648_cast_fp16")]; - tensor var_19652 = const()[name = string("op_19652"), val = tensor([1, 1, 1])]; - tensor var_19653_cast_fp16 = reshape(shape = var_19652, x = var_19648_cast_fp16)[name = string("op_19653_cast_fp16")]; - tensor out_cast_fp16 = add(x = out_1_cast_fp16, y = var_19653_cast_fp16)[name = string("out_cast_fp16")]; - tensor var_19665_begin_0 = const()[name = string("op_19665_begin_0"), val = tensor([0, 0, 0])]; - tensor var_19665_end_0 = const()[name = string("op_19665_end_0"), val = tensor([1, 1, 1152])]; - tensor var_19665_end_mask_0 = const()[name = string("op_19665_end_mask_0"), val = tensor([true, false, true])]; - tensor output_hidden_states = slice_by_index(begin = var_19665_begin_0, end = var_19665_end_0, end_mask = var_19665_end_mask_0, x = out_cast_fp16)[name = string("op_19665_cast_fp16")]; - } -> (output_hidden_states); - func prefill_rotate(tensor causal_mask, tensor current_pos, tensor hidden_states, state> model_model_kv_cache_global, state> model_model_kv_cache_local, tensor position_ids) { - tensor model_model_layers_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1189087680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1189972480))))[name = string("model_model_layers_0_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_0_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1189988928))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1190210176))))[name = string("model_model_layers_0_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_0_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1126720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1347968))))[name = string("model_model_layers_0_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547769600))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1190214336))))[name = string("model_model_layers_1_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_1_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2253376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2474624))))[name = string("model_model_layers_1_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_1_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2478784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2700032))))[name = string("model_model_layers_1_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2704192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1190230784))))[name = string("model_model_layers_2_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_2_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3605440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(549580416))))[name = string("model_model_layers_2_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_2_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3830848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4052096))))[name = string("model_model_layers_2_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1190247232))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1191132032))))[name = string("model_model_layers_3_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_3_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(549605184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1191148480))))[name = string("model_model_layers_3_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_3_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871382656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1191152640))))[name = string("model_model_layers_3_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5408320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1191156800))))[name = string("model_model_layers_4_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_4_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6309568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1191173248))))[name = string("model_model_layers_4_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_4_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6534976))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1191177408))))[name = string("model_model_layers_4_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_5_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(550744320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1191181568))))[name = string("model_model_layers_5_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_5_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7661632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1191198016))))[name = string("model_model_layers_5_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_5_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7887040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1191202176))))[name = string("model_model_layers_5_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_6_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551653888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1191206336))))[name = string("model_model_layers_6_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_6_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9013696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1191222784))))[name = string("model_model_layers_6_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_6_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9239104))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1191226944))))[name = string("model_model_layers_6_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_7_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9464512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1191231104))))[name = string("model_model_layers_7_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_7_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10365760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1191247552))))[name = string("model_model_layers_7_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_7_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10591168))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1191251712))))[name = string("model_model_layers_7_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_8_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1191255872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1192140672))))[name = string("model_model_layers_8_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_8_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11717824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1192157120))))[name = string("model_model_layers_8_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_8_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11943232))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1192161280))))[name = string("model_model_layers_8_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_9_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1192165440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1193050240))))[name = string("model_model_layers_9_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_9_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13069888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1193066688))))[name = string("model_model_layers_9_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_9_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13295296))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1193070848))))[name = string("model_model_layers_9_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_10_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1193075008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1193959808))))[name = string("model_model_layers_10_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_10_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14421952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1193976256))))[name = string("model_model_layers_10_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_10_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14647360))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14868608))))[name = string("model_model_layers_10_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_11_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14872768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1193980416))))[name = string("model_model_layers_11_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_11_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15774016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1193996864))))[name = string("model_model_layers_11_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_11_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15999424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1194001024))))[name = string("model_model_layers_11_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_12_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16224832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1194005184))))[name = string("model_model_layers_12_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_12_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17126080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17347328))))[name = string("model_model_layers_12_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_12_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17351488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1194021632))))[name = string("model_model_layers_12_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_13_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17576896))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1194025792))))[name = string("model_model_layers_13_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_13_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18478144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18699392))))[name = string("model_model_layers_13_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_13_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18703552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18924800))))[name = string("model_model_layers_13_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_14_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1194042240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1194927040))))[name = string("model_model_layers_14_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_14_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19830208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20051456))))[name = string("model_model_layers_14_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_14_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20055616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1194943488))))[name = string("model_model_layers_14_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_15_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1194947648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1195832448))))[name = string("model_model_layers_15_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_15_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21182272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21403520))))[name = string("model_model_layers_15_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_15_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21407680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21628928))))[name = string("model_model_layers_15_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_16_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21633088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1195848896))))[name = string("model_model_layers_16_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_16_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22534336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1195865344))))[name = string("model_model_layers_16_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_16_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22759744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1195869504))))[name = string("model_model_layers_16_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_17_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557435584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1195873664))))[name = string("model_model_layers_17_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_17_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1195890112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1196111360))))[name = string("model_model_layers_17_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_17_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24111808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24333056))))[name = string("model_model_layers_17_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_18_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24337216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1196115520))))[name = string("model_model_layers_18_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_18_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25238464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558578688))))[name = string("model_model_layers_18_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_18_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25463872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1196131968))))[name = string("model_model_layers_18_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_19_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25689280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1196136128))))[name = string("model_model_layers_19_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_19_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26590528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26811776))))[name = string("model_model_layers_19_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_19_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26815936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1196152576))))[name = string("model_model_layers_19_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_20_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27041344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1196156736))))[name = string("model_model_layers_20_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_20_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27942592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1196173184))))[name = string("model_model_layers_20_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_20_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28168000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28389248))))[name = string("model_model_layers_20_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_21_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28393408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1196177344))))[name = string("model_model_layers_21_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_21_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29294656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1196193792))))[name = string("model_model_layers_21_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_21_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29520064))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1196197952))))[name = string("model_model_layers_21_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_22_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1196202112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1197086912))))[name = string("model_model_layers_22_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_22_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30646720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1197103360))))[name = string("model_model_layers_22_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_22_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30872128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1197107520))))[name = string("model_model_layers_22_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_23_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31097536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1197111680))))[name = string("model_model_layers_23_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_23_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31998784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1197128128))))[name = string("model_model_layers_23_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_23_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1197132288))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1197353536))))[name = string("model_model_layers_23_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_24_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32449600))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1197357696))))[name = string("model_model_layers_24_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_24_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33350848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1197374144))))[name = string("model_model_layers_24_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_24_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33576256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1197378304))))[name = string("model_model_layers_24_self_attn_v_proj_weight_palettized")]; - tensor model_model_layers_25_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560058752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1197382464))))[name = string("model_model_layers_25_self_attn_q_proj_weight_palettized")]; - tensor model_model_layers_25_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34702912))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1197398912))))[name = string("model_model_layers_25_self_attn_k_proj_weight_palettized")]; - tensor model_model_layers_25_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34928320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1197403072))))[name = string("model_model_layers_25_self_attn_v_proj_weight_palettized")]; - int32 var_1618_batch_dims_0 = const()[name = string("op_1618_batch_dims_0"), val = int32(0)]; - bool var_1618_validate_indices_0 = const()[name = string("op_1618_validate_indices_0"), val = bool(false)]; - tensor var_1610_to_fp16 = const()[name = string("op_1610_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39348096)))]; - string position_ids_to_int16_dtype_0 = const()[name = string("position_ids_to_int16_dtype_0"), val = string("int16")]; - string cast_266_dtype_0 = const()[name = string("cast_266_dtype_0"), val = string("int32")]; - int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; - tensor position_ids_to_int16 = cast(dtype = position_ids_to_int16_dtype_0, x = position_ids)[name = string("cast_5")]; - tensor cast_266 = cast(dtype = cast_266_dtype_0, x = position_ids_to_int16)[name = string("cast_4")]; - tensor greater_equal_0 = greater_equal(x = cast_266, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; - int32 slice_by_index_208 = const()[name = string("slice_by_index_208"), val = int32(8192)]; - tensor add_0 = add(x = cast_266, y = slice_by_index_208)[name = string("add_0")]; - tensor select_0 = select(a = cast_266, b = add_0, cond = greater_equal_0)[name = string("select_0")]; - string select_0_to_int16_dtype_0 = const()[name = string("select_0_to_int16_dtype_0"), val = string("int16")]; - string cast_0_dtype_0 = const()[name = string("cast_0_dtype_0"), val = string("int32")]; - int32 greater_equal_0_y_0_1 = const()[name = string("greater_equal_0_y_0_1"), val = int32(0)]; - tensor select_0_to_int16 = cast(dtype = select_0_to_int16_dtype_0, x = select_0)[name = string("cast_3")]; - tensor cast_0 = cast(dtype = cast_0_dtype_0, x = select_0_to_int16)[name = string("cast_2")]; - tensor greater_equal_0_1 = greater_equal(x = cast_0, y = greater_equal_0_y_0_1)[name = string("greater_equal_0_1")]; - int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(8192)]; - tensor add_0_1 = add(x = cast_0, y = slice_by_index_0)[name = string("add_0_1")]; - tensor select_0_1 = select(a = cast_0, b = add_0_1, cond = greater_equal_0_1)[name = string("select_0_1")]; - int32 op_1618_cast_fp16_cast_uint16_cast_uint16_axis_0 = const()[name = string("op_1618_cast_fp16_cast_uint16_cast_uint16_axis_0"), val = int32(1)]; - tensor op_1618_cast_fp16_cast_uint16_cast_uint16 = gather(axis = op_1618_cast_fp16_cast_uint16_cast_uint16_axis_0, batch_dims = var_1618_batch_dims_0, indices = select_0_1, validate_indices = var_1618_validate_indices_0, x = var_1610_to_fp16)[name = string("op_1618_cast_fp16_cast_uint16_cast_uint16")]; - tensor var_1622 = const()[name = string("op_1622"), val = tensor([1, 64, 1, 256])]; - tensor cos_1_cast_fp16 = reshape(shape = var_1622, x = op_1618_cast_fp16_cast_uint16_cast_uint16)[name = string("cos_1_cast_fp16")]; - int32 var_1632_axis_0 = const()[name = string("op_1632_axis_0"), val = int32(1)]; - int32 var_1632_batch_dims_0 = const()[name = string("op_1632_batch_dims_0"), val = int32(0)]; - bool var_1632_validate_indices_0 = const()[name = string("op_1632_validate_indices_0"), val = bool(false)]; - tensor var_1624_to_fp16 = const()[name = string("op_1624_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35153728)))]; - string position_ids_to_uint16_dtype_0 = const()[name = string("position_ids_to_uint16_dtype_0"), val = string("uint16")]; - tensor position_ids_to_uint16 = cast(dtype = position_ids_to_uint16_dtype_0, x = position_ids)[name = string("cast_1")]; - tensor var_1632_cast_fp16_cast_uint16 = gather(axis = var_1632_axis_0, batch_dims = var_1632_batch_dims_0, indices = position_ids_to_uint16, validate_indices = var_1632_validate_indices_0, x = var_1624_to_fp16)[name = string("op_1632_cast_fp16_cast_uint16")]; - tensor var_1636 = const()[name = string("op_1636"), val = tensor([1, 64, 1, 256])]; - tensor sin_1_cast_fp16 = reshape(shape = var_1636, x = var_1632_cast_fp16_cast_uint16)[name = string("sin_1_cast_fp16")]; - int32 var_1657 = const()[name = string("op_1657"), val = int32(-1)]; - fp16 const_1_promoted_to_fp16 = const()[name = string("const_1_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_1659_cast_fp16 = mul(x = hidden_states, y = const_1_promoted_to_fp16)[name = string("op_1659_cast_fp16")]; - bool input_1_interleave_0 = const()[name = string("input_1_interleave_0"), val = bool(false)]; - tensor input_1_cast_fp16 = concat(axis = var_1657, interleave = input_1_interleave_0, values = (hidden_states, var_1659_cast_fp16))[name = string("input_1_cast_fp16")]; - tensor normed_1_axes_0 = const()[name = string("normed_1_axes_0"), val = tensor([-1])]; - fp16 var_1654_to_fp16 = const()[name = string("op_1654_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_1_cast_fp16 = layer_norm(axes = normed_1_axes_0, epsilon = var_1654_to_fp16, x = input_1_cast_fp16)[name = string("normed_1_cast_fp16")]; - tensor normed_3_begin_0 = const()[name = string("normed_3_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_3_end_0 = const()[name = string("normed_3_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_3_end_mask_0 = const()[name = string("normed_3_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_3_cast_fp16 = slice_by_index(begin = normed_3_begin_0, end = normed_3_end_0, end_mask = normed_3_end_mask_0, x = normed_1_cast_fp16)[name = string("normed_3_cast_fp16")]; - tensor var_1673_to_fp16 = const()[name = string("op_1673_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43542464)))]; - tensor hidden_states_3_cast_fp16 = mul(x = normed_3_cast_fp16, y = var_1673_to_fp16)[name = string("hidden_states_3_cast_fp16")]; - tensor var_1684 = const()[name = string("op_1684"), val = tensor([0, 2, 1])]; - tensor var_1687_axes_0 = const()[name = string("op_1687_axes_0"), val = tensor([2])]; - tensor var_1685_cast_fp16 = transpose(perm = var_1684, x = hidden_states_3_cast_fp16)[name = string("transpose_237")]; - tensor var_1687_cast_fp16 = expand_dims(axes = var_1687_axes_0, x = var_1685_cast_fp16)[name = string("op_1687_cast_fp16")]; - string query_states_1_pad_type_0 = const()[name = string("query_states_1_pad_type_0"), val = string("valid")]; - tensor query_states_1_strides_0 = const()[name = string("query_states_1_strides_0"), val = tensor([1, 1])]; - tensor query_states_1_pad_0 = const()[name = string("query_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_1_dilations_0 = const()[name = string("query_states_1_dilations_0"), val = tensor([1, 1])]; - int32 query_states_1_groups_0 = const()[name = string("query_states_1_groups_0"), val = int32(1)]; - tensor query_states_1 = conv(dilations = query_states_1_dilations_0, groups = query_states_1_groups_0, pad = query_states_1_pad_0, pad_type = query_states_1_pad_type_0, strides = query_states_1_strides_0, weight = model_model_layers_0_self_attn_q_proj_weight_palettized, x = var_1687_cast_fp16)[name = string("query_states_1")]; - string key_states_1_pad_type_0 = const()[name = string("key_states_1_pad_type_0"), val = string("valid")]; - tensor key_states_1_strides_0 = const()[name = string("key_states_1_strides_0"), val = tensor([1, 1])]; - tensor key_states_1_pad_0 = const()[name = string("key_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_1_dilations_0 = const()[name = string("key_states_1_dilations_0"), val = tensor([1, 1])]; - int32 key_states_1_groups_0 = const()[name = string("key_states_1_groups_0"), val = int32(1)]; - tensor key_states_1 = conv(dilations = key_states_1_dilations_0, groups = key_states_1_groups_0, pad = key_states_1_pad_0, pad_type = key_states_1_pad_type_0, strides = key_states_1_strides_0, weight = model_model_layers_0_self_attn_k_proj_weight_palettized, x = var_1687_cast_fp16)[name = string("key_states_1")]; - string value_states_1_pad_type_0 = const()[name = string("value_states_1_pad_type_0"), val = string("valid")]; - tensor value_states_1_strides_0 = const()[name = string("value_states_1_strides_0"), val = tensor([1, 1])]; - tensor value_states_1_pad_0 = const()[name = string("value_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_1_dilations_0 = const()[name = string("value_states_1_dilations_0"), val = tensor([1, 1])]; - int32 value_states_1_groups_0 = const()[name = string("value_states_1_groups_0"), val = int32(1)]; - tensor value_states_1 = conv(dilations = value_states_1_dilations_0, groups = value_states_1_groups_0, pad = value_states_1_pad_0, pad_type = value_states_1_pad_type_0, strides = value_states_1_strides_0, weight = model_model_layers_0_self_attn_v_proj_weight_palettized, x = var_1687_cast_fp16)[name = string("value_states_1")]; - tensor var_1729 = const()[name = string("op_1729"), val = tensor([1, 4, 256, 64])]; - tensor var_1730 = reshape(shape = var_1729, x = query_states_1)[name = string("op_1730")]; - tensor var_1735 = const()[name = string("op_1735"), val = tensor([0, 1, 3, 2])]; - tensor var_1740 = const()[name = string("op_1740"), val = tensor([1, 1, 256, 64])]; - tensor var_1741 = reshape(shape = var_1740, x = key_states_1)[name = string("op_1741")]; - tensor var_1746 = const()[name = string("op_1746"), val = tensor([0, 1, 3, 2])]; - tensor var_1751 = const()[name = string("op_1751"), val = tensor([1, 1, 256, 64])]; - tensor var_1752 = reshape(shape = var_1751, x = value_states_1)[name = string("op_1752")]; - tensor var_1757 = const()[name = string("op_1757"), val = tensor([0, 1, 3, 2])]; - int32 var_1768 = const()[name = string("op_1768"), val = int32(-1)]; - fp16 const_6_promoted = const()[name = string("const_6_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_5 = transpose(perm = var_1735, x = var_1730)[name = string("transpose_236")]; - tensor var_1770 = mul(x = hidden_states_5, y = const_6_promoted)[name = string("op_1770")]; - bool input_5_interleave_0 = const()[name = string("input_5_interleave_0"), val = bool(false)]; - tensor input_5 = concat(axis = var_1768, interleave = input_5_interleave_0, values = (hidden_states_5, var_1770))[name = string("input_5")]; - tensor normed_5_axes_0 = const()[name = string("normed_5_axes_0"), val = tensor([-1])]; - fp16 var_1765_to_fp16 = const()[name = string("op_1765_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_5_cast_fp16 = layer_norm(axes = normed_5_axes_0, epsilon = var_1765_to_fp16, x = input_5)[name = string("normed_5_cast_fp16")]; - tensor normed_7_begin_0 = const()[name = string("normed_7_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_7_end_0 = const()[name = string("normed_7_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_7_end_mask_0 = const()[name = string("normed_7_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_7 = slice_by_index(begin = normed_7_begin_0, end = normed_7_end_0, end_mask = normed_7_end_mask_0, x = normed_5_cast_fp16)[name = string("normed_7")]; - tensor var_1784_to_fp16 = const()[name = string("op_1784_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43544832)))]; - tensor q_1_cast_fp16 = mul(x = normed_7, y = var_1784_to_fp16)[name = string("q_1_cast_fp16")]; - int32 var_1795 = const()[name = string("op_1795"), val = int32(-1)]; - fp16 const_10_promoted = const()[name = string("const_10_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_7 = transpose(perm = var_1746, x = var_1741)[name = string("transpose_235")]; - tensor var_1797 = mul(x = hidden_states_7, y = const_10_promoted)[name = string("op_1797")]; - bool input_7_interleave_0 = const()[name = string("input_7_interleave_0"), val = bool(false)]; - tensor input_7 = concat(axis = var_1795, interleave = input_7_interleave_0, values = (hidden_states_7, var_1797))[name = string("input_7")]; - tensor normed_9_axes_0 = const()[name = string("normed_9_axes_0"), val = tensor([-1])]; - fp16 var_1792_to_fp16 = const()[name = string("op_1792_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_9_cast_fp16 = layer_norm(axes = normed_9_axes_0, epsilon = var_1792_to_fp16, x = input_7)[name = string("normed_9_cast_fp16")]; - tensor normed_11_begin_0 = const()[name = string("normed_11_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_11_end_0 = const()[name = string("normed_11_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_11_end_mask_0 = const()[name = string("normed_11_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_11 = slice_by_index(begin = normed_11_begin_0, end = normed_11_end_0, end_mask = normed_11_end_mask_0, x = normed_9_cast_fp16)[name = string("normed_11")]; - tensor var_1811_to_fp16 = const()[name = string("op_1811_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43545408)))]; - tensor k_1_cast_fp16 = mul(x = normed_11, y = var_1811_to_fp16)[name = string("k_1_cast_fp16")]; - tensor var_1817 = const()[name = string("op_1817"), val = tensor([0, 2, 1, 3])]; - tensor var_1823 = const()[name = string("op_1823"), val = tensor([0, 2, 1, 3])]; - tensor cos_5 = transpose(perm = var_1817, x = cos_1_cast_fp16)[name = string("transpose_234")]; - tensor var_1825_cast_fp16 = mul(x = q_1_cast_fp16, y = cos_5)[name = string("op_1825_cast_fp16")]; - tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_1_cast_fp16 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = q_1_cast_fp16)[name = string("x1_1_cast_fp16")]; - tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_1_cast_fp16 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = q_1_cast_fp16)[name = string("x2_1_cast_fp16")]; - fp16 const_16_promoted_to_fp16 = const()[name = string("const_16_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_1846_cast_fp16 = mul(x = x2_1_cast_fp16, y = const_16_promoted_to_fp16)[name = string("op_1846_cast_fp16")]; - int32 var_1848 = const()[name = string("op_1848"), val = int32(-1)]; - bool var_1849_interleave_0 = const()[name = string("op_1849_interleave_0"), val = bool(false)]; - tensor var_1849_cast_fp16 = concat(axis = var_1848, interleave = var_1849_interleave_0, values = (var_1846_cast_fp16, x1_1_cast_fp16))[name = string("op_1849_cast_fp16")]; - tensor sin_5 = transpose(perm = var_1823, x = sin_1_cast_fp16)[name = string("transpose_233")]; - tensor var_1850_cast_fp16 = mul(x = var_1849_cast_fp16, y = sin_5)[name = string("op_1850_cast_fp16")]; - tensor query_states_3_cast_fp16 = add(x = var_1825_cast_fp16, y = var_1850_cast_fp16)[name = string("query_states_3_cast_fp16")]; - tensor var_1853_cast_fp16 = mul(x = k_1_cast_fp16, y = cos_5)[name = string("op_1853_cast_fp16")]; - tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_3_cast_fp16 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = k_1_cast_fp16)[name = string("x1_3_cast_fp16")]; - tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_3_cast_fp16 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = k_1_cast_fp16)[name = string("x2_3_cast_fp16")]; - fp16 const_19_promoted_to_fp16 = const()[name = string("const_19_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_1874_cast_fp16 = mul(x = x2_3_cast_fp16, y = const_19_promoted_to_fp16)[name = string("op_1874_cast_fp16")]; - int32 var_1876 = const()[name = string("op_1876"), val = int32(-1)]; - bool var_1877_interleave_0 = const()[name = string("op_1877_interleave_0"), val = bool(false)]; - tensor var_1877_cast_fp16 = concat(axis = var_1876, interleave = var_1877_interleave_0, values = (var_1874_cast_fp16, x1_3_cast_fp16))[name = string("op_1877_cast_fp16")]; - tensor var_1878_cast_fp16 = mul(x = var_1877_cast_fp16, y = sin_5)[name = string("op_1878_cast_fp16")]; - tensor key_states_3_cast_fp16 = add(x = var_1853_cast_fp16, y = var_1878_cast_fp16)[name = string("key_states_3_cast_fp16")]; - tensor read_state_0 = read_state(input = model_model_kv_cache_local)[name = string("read_state_0")]; - tensor key_slice_1_begin_0 = const()[name = string("key_slice_1_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor key_slice_1_end_0 = const()[name = string("key_slice_1_end_0"), val = tensor([1, 1, 512, 256])]; - tensor key_slice_1_end_mask_0 = const()[name = string("key_slice_1_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_1_cast_fp16 = slice_by_index(begin = key_slice_1_begin_0, end = key_slice_1_end_0, end_mask = key_slice_1_end_mask_0, x = read_state_0)[name = string("key_slice_1_cast_fp16")]; - tensor var_1915_begin_0 = const()[name = string("op_1915_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_1915_end_0 = const()[name = string("op_1915_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_1915_end_mask_0 = const()[name = string("op_1915_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_1915_cast_fp16 = slice_by_index(begin = var_1915_begin_0, end = var_1915_end_0, end_mask = var_1915_end_mask_0, x = key_slice_1_cast_fp16)[name = string("op_1915_cast_fp16")]; - int32 var_1942 = const()[name = string("op_1942"), val = int32(2)]; - bool shifted_key_1_interleave_0 = const()[name = string("shifted_key_1_interleave_0"), val = bool(false)]; - tensor shifted_key_1_cast_fp16 = concat(axis = var_1942, interleave = shifted_key_1_interleave_0, values = (var_1915_cast_fp16, key_states_3_cast_fp16))[name = string("shifted_key_1_cast_fp16")]; - tensor concat_0 = const()[name = string("concat_0"), val = tensor([0, 0, 0, 0])]; - tensor concat_1 = const()[name = string("concat_1"), val = tensor([1, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_0, begin_mask = model_model_kv_cache_local_internal_tensor_assign_1_begin_mask_0, end = concat_1, end_mask = model_model_kv_cache_local_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_1_stride_0, update = shifted_key_1_cast_fp16, x = read_state_0)[name = string("model_model_kv_cache_local_internal_tensor_assign_1_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_156_write_state")]; - tensor coreml_update_state_52 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_156")]; - tensor value_slice_1_begin_0 = const()[name = string("value_slice_1_begin_0"), val = tensor([22, 0, 0, 0])]; - tensor value_slice_1_end_0 = const()[name = string("value_slice_1_end_0"), val = tensor([23, 1, 512, 256])]; - tensor value_slice_1_end_mask_0 = const()[name = string("value_slice_1_end_mask_0"), val = tensor([false, true, true, true])]; - tensor value_slice_1_cast_fp16 = slice_by_index(begin = value_slice_1_begin_0, end = value_slice_1_end_0, end_mask = value_slice_1_end_mask_0, x = coreml_update_state_52)[name = string("value_slice_1_cast_fp16")]; - tensor var_1985_begin_0 = const()[name = string("op_1985_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_1985_end_0 = const()[name = string("op_1985_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_1985_end_mask_0 = const()[name = string("op_1985_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_1985_cast_fp16 = slice_by_index(begin = var_1985_begin_0, end = var_1985_end_0, end_mask = var_1985_end_mask_0, x = value_slice_1_cast_fp16)[name = string("op_1985_cast_fp16")]; - int32 var_2012 = const()[name = string("op_2012"), val = int32(2)]; - bool shifted_value_1_interleave_0 = const()[name = string("shifted_value_1_interleave_0"), val = bool(false)]; - tensor value_states_3 = transpose(perm = var_1757, x = var_1752)[name = string("transpose_232")]; - tensor shifted_value_1_cast_fp16 = concat(axis = var_2012, interleave = shifted_value_1_interleave_0, values = (var_1985_cast_fp16, value_states_3))[name = string("shifted_value_1_cast_fp16")]; - tensor concat_2 = const()[name = string("concat_2"), val = tensor([22, 0, 0, 0])]; - tensor concat_3 = const()[name = string("concat_3"), val = tensor([23, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_local_internal_tensor_assign_2_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_local_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_2_stride_0, update = shifted_value_1_cast_fp16, x = coreml_update_state_52)[name = string("model_model_kv_cache_local_internal_tensor_assign_2_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_157_write_state")]; - tensor coreml_update_state_53 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_157")]; - tensor var_2040_begin_0 = const()[name = string("op_2040_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2040_end_0 = const()[name = string("op_2040_end_0"), val = tensor([1, 1, 512, 256])]; - tensor var_2040_end_mask_0 = const()[name = string("op_2040_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_2040_cast_fp16 = slice_by_index(begin = var_2040_begin_0, end = var_2040_end_0, end_mask = var_2040_end_mask_0, x = coreml_update_state_53)[name = string("op_2040_cast_fp16")]; - tensor var_2047_begin_0 = const()[name = string("op_2047_begin_0"), val = tensor([22, 0, 0, 0])]; - tensor var_2047_end_0 = const()[name = string("op_2047_end_0"), val = tensor([23, 1, 512, 256])]; - tensor var_2047_end_mask_0 = const()[name = string("op_2047_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_2047_cast_fp16 = slice_by_index(begin = var_2047_begin_0, end = var_2047_end_0, end_mask = var_2047_end_mask_0, x = coreml_update_state_53)[name = string("op_2047_cast_fp16")]; - tensor var_2086 = const()[name = string("op_2086"), val = tensor([1, 4, 1, 1])]; - tensor x_5_cast_fp16 = tile(reps = var_2086, x = var_2040_cast_fp16)[name = string("x_5_cast_fp16")]; - tensor var_2106 = const()[name = string("op_2106"), val = tensor([1, 4, 1, 1])]; - tensor x_11_cast_fp16 = tile(reps = var_2106, x = var_2047_cast_fp16)[name = string("x_11_cast_fp16")]; - bool var_2133_transpose_x_0 = const()[name = string("op_2133_transpose_x_0"), val = bool(false)]; - bool var_2133_transpose_y_0 = const()[name = string("op_2133_transpose_y_0"), val = bool(true)]; - tensor var_2133 = matmul(transpose_x = var_2133_transpose_x_0, transpose_y = var_2133_transpose_y_0, x = query_states_3_cast_fp16, y = x_5_cast_fp16)[name = string("op_2133")]; - fp16 var_2134_to_fp16 = const()[name = string("op_2134_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_1_cast_fp16 = mul(x = var_2133, y = var_2134_to_fp16)[name = string("attn_weights_1_cast_fp16")]; - tensor mask_slice_1_begin_0 = const()[name = string("mask_slice_1_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor mask_slice_1_end_0 = const()[name = string("mask_slice_1_end_0"), val = tensor([1, 1, 64, 512])]; - tensor mask_slice_1_end_mask_0 = const()[name = string("mask_slice_1_end_mask_0"), val = tensor([true, true, true, false])]; - tensor mask_slice_1 = slice_by_index(begin = mask_slice_1_begin_0, end = mask_slice_1_end_0, end_mask = mask_slice_1_end_mask_0, x = causal_mask)[name = string("mask_slice_1")]; - tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = mask_slice_1)[name = string("attn_weights_3_cast_fp16")]; - int32 var_2169 = const()[name = string("op_2169"), val = int32(-1)]; - tensor var_2171_cast_fp16 = softmax(axis = var_2169, x = attn_weights_3_cast_fp16)[name = string("op_2171_cast_fp16")]; - tensor concat_8 = const()[name = string("concat_8"), val = tensor([4, 64, 512])]; - tensor reshape_0_cast_fp16 = reshape(shape = concat_8, x = var_2171_cast_fp16)[name = string("reshape_0_cast_fp16")]; - tensor concat_9 = const()[name = string("concat_9"), val = tensor([4, 512, 256])]; - tensor reshape_1_cast_fp16 = reshape(shape = concat_9, x = x_11_cast_fp16)[name = string("reshape_1_cast_fp16")]; - bool matmul_0_transpose_x_0 = const()[name = string("matmul_0_transpose_x_0"), val = bool(false)]; - bool matmul_0_transpose_y_0 = const()[name = string("matmul_0_transpose_y_0"), val = bool(false)]; - tensor matmul_0_cast_fp16 = matmul(transpose_x = matmul_0_transpose_x_0, transpose_y = matmul_0_transpose_y_0, x = reshape_0_cast_fp16, y = reshape_1_cast_fp16)[name = string("matmul_0_cast_fp16")]; - tensor concat_13 = const()[name = string("concat_13"), val = tensor([1, 4, 64, 256])]; - tensor reshape_2_cast_fp16 = reshape(shape = concat_13, x = matmul_0_cast_fp16)[name = string("reshape_2_cast_fp16")]; - tensor var_2183_perm_0 = const()[name = string("op_2183_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_2202 = const()[name = string("op_2202"), val = tensor([1, 64, 1024])]; - tensor var_2183_cast_fp16 = transpose(perm = var_2183_perm_0, x = reshape_2_cast_fp16)[name = string("transpose_231")]; - tensor attn_output_5_cast_fp16 = reshape(shape = var_2202, x = var_2183_cast_fp16)[name = string("attn_output_5_cast_fp16")]; - tensor var_2207 = const()[name = string("op_2207"), val = tensor([0, 2, 1])]; - string var_2223_pad_type_0 = const()[name = string("op_2223_pad_type_0"), val = string("valid")]; - int32 var_2223_groups_0 = const()[name = string("op_2223_groups_0"), val = int32(1)]; - tensor var_2223_strides_0 = const()[name = string("op_2223_strides_0"), val = tensor([1])]; - tensor var_2223_pad_0 = const()[name = string("op_2223_pad_0"), val = tensor([0, 0])]; - tensor var_2223_dilations_0 = const()[name = string("op_2223_dilations_0"), val = tensor([1])]; - tensor squeeze_0_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43545984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44430784))))[name = string("squeeze_0_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_2208_cast_fp16 = transpose(perm = var_2207, x = attn_output_5_cast_fp16)[name = string("transpose_230")]; - tensor var_2223_cast_fp16 = conv(dilations = var_2223_dilations_0, groups = var_2223_groups_0, pad = var_2223_pad_0, pad_type = var_2223_pad_type_0, strides = var_2223_strides_0, weight = squeeze_0_cast_fp16_to_fp32_to_fp16_palettized, x = var_2208_cast_fp16)[name = string("op_2223_cast_fp16")]; - tensor var_2227 = const()[name = string("op_2227"), val = tensor([0, 2, 1])]; - int32 var_2238 = const()[name = string("op_2238"), val = int32(-1)]; - fp16 const_30_promoted_to_fp16 = const()[name = string("const_30_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_9_cast_fp16 = transpose(perm = var_2227, x = var_2223_cast_fp16)[name = string("transpose_229")]; - tensor var_2240_cast_fp16 = mul(x = hidden_states_9_cast_fp16, y = const_30_promoted_to_fp16)[name = string("op_2240_cast_fp16")]; - bool input_11_interleave_0 = const()[name = string("input_11_interleave_0"), val = bool(false)]; - tensor input_11_cast_fp16 = concat(axis = var_2238, interleave = input_11_interleave_0, values = (hidden_states_9_cast_fp16, var_2240_cast_fp16))[name = string("input_11_cast_fp16")]; - tensor normed_13_axes_0 = const()[name = string("normed_13_axes_0"), val = tensor([-1])]; - fp16 var_2235_to_fp16 = const()[name = string("op_2235_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_13_cast_fp16 = layer_norm(axes = normed_13_axes_0, epsilon = var_2235_to_fp16, x = input_11_cast_fp16)[name = string("normed_13_cast_fp16")]; - tensor normed_15_begin_0 = const()[name = string("normed_15_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_15_end_0 = const()[name = string("normed_15_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_15_end_mask_0 = const()[name = string("normed_15_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_15_cast_fp16 = slice_by_index(begin = normed_15_begin_0, end = normed_15_end_0, end_mask = normed_15_end_mask_0, x = normed_13_cast_fp16)[name = string("normed_15_cast_fp16")]; - tensor var_2254_to_fp16 = const()[name = string("op_2254_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44449280)))]; - tensor attn_output_9_cast_fp16 = mul(x = normed_15_cast_fp16, y = var_2254_to_fp16)[name = string("attn_output_9_cast_fp16")]; - tensor hidden_states_11_cast_fp16 = add(x = hidden_states, y = attn_output_9_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; - int32 var_2267 = const()[name = string("op_2267"), val = int32(-1)]; - fp16 const_34_promoted_to_fp16 = const()[name = string("const_34_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_2269_cast_fp16 = mul(x = hidden_states_11_cast_fp16, y = const_34_promoted_to_fp16)[name = string("op_2269_cast_fp16")]; - bool input_13_interleave_0 = const()[name = string("input_13_interleave_0"), val = bool(false)]; - tensor input_13_cast_fp16 = concat(axis = var_2267, interleave = input_13_interleave_0, values = (hidden_states_11_cast_fp16, var_2269_cast_fp16))[name = string("input_13_cast_fp16")]; - tensor normed_17_axes_0 = const()[name = string("normed_17_axes_0"), val = tensor([-1])]; - fp16 var_2264_to_fp16 = const()[name = string("op_2264_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_17_cast_fp16 = layer_norm(axes = normed_17_axes_0, epsilon = var_2264_to_fp16, x = input_13_cast_fp16)[name = string("normed_17_cast_fp16")]; - tensor normed_19_begin_0 = const()[name = string("normed_19_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_19_end_0 = const()[name = string("normed_19_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_19_end_mask_0 = const()[name = string("normed_19_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_19_cast_fp16 = slice_by_index(begin = normed_19_begin_0, end = normed_19_end_0, end_mask = normed_19_end_mask_0, x = normed_17_cast_fp16)[name = string("normed_19_cast_fp16")]; - tensor var_2283_to_fp16 = const()[name = string("op_2283_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44451648)))]; - tensor x_13_cast_fp16 = mul(x = normed_19_cast_fp16, y = var_2283_to_fp16)[name = string("x_13_cast_fp16")]; - tensor var_2295 = const()[name = string("op_2295"), val = tensor([0, 2, 1])]; - tensor input_15_axes_0 = const()[name = string("input_15_axes_0"), val = tensor([2])]; - tensor var_2296_cast_fp16 = transpose(perm = var_2295, x = x_13_cast_fp16)[name = string("transpose_228")]; - tensor input_15_cast_fp16 = expand_dims(axes = input_15_axes_0, x = var_2296_cast_fp16)[name = string("input_15_cast_fp16")]; - string x_15_pad_type_0 = const()[name = string("x_15_pad_type_0"), val = string("valid")]; - tensor x_15_strides_0 = const()[name = string("x_15_strides_0"), val = tensor([1, 1])]; - tensor x_15_pad_0 = const()[name = string("x_15_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_15_dilations_0 = const()[name = string("x_15_dilations_0"), val = tensor([1, 1])]; - int32 x_15_groups_0 = const()[name = string("x_15_groups_0"), val = int32(1)]; - tensor model_model_layers_0_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1197407232))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1203379264))))[name = string("model_model_layers_0_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_15_cast_fp16 = conv(dilations = x_15_dilations_0, groups = x_15_groups_0, pad = x_15_pad_0, pad_type = x_15_pad_type_0, strides = x_15_strides_0, weight = model_model_layers_0_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_15_cast_fp16)[name = string("x_15_cast_fp16")]; - string b_1_pad_type_0 = const()[name = string("b_1_pad_type_0"), val = string("valid")]; - tensor b_1_strides_0 = const()[name = string("b_1_strides_0"), val = tensor([1, 1])]; - tensor b_1_pad_0 = const()[name = string("b_1_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_1_dilations_0 = const()[name = string("b_1_dilations_0"), val = tensor([1, 1])]; - int32 b_1_groups_0 = const()[name = string("b_1_groups_0"), val = int32(1)]; - tensor model_model_layers_0_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1203489920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1209461952))))[name = string("model_model_layers_0_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_1_cast_fp16 = conv(dilations = b_1_dilations_0, groups = b_1_groups_0, pad = b_1_pad_0, pad_type = b_1_pad_type_0, strides = b_1_strides_0, weight = model_model_layers_0_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_15_cast_fp16)[name = string("b_1_cast_fp16")]; - string var_2321_mode_0 = const()[name = string("op_2321_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_2321_cast_fp16 = gelu(mode = var_2321_mode_0, x = x_15_cast_fp16)[name = string("op_2321_cast_fp16")]; - tensor input_17_cast_fp16 = mul(x = var_2321_cast_fp16, y = b_1_cast_fp16)[name = string("input_17_cast_fp16")]; - string e_1_pad_type_0 = const()[name = string("e_1_pad_type_0"), val = string("valid")]; - tensor e_1_strides_0 = const()[name = string("e_1_strides_0"), val = tensor([1, 1])]; - tensor e_1_pad_0 = const()[name = string("e_1_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_1_dilations_0 = const()[name = string("e_1_dilations_0"), val = tensor([1, 1])]; - int32 e_1_groups_0 = const()[name = string("e_1_groups_0"), val = int32(1)]; - tensor model_model_layers_0_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56619392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62591424))))[name = string("model_model_layers_0_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_1_cast_fp16 = conv(dilations = e_1_dilations_0, groups = e_1_groups_0, pad = e_1_pad_0, pad_type = e_1_pad_type_0, strides = e_1_strides_0, weight = model_model_layers_0_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_17_cast_fp16)[name = string("e_1_cast_fp16")]; - tensor var_2329_axes_0 = const()[name = string("op_2329_axes_0"), val = tensor([2])]; - tensor var_2329_cast_fp16 = squeeze(axes = var_2329_axes_0, x = e_1_cast_fp16)[name = string("op_2329_cast_fp16")]; - tensor var_2330 = const()[name = string("op_2330"), val = tensor([0, 2, 1])]; - int32 var_2341 = const()[name = string("op_2341"), val = int32(-1)]; - fp16 const_38_promoted_to_fp16 = const()[name = string("const_38_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_13_cast_fp16 = transpose(perm = var_2330, x = var_2329_cast_fp16)[name = string("transpose_227")]; - tensor var_2343_cast_fp16 = mul(x = hidden_states_13_cast_fp16, y = const_38_promoted_to_fp16)[name = string("op_2343_cast_fp16")]; - bool input_19_interleave_0 = const()[name = string("input_19_interleave_0"), val = bool(false)]; - tensor input_19_cast_fp16 = concat(axis = var_2341, interleave = input_19_interleave_0, values = (hidden_states_13_cast_fp16, var_2343_cast_fp16))[name = string("input_19_cast_fp16")]; - tensor normed_21_axes_0 = const()[name = string("normed_21_axes_0"), val = tensor([-1])]; - fp16 var_2338_to_fp16 = const()[name = string("op_2338_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_21_cast_fp16 = layer_norm(axes = normed_21_axes_0, epsilon = var_2338_to_fp16, x = input_19_cast_fp16)[name = string("normed_21_cast_fp16")]; - tensor normed_23_begin_0 = const()[name = string("normed_23_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_23_end_0 = const()[name = string("normed_23_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_23_end_mask_0 = const()[name = string("normed_23_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_23_cast_fp16 = slice_by_index(begin = normed_23_begin_0, end = normed_23_end_0, end_mask = normed_23_end_mask_0, x = normed_21_cast_fp16)[name = string("normed_23_cast_fp16")]; - tensor var_2357_to_fp16 = const()[name = string("op_2357_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62609920)))]; - tensor hidden_states_15_cast_fp16 = mul(x = normed_23_cast_fp16, y = var_2357_to_fp16)[name = string("hidden_states_15_cast_fp16")]; - tensor hidden_states_17_cast_fp16 = add(x = hidden_states_11_cast_fp16, y = hidden_states_15_cast_fp16)[name = string("hidden_states_17_cast_fp16")]; - int32 var_2411 = const()[name = string("op_2411"), val = int32(-1)]; - fp16 const_43_promoted_to_fp16 = const()[name = string("const_43_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_2413_cast_fp16 = mul(x = hidden_states_17_cast_fp16, y = const_43_promoted_to_fp16)[name = string("op_2413_cast_fp16")]; - bool input_21_interleave_0 = const()[name = string("input_21_interleave_0"), val = bool(false)]; - tensor input_21_cast_fp16 = concat(axis = var_2411, interleave = input_21_interleave_0, values = (hidden_states_17_cast_fp16, var_2413_cast_fp16))[name = string("input_21_cast_fp16")]; - tensor normed_25_axes_0 = const()[name = string("normed_25_axes_0"), val = tensor([-1])]; - fp16 var_2408_to_fp16 = const()[name = string("op_2408_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_25_cast_fp16 = layer_norm(axes = normed_25_axes_0, epsilon = var_2408_to_fp16, x = input_21_cast_fp16)[name = string("normed_25_cast_fp16")]; - tensor normed_27_begin_0 = const()[name = string("normed_27_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_27_end_0 = const()[name = string("normed_27_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_27_end_mask_0 = const()[name = string("normed_27_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_27_cast_fp16 = slice_by_index(begin = normed_27_begin_0, end = normed_27_end_0, end_mask = normed_27_end_mask_0, x = normed_25_cast_fp16)[name = string("normed_27_cast_fp16")]; - tensor var_2427_to_fp16 = const()[name = string("op_2427_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62612288)))]; - tensor hidden_states_19_cast_fp16 = mul(x = normed_27_cast_fp16, y = var_2427_to_fp16)[name = string("hidden_states_19_cast_fp16")]; - tensor var_2438 = const()[name = string("op_2438"), val = tensor([0, 2, 1])]; - tensor var_2441_axes_0 = const()[name = string("op_2441_axes_0"), val = tensor([2])]; - tensor var_2439_cast_fp16 = transpose(perm = var_2438, x = hidden_states_19_cast_fp16)[name = string("transpose_226")]; - tensor var_2441_cast_fp16 = expand_dims(axes = var_2441_axes_0, x = var_2439_cast_fp16)[name = string("op_2441_cast_fp16")]; - string query_states_9_pad_type_0 = const()[name = string("query_states_9_pad_type_0"), val = string("valid")]; - tensor query_states_9_strides_0 = const()[name = string("query_states_9_strides_0"), val = tensor([1, 1])]; - tensor query_states_9_pad_0 = const()[name = string("query_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_9_dilations_0 = const()[name = string("query_states_9_dilations_0"), val = tensor([1, 1])]; - int32 query_states_9_groups_0 = const()[name = string("query_states_9_groups_0"), val = int32(1)]; - tensor query_states_9 = conv(dilations = query_states_9_dilations_0, groups = query_states_9_groups_0, pad = query_states_9_pad_0, pad_type = query_states_9_pad_type_0, strides = query_states_9_strides_0, weight = model_model_layers_1_self_attn_q_proj_weight_palettized, x = var_2441_cast_fp16)[name = string("query_states_9")]; - string key_states_11_pad_type_0 = const()[name = string("key_states_11_pad_type_0"), val = string("valid")]; - tensor key_states_11_strides_0 = const()[name = string("key_states_11_strides_0"), val = tensor([1, 1])]; - tensor key_states_11_pad_0 = const()[name = string("key_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_11_dilations_0 = const()[name = string("key_states_11_dilations_0"), val = tensor([1, 1])]; - int32 key_states_11_groups_0 = const()[name = string("key_states_11_groups_0"), val = int32(1)]; - tensor key_states_11 = conv(dilations = key_states_11_dilations_0, groups = key_states_11_groups_0, pad = key_states_11_pad_0, pad_type = key_states_11_pad_type_0, strides = key_states_11_strides_0, weight = model_model_layers_1_self_attn_k_proj_weight_palettized, x = var_2441_cast_fp16)[name = string("key_states_11")]; - string value_states_9_pad_type_0 = const()[name = string("value_states_9_pad_type_0"), val = string("valid")]; - tensor value_states_9_strides_0 = const()[name = string("value_states_9_strides_0"), val = tensor([1, 1])]; - tensor value_states_9_pad_0 = const()[name = string("value_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_9_dilations_0 = const()[name = string("value_states_9_dilations_0"), val = tensor([1, 1])]; - int32 value_states_9_groups_0 = const()[name = string("value_states_9_groups_0"), val = int32(1)]; - tensor value_states_9 = conv(dilations = value_states_9_dilations_0, groups = value_states_9_groups_0, pad = value_states_9_pad_0, pad_type = value_states_9_pad_type_0, strides = value_states_9_strides_0, weight = model_model_layers_1_self_attn_v_proj_weight_palettized, x = var_2441_cast_fp16)[name = string("value_states_9")]; - tensor var_2483 = const()[name = string("op_2483"), val = tensor([1, 4, 256, 64])]; - tensor var_2484 = reshape(shape = var_2483, x = query_states_9)[name = string("op_2484")]; - tensor var_2489 = const()[name = string("op_2489"), val = tensor([0, 1, 3, 2])]; - tensor var_2494 = const()[name = string("op_2494"), val = tensor([1, 1, 256, 64])]; - tensor var_2495 = reshape(shape = var_2494, x = key_states_11)[name = string("op_2495")]; - tensor var_2500 = const()[name = string("op_2500"), val = tensor([0, 1, 3, 2])]; - tensor var_2505 = const()[name = string("op_2505"), val = tensor([1, 1, 256, 64])]; - tensor var_2506 = reshape(shape = var_2505, x = value_states_9)[name = string("op_2506")]; - tensor var_2511 = const()[name = string("op_2511"), val = tensor([0, 1, 3, 2])]; - int32 var_2522 = const()[name = string("op_2522"), val = int32(-1)]; - fp16 const_48_promoted = const()[name = string("const_48_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_21 = transpose(perm = var_2489, x = var_2484)[name = string("transpose_225")]; - tensor var_2524 = mul(x = hidden_states_21, y = const_48_promoted)[name = string("op_2524")]; - bool input_25_interleave_0 = const()[name = string("input_25_interleave_0"), val = bool(false)]; - tensor input_25 = concat(axis = var_2522, interleave = input_25_interleave_0, values = (hidden_states_21, var_2524))[name = string("input_25")]; - tensor normed_29_axes_0 = const()[name = string("normed_29_axes_0"), val = tensor([-1])]; - fp16 var_2519_to_fp16 = const()[name = string("op_2519_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_29_cast_fp16 = layer_norm(axes = normed_29_axes_0, epsilon = var_2519_to_fp16, x = input_25)[name = string("normed_29_cast_fp16")]; - tensor normed_31_begin_0 = const()[name = string("normed_31_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_31_end_0 = const()[name = string("normed_31_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_31_end_mask_0 = const()[name = string("normed_31_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_31 = slice_by_index(begin = normed_31_begin_0, end = normed_31_end_0, end_mask = normed_31_end_mask_0, x = normed_29_cast_fp16)[name = string("normed_31")]; - tensor var_2538_to_fp16 = const()[name = string("op_2538_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62614656)))]; - tensor q_3_cast_fp16 = mul(x = normed_31, y = var_2538_to_fp16)[name = string("q_3_cast_fp16")]; - int32 var_2549 = const()[name = string("op_2549"), val = int32(-1)]; - fp16 const_52_promoted = const()[name = string("const_52_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_23 = transpose(perm = var_2500, x = var_2495)[name = string("transpose_224")]; - tensor var_2551 = mul(x = hidden_states_23, y = const_52_promoted)[name = string("op_2551")]; - bool input_27_interleave_0 = const()[name = string("input_27_interleave_0"), val = bool(false)]; - tensor input_27 = concat(axis = var_2549, interleave = input_27_interleave_0, values = (hidden_states_23, var_2551))[name = string("input_27")]; - tensor normed_33_axes_0 = const()[name = string("normed_33_axes_0"), val = tensor([-1])]; - fp16 var_2546_to_fp16 = const()[name = string("op_2546_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_33_cast_fp16 = layer_norm(axes = normed_33_axes_0, epsilon = var_2546_to_fp16, x = input_27)[name = string("normed_33_cast_fp16")]; - tensor normed_35_begin_0 = const()[name = string("normed_35_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_35_end_0 = const()[name = string("normed_35_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_35_end_mask_0 = const()[name = string("normed_35_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_35 = slice_by_index(begin = normed_35_begin_0, end = normed_35_end_0, end_mask = normed_35_end_mask_0, x = normed_33_cast_fp16)[name = string("normed_35")]; - tensor var_2565_to_fp16 = const()[name = string("op_2565_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62615232)))]; - tensor k_3_cast_fp16 = mul(x = normed_35, y = var_2565_to_fp16)[name = string("k_3_cast_fp16")]; - tensor var_2579_cast_fp16 = mul(x = q_3_cast_fp16, y = cos_5)[name = string("op_2579_cast_fp16")]; - tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_5_cast_fp16 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = q_3_cast_fp16)[name = string("x1_5_cast_fp16")]; - tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_5_cast_fp16 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = q_3_cast_fp16)[name = string("x2_5_cast_fp16")]; - fp16 const_58_promoted_to_fp16 = const()[name = string("const_58_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_2600_cast_fp16 = mul(x = x2_5_cast_fp16, y = const_58_promoted_to_fp16)[name = string("op_2600_cast_fp16")]; - int32 var_2602 = const()[name = string("op_2602"), val = int32(-1)]; - bool var_2603_interleave_0 = const()[name = string("op_2603_interleave_0"), val = bool(false)]; - tensor var_2603_cast_fp16 = concat(axis = var_2602, interleave = var_2603_interleave_0, values = (var_2600_cast_fp16, x1_5_cast_fp16))[name = string("op_2603_cast_fp16")]; - tensor var_2604_cast_fp16 = mul(x = var_2603_cast_fp16, y = sin_5)[name = string("op_2604_cast_fp16")]; - tensor query_states_11_cast_fp16 = add(x = var_2579_cast_fp16, y = var_2604_cast_fp16)[name = string("query_states_11_cast_fp16")]; - tensor var_2607_cast_fp16 = mul(x = k_3_cast_fp16, y = cos_5)[name = string("op_2607_cast_fp16")]; - tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_7_cast_fp16 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = k_3_cast_fp16)[name = string("x1_7_cast_fp16")]; - tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_7_cast_fp16 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = k_3_cast_fp16)[name = string("x2_7_cast_fp16")]; - fp16 const_61_promoted_to_fp16 = const()[name = string("const_61_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_2628_cast_fp16 = mul(x = x2_7_cast_fp16, y = const_61_promoted_to_fp16)[name = string("op_2628_cast_fp16")]; - int32 var_2630 = const()[name = string("op_2630"), val = int32(-1)]; - bool var_2631_interleave_0 = const()[name = string("op_2631_interleave_0"), val = bool(false)]; - tensor var_2631_cast_fp16 = concat(axis = var_2630, interleave = var_2631_interleave_0, values = (var_2628_cast_fp16, x1_7_cast_fp16))[name = string("op_2631_cast_fp16")]; - tensor var_2632_cast_fp16 = mul(x = var_2631_cast_fp16, y = sin_5)[name = string("op_2632_cast_fp16")]; - tensor key_states_13_cast_fp16 = add(x = var_2607_cast_fp16, y = var_2632_cast_fp16)[name = string("key_states_13_cast_fp16")]; - tensor key_slice_3_begin_0 = const()[name = string("key_slice_3_begin_0"), val = tensor([1, 0, 0, 0])]; - tensor key_slice_3_end_0 = const()[name = string("key_slice_3_end_0"), val = tensor([2, 1, 512, 256])]; - tensor key_slice_3_end_mask_0 = const()[name = string("key_slice_3_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_3_cast_fp16 = slice_by_index(begin = key_slice_3_begin_0, end = key_slice_3_end_0, end_mask = key_slice_3_end_mask_0, x = coreml_update_state_53)[name = string("key_slice_3_cast_fp16")]; - tensor var_2669_begin_0 = const()[name = string("op_2669_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_2669_end_0 = const()[name = string("op_2669_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_2669_end_mask_0 = const()[name = string("op_2669_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_2669_cast_fp16 = slice_by_index(begin = var_2669_begin_0, end = var_2669_end_0, end_mask = var_2669_end_mask_0, x = key_slice_3_cast_fp16)[name = string("op_2669_cast_fp16")]; - int32 var_2696 = const()[name = string("op_2696"), val = int32(2)]; - bool shifted_key_3_interleave_0 = const()[name = string("shifted_key_3_interleave_0"), val = bool(false)]; - tensor shifted_key_3_cast_fp16 = concat(axis = var_2696, interleave = shifted_key_3_interleave_0, values = (var_2669_cast_fp16, key_states_13_cast_fp16))[name = string("shifted_key_3_cast_fp16")]; - tensor concat_14 = const()[name = string("concat_14"), val = tensor([1, 0, 0, 0])]; - tensor concat_15 = const()[name = string("concat_15"), val = tensor([2, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_14, begin_mask = model_model_kv_cache_local_internal_tensor_assign_3_begin_mask_0, end = concat_15, end_mask = model_model_kv_cache_local_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_3_stride_0, update = shifted_key_3_cast_fp16, x = coreml_update_state_53)[name = string("model_model_kv_cache_local_internal_tensor_assign_3_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_158_write_state")]; - tensor coreml_update_state_54 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_158")]; - tensor value_slice_3_begin_0 = const()[name = string("value_slice_3_begin_0"), val = tensor([23, 0, 0, 0])]; - tensor value_slice_3_end_0 = const()[name = string("value_slice_3_end_0"), val = tensor([24, 1, 512, 256])]; - tensor value_slice_3_end_mask_0 = const()[name = string("value_slice_3_end_mask_0"), val = tensor([false, true, true, true])]; - tensor value_slice_3_cast_fp16 = slice_by_index(begin = value_slice_3_begin_0, end = value_slice_3_end_0, end_mask = value_slice_3_end_mask_0, x = coreml_update_state_54)[name = string("value_slice_3_cast_fp16")]; - tensor var_2739_begin_0 = const()[name = string("op_2739_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_2739_end_0 = const()[name = string("op_2739_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_2739_end_mask_0 = const()[name = string("op_2739_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_2739_cast_fp16 = slice_by_index(begin = var_2739_begin_0, end = var_2739_end_0, end_mask = var_2739_end_mask_0, x = value_slice_3_cast_fp16)[name = string("op_2739_cast_fp16")]; - int32 var_2766 = const()[name = string("op_2766"), val = int32(2)]; - bool shifted_value_3_interleave_0 = const()[name = string("shifted_value_3_interleave_0"), val = bool(false)]; - tensor value_states_11 = transpose(perm = var_2511, x = var_2506)[name = string("transpose_223")]; - tensor shifted_value_3_cast_fp16 = concat(axis = var_2766, interleave = shifted_value_3_interleave_0, values = (var_2739_cast_fp16, value_states_11))[name = string("shifted_value_3_cast_fp16")]; - tensor concat_16 = const()[name = string("concat_16"), val = tensor([23, 0, 0, 0])]; - tensor concat_17 = const()[name = string("concat_17"), val = tensor([24, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_16, begin_mask = model_model_kv_cache_local_internal_tensor_assign_4_begin_mask_0, end = concat_17, end_mask = model_model_kv_cache_local_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_4_stride_0, update = shifted_value_3_cast_fp16, x = coreml_update_state_54)[name = string("model_model_kv_cache_local_internal_tensor_assign_4_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_159_write_state")]; - tensor coreml_update_state_55 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_159")]; - tensor var_2794_begin_0 = const()[name = string("op_2794_begin_0"), val = tensor([1, 0, 0, 0])]; - tensor var_2794_end_0 = const()[name = string("op_2794_end_0"), val = tensor([2, 1, 512, 256])]; - tensor var_2794_end_mask_0 = const()[name = string("op_2794_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_2794_cast_fp16 = slice_by_index(begin = var_2794_begin_0, end = var_2794_end_0, end_mask = var_2794_end_mask_0, x = coreml_update_state_55)[name = string("op_2794_cast_fp16")]; - tensor var_2801_begin_0 = const()[name = string("op_2801_begin_0"), val = tensor([23, 0, 0, 0])]; - tensor var_2801_end_0 = const()[name = string("op_2801_end_0"), val = tensor([24, 1, 512, 256])]; - tensor var_2801_end_mask_0 = const()[name = string("op_2801_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_2801_cast_fp16 = slice_by_index(begin = var_2801_begin_0, end = var_2801_end_0, end_mask = var_2801_end_mask_0, x = coreml_update_state_55)[name = string("op_2801_cast_fp16")]; - tensor var_2840 = const()[name = string("op_2840"), val = tensor([1, 4, 1, 1])]; - tensor x_21_cast_fp16 = tile(reps = var_2840, x = var_2794_cast_fp16)[name = string("x_21_cast_fp16")]; - tensor var_2860 = const()[name = string("op_2860"), val = tensor([1, 4, 1, 1])]; - tensor x_27_cast_fp16 = tile(reps = var_2860, x = var_2801_cast_fp16)[name = string("x_27_cast_fp16")]; - bool var_2887_transpose_x_0 = const()[name = string("op_2887_transpose_x_0"), val = bool(false)]; - bool var_2887_transpose_y_0 = const()[name = string("op_2887_transpose_y_0"), val = bool(true)]; - tensor var_2887 = matmul(transpose_x = var_2887_transpose_x_0, transpose_y = var_2887_transpose_y_0, x = query_states_11_cast_fp16, y = x_21_cast_fp16)[name = string("op_2887")]; - fp16 var_2888_to_fp16 = const()[name = string("op_2888_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_5_cast_fp16 = mul(x = var_2887, y = var_2888_to_fp16)[name = string("attn_weights_5_cast_fp16")]; - tensor attn_weights_7_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = mask_slice_1)[name = string("attn_weights_7_cast_fp16")]; - int32 var_2923 = const()[name = string("op_2923"), val = int32(-1)]; - tensor var_2925_cast_fp16 = softmax(axis = var_2923, x = attn_weights_7_cast_fp16)[name = string("op_2925_cast_fp16")]; - tensor concat_22 = const()[name = string("concat_22"), val = tensor([4, 64, 512])]; - tensor reshape_3_cast_fp16 = reshape(shape = concat_22, x = var_2925_cast_fp16)[name = string("reshape_3_cast_fp16")]; - tensor concat_23 = const()[name = string("concat_23"), val = tensor([4, 512, 256])]; - tensor reshape_4_cast_fp16 = reshape(shape = concat_23, x = x_27_cast_fp16)[name = string("reshape_4_cast_fp16")]; - bool matmul_1_transpose_x_0 = const()[name = string("matmul_1_transpose_x_0"), val = bool(false)]; - bool matmul_1_transpose_y_0 = const()[name = string("matmul_1_transpose_y_0"), val = bool(false)]; - tensor matmul_1_cast_fp16 = matmul(transpose_x = matmul_1_transpose_x_0, transpose_y = matmul_1_transpose_y_0, x = reshape_3_cast_fp16, y = reshape_4_cast_fp16)[name = string("matmul_1_cast_fp16")]; - tensor concat_27 = const()[name = string("concat_27"), val = tensor([1, 4, 64, 256])]; - tensor reshape_5_cast_fp16 = reshape(shape = concat_27, x = matmul_1_cast_fp16)[name = string("reshape_5_cast_fp16")]; - tensor var_2937_perm_0 = const()[name = string("op_2937_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_2956 = const()[name = string("op_2956"), val = tensor([1, 64, 1024])]; - tensor var_2937_cast_fp16 = transpose(perm = var_2937_perm_0, x = reshape_5_cast_fp16)[name = string("transpose_222")]; - tensor attn_output_15_cast_fp16 = reshape(shape = var_2956, x = var_2937_cast_fp16)[name = string("attn_output_15_cast_fp16")]; - tensor var_2961 = const()[name = string("op_2961"), val = tensor([0, 2, 1])]; - string var_2977_pad_type_0 = const()[name = string("op_2977_pad_type_0"), val = string("valid")]; - int32 var_2977_groups_0 = const()[name = string("op_2977_groups_0"), val = int32(1)]; - tensor var_2977_strides_0 = const()[name = string("op_2977_strides_0"), val = tensor([1])]; - tensor var_2977_pad_0 = const()[name = string("op_2977_pad_0"), val = tensor([0, 0])]; - tensor var_2977_dilations_0 = const()[name = string("op_2977_dilations_0"), val = tensor([1])]; - tensor squeeze_1_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62615808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63500608))))[name = string("squeeze_1_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_2962_cast_fp16 = transpose(perm = var_2961, x = attn_output_15_cast_fp16)[name = string("transpose_221")]; - tensor var_2977_cast_fp16 = conv(dilations = var_2977_dilations_0, groups = var_2977_groups_0, pad = var_2977_pad_0, pad_type = var_2977_pad_type_0, strides = var_2977_strides_0, weight = squeeze_1_cast_fp16_to_fp32_to_fp16_palettized, x = var_2962_cast_fp16)[name = string("op_2977_cast_fp16")]; - tensor var_2981 = const()[name = string("op_2981"), val = tensor([0, 2, 1])]; - int32 var_2992 = const()[name = string("op_2992"), val = int32(-1)]; - fp16 const_72_promoted_to_fp16 = const()[name = string("const_72_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_25_cast_fp16 = transpose(perm = var_2981, x = var_2977_cast_fp16)[name = string("transpose_220")]; - tensor var_2994_cast_fp16 = mul(x = hidden_states_25_cast_fp16, y = const_72_promoted_to_fp16)[name = string("op_2994_cast_fp16")]; - bool input_31_interleave_0 = const()[name = string("input_31_interleave_0"), val = bool(false)]; - tensor input_31_cast_fp16 = concat(axis = var_2992, interleave = input_31_interleave_0, values = (hidden_states_25_cast_fp16, var_2994_cast_fp16))[name = string("input_31_cast_fp16")]; - tensor normed_37_axes_0 = const()[name = string("normed_37_axes_0"), val = tensor([-1])]; - fp16 var_2989_to_fp16 = const()[name = string("op_2989_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_37_cast_fp16 = layer_norm(axes = normed_37_axes_0, epsilon = var_2989_to_fp16, x = input_31_cast_fp16)[name = string("normed_37_cast_fp16")]; - tensor normed_39_begin_0 = const()[name = string("normed_39_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_39_end_0 = const()[name = string("normed_39_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_39_end_mask_0 = const()[name = string("normed_39_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_39_cast_fp16 = slice_by_index(begin = normed_39_begin_0, end = normed_39_end_0, end_mask = normed_39_end_mask_0, x = normed_37_cast_fp16)[name = string("normed_39_cast_fp16")]; - tensor var_3008_to_fp16 = const()[name = string("op_3008_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63519104)))]; - tensor attn_output_19_cast_fp16 = mul(x = normed_39_cast_fp16, y = var_3008_to_fp16)[name = string("attn_output_19_cast_fp16")]; - tensor hidden_states_27_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = attn_output_19_cast_fp16)[name = string("hidden_states_27_cast_fp16")]; - int32 var_3021 = const()[name = string("op_3021"), val = int32(-1)]; - fp16 const_76_promoted_to_fp16 = const()[name = string("const_76_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_3023_cast_fp16 = mul(x = hidden_states_27_cast_fp16, y = const_76_promoted_to_fp16)[name = string("op_3023_cast_fp16")]; - bool input_33_interleave_0 = const()[name = string("input_33_interleave_0"), val = bool(false)]; - tensor input_33_cast_fp16 = concat(axis = var_3021, interleave = input_33_interleave_0, values = (hidden_states_27_cast_fp16, var_3023_cast_fp16))[name = string("input_33_cast_fp16")]; - tensor normed_41_axes_0 = const()[name = string("normed_41_axes_0"), val = tensor([-1])]; - fp16 var_3018_to_fp16 = const()[name = string("op_3018_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_41_cast_fp16 = layer_norm(axes = normed_41_axes_0, epsilon = var_3018_to_fp16, x = input_33_cast_fp16)[name = string("normed_41_cast_fp16")]; - tensor normed_43_begin_0 = const()[name = string("normed_43_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_43_end_0 = const()[name = string("normed_43_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_43_end_mask_0 = const()[name = string("normed_43_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_43_cast_fp16 = slice_by_index(begin = normed_43_begin_0, end = normed_43_end_0, end_mask = normed_43_end_mask_0, x = normed_41_cast_fp16)[name = string("normed_43_cast_fp16")]; - tensor var_3037_to_fp16 = const()[name = string("op_3037_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63521472)))]; - tensor x_29_cast_fp16 = mul(x = normed_43_cast_fp16, y = var_3037_to_fp16)[name = string("x_29_cast_fp16")]; - tensor var_3049 = const()[name = string("op_3049"), val = tensor([0, 2, 1])]; - tensor input_35_axes_0 = const()[name = string("input_35_axes_0"), val = tensor([2])]; - tensor var_3050_cast_fp16 = transpose(perm = var_3049, x = x_29_cast_fp16)[name = string("transpose_219")]; - tensor input_35_cast_fp16 = expand_dims(axes = input_35_axes_0, x = var_3050_cast_fp16)[name = string("input_35_cast_fp16")]; - string x_31_pad_type_0 = const()[name = string("x_31_pad_type_0"), val = string("valid")]; - tensor x_31_strides_0 = const()[name = string("x_31_strides_0"), val = tensor([1, 1])]; - tensor x_31_pad_0 = const()[name = string("x_31_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_31_dilations_0 = const()[name = string("x_31_dilations_0"), val = tensor([1, 1])]; - int32 x_31_groups_0 = const()[name = string("x_31_groups_0"), val = int32(1)]; - tensor model_model_layers_1_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1209572608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1215544640))))[name = string("model_model_layers_1_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_31_cast_fp16 = conv(dilations = x_31_dilations_0, groups = x_31_groups_0, pad = x_31_pad_0, pad_type = x_31_pad_type_0, strides = x_31_strides_0, weight = model_model_layers_1_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_35_cast_fp16)[name = string("x_31_cast_fp16")]; - string b_3_pad_type_0 = const()[name = string("b_3_pad_type_0"), val = string("valid")]; - tensor b_3_strides_0 = const()[name = string("b_3_strides_0"), val = tensor([1, 1])]; - tensor b_3_pad_0 = const()[name = string("b_3_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_3_dilations_0 = const()[name = string("b_3_dilations_0"), val = tensor([1, 1])]; - int32 b_3_groups_0 = const()[name = string("b_3_groups_0"), val = int32(1)]; - tensor model_model_layers_1_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1215655296))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1221627328))))[name = string("model_model_layers_1_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_3_cast_fp16 = conv(dilations = b_3_dilations_0, groups = b_3_groups_0, pad = b_3_pad_0, pad_type = b_3_pad_type_0, strides = b_3_strides_0, weight = model_model_layers_1_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_35_cast_fp16)[name = string("b_3_cast_fp16")]; - string var_3075_mode_0 = const()[name = string("op_3075_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_3075_cast_fp16 = gelu(mode = var_3075_mode_0, x = x_31_cast_fp16)[name = string("op_3075_cast_fp16")]; - tensor input_37_cast_fp16 = mul(x = var_3075_cast_fp16, y = b_3_cast_fp16)[name = string("input_37_cast_fp16")]; - string e_3_pad_type_0 = const()[name = string("e_3_pad_type_0"), val = string("valid")]; - tensor e_3_strides_0 = const()[name = string("e_3_strides_0"), val = tensor([1, 1])]; - tensor e_3_pad_0 = const()[name = string("e_3_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_3_dilations_0 = const()[name = string("e_3_dilations_0"), val = tensor([1, 1])]; - int32 e_3_groups_0 = const()[name = string("e_3_groups_0"), val = int32(1)]; - tensor model_model_layers_1_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75689216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81661248))))[name = string("model_model_layers_1_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_3_cast_fp16 = conv(dilations = e_3_dilations_0, groups = e_3_groups_0, pad = e_3_pad_0, pad_type = e_3_pad_type_0, strides = e_3_strides_0, weight = model_model_layers_1_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_37_cast_fp16)[name = string("e_3_cast_fp16")]; - tensor var_3083_axes_0 = const()[name = string("op_3083_axes_0"), val = tensor([2])]; - tensor var_3083_cast_fp16 = squeeze(axes = var_3083_axes_0, x = e_3_cast_fp16)[name = string("op_3083_cast_fp16")]; - tensor var_3084 = const()[name = string("op_3084"), val = tensor([0, 2, 1])]; - int32 var_3095 = const()[name = string("op_3095"), val = int32(-1)]; - fp16 const_80_promoted_to_fp16 = const()[name = string("const_80_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_29_cast_fp16 = transpose(perm = var_3084, x = var_3083_cast_fp16)[name = string("transpose_218")]; - tensor var_3097_cast_fp16 = mul(x = hidden_states_29_cast_fp16, y = const_80_promoted_to_fp16)[name = string("op_3097_cast_fp16")]; - bool input_39_interleave_0 = const()[name = string("input_39_interleave_0"), val = bool(false)]; - tensor input_39_cast_fp16 = concat(axis = var_3095, interleave = input_39_interleave_0, values = (hidden_states_29_cast_fp16, var_3097_cast_fp16))[name = string("input_39_cast_fp16")]; - tensor normed_45_axes_0 = const()[name = string("normed_45_axes_0"), val = tensor([-1])]; - fp16 var_3092_to_fp16 = const()[name = string("op_3092_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_45_cast_fp16 = layer_norm(axes = normed_45_axes_0, epsilon = var_3092_to_fp16, x = input_39_cast_fp16)[name = string("normed_45_cast_fp16")]; - tensor normed_47_begin_0 = const()[name = string("normed_47_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_47_end_0 = const()[name = string("normed_47_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_47_end_mask_0 = const()[name = string("normed_47_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_47_cast_fp16 = slice_by_index(begin = normed_47_begin_0, end = normed_47_end_0, end_mask = normed_47_end_mask_0, x = normed_45_cast_fp16)[name = string("normed_47_cast_fp16")]; - tensor var_3111_to_fp16 = const()[name = string("op_3111_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81679744)))]; - tensor hidden_states_31_cast_fp16 = mul(x = normed_47_cast_fp16, y = var_3111_to_fp16)[name = string("hidden_states_31_cast_fp16")]; - tensor hidden_states_33_cast_fp16 = add(x = hidden_states_27_cast_fp16, y = hidden_states_31_cast_fp16)[name = string("hidden_states_33_cast_fp16")]; - int32 var_3165 = const()[name = string("op_3165"), val = int32(-1)]; - fp16 const_85_promoted_to_fp16 = const()[name = string("const_85_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_3167_cast_fp16 = mul(x = hidden_states_33_cast_fp16, y = const_85_promoted_to_fp16)[name = string("op_3167_cast_fp16")]; - bool input_41_interleave_0 = const()[name = string("input_41_interleave_0"), val = bool(false)]; - tensor input_41_cast_fp16 = concat(axis = var_3165, interleave = input_41_interleave_0, values = (hidden_states_33_cast_fp16, var_3167_cast_fp16))[name = string("input_41_cast_fp16")]; - tensor normed_49_axes_0 = const()[name = string("normed_49_axes_0"), val = tensor([-1])]; - fp16 var_3162_to_fp16 = const()[name = string("op_3162_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_49_cast_fp16 = layer_norm(axes = normed_49_axes_0, epsilon = var_3162_to_fp16, x = input_41_cast_fp16)[name = string("normed_49_cast_fp16")]; - tensor normed_51_begin_0 = const()[name = string("normed_51_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_51_end_0 = const()[name = string("normed_51_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_51_end_mask_0 = const()[name = string("normed_51_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_51_cast_fp16 = slice_by_index(begin = normed_51_begin_0, end = normed_51_end_0, end_mask = normed_51_end_mask_0, x = normed_49_cast_fp16)[name = string("normed_51_cast_fp16")]; - tensor var_3181_to_fp16 = const()[name = string("op_3181_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81682112)))]; - tensor hidden_states_35_cast_fp16 = mul(x = normed_51_cast_fp16, y = var_3181_to_fp16)[name = string("hidden_states_35_cast_fp16")]; - tensor var_3192 = const()[name = string("op_3192"), val = tensor([0, 2, 1])]; - tensor var_3195_axes_0 = const()[name = string("op_3195_axes_0"), val = tensor([2])]; - tensor var_3193_cast_fp16 = transpose(perm = var_3192, x = hidden_states_35_cast_fp16)[name = string("transpose_217")]; - tensor var_3195_cast_fp16 = expand_dims(axes = var_3195_axes_0, x = var_3193_cast_fp16)[name = string("op_3195_cast_fp16")]; - string query_states_17_pad_type_0 = const()[name = string("query_states_17_pad_type_0"), val = string("valid")]; - tensor query_states_17_strides_0 = const()[name = string("query_states_17_strides_0"), val = tensor([1, 1])]; - tensor query_states_17_pad_0 = const()[name = string("query_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_17_dilations_0 = const()[name = string("query_states_17_dilations_0"), val = tensor([1, 1])]; - int32 query_states_17_groups_0 = const()[name = string("query_states_17_groups_0"), val = int32(1)]; - tensor query_states_17 = conv(dilations = query_states_17_dilations_0, groups = query_states_17_groups_0, pad = query_states_17_pad_0, pad_type = query_states_17_pad_type_0, strides = query_states_17_strides_0, weight = model_model_layers_2_self_attn_q_proj_weight_palettized, x = var_3195_cast_fp16)[name = string("query_states_17")]; - string key_states_21_pad_type_0 = const()[name = string("key_states_21_pad_type_0"), val = string("valid")]; - tensor key_states_21_strides_0 = const()[name = string("key_states_21_strides_0"), val = tensor([1, 1])]; - tensor key_states_21_pad_0 = const()[name = string("key_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_21_dilations_0 = const()[name = string("key_states_21_dilations_0"), val = tensor([1, 1])]; - int32 key_states_21_groups_0 = const()[name = string("key_states_21_groups_0"), val = int32(1)]; - tensor key_states_21 = conv(dilations = key_states_21_dilations_0, groups = key_states_21_groups_0, pad = key_states_21_pad_0, pad_type = key_states_21_pad_type_0, strides = key_states_21_strides_0, weight = model_model_layers_2_self_attn_k_proj_weight_palettized, x = var_3195_cast_fp16)[name = string("key_states_21")]; - string value_states_17_pad_type_0 = const()[name = string("value_states_17_pad_type_0"), val = string("valid")]; - tensor value_states_17_strides_0 = const()[name = string("value_states_17_strides_0"), val = tensor([1, 1])]; - tensor value_states_17_pad_0 = const()[name = string("value_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_17_dilations_0 = const()[name = string("value_states_17_dilations_0"), val = tensor([1, 1])]; - int32 value_states_17_groups_0 = const()[name = string("value_states_17_groups_0"), val = int32(1)]; - tensor value_states_17 = conv(dilations = value_states_17_dilations_0, groups = value_states_17_groups_0, pad = value_states_17_pad_0, pad_type = value_states_17_pad_type_0, strides = value_states_17_strides_0, weight = model_model_layers_2_self_attn_v_proj_weight_palettized, x = var_3195_cast_fp16)[name = string("value_states_17")]; - tensor var_3237 = const()[name = string("op_3237"), val = tensor([1, 4, 256, 64])]; - tensor var_3238 = reshape(shape = var_3237, x = query_states_17)[name = string("op_3238")]; - tensor var_3243 = const()[name = string("op_3243"), val = tensor([0, 1, 3, 2])]; - tensor var_3248 = const()[name = string("op_3248"), val = tensor([1, 1, 256, 64])]; - tensor var_3249 = reshape(shape = var_3248, x = key_states_21)[name = string("op_3249")]; - tensor var_3254 = const()[name = string("op_3254"), val = tensor([0, 1, 3, 2])]; - tensor var_3259 = const()[name = string("op_3259"), val = tensor([1, 1, 256, 64])]; - tensor var_3260 = reshape(shape = var_3259, x = value_states_17)[name = string("op_3260")]; - tensor var_3265 = const()[name = string("op_3265"), val = tensor([0, 1, 3, 2])]; - int32 var_3276 = const()[name = string("op_3276"), val = int32(-1)]; - fp16 const_90_promoted = const()[name = string("const_90_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_37 = transpose(perm = var_3243, x = var_3238)[name = string("transpose_216")]; - tensor var_3278 = mul(x = hidden_states_37, y = const_90_promoted)[name = string("op_3278")]; - bool input_45_interleave_0 = const()[name = string("input_45_interleave_0"), val = bool(false)]; - tensor input_45 = concat(axis = var_3276, interleave = input_45_interleave_0, values = (hidden_states_37, var_3278))[name = string("input_45")]; - tensor normed_53_axes_0 = const()[name = string("normed_53_axes_0"), val = tensor([-1])]; - fp16 var_3273_to_fp16 = const()[name = string("op_3273_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_53_cast_fp16 = layer_norm(axes = normed_53_axes_0, epsilon = var_3273_to_fp16, x = input_45)[name = string("normed_53_cast_fp16")]; - tensor normed_55_begin_0 = const()[name = string("normed_55_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_55_end_0 = const()[name = string("normed_55_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_55_end_mask_0 = const()[name = string("normed_55_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_55 = slice_by_index(begin = normed_55_begin_0, end = normed_55_end_0, end_mask = normed_55_end_mask_0, x = normed_53_cast_fp16)[name = string("normed_55")]; - tensor var_3292_to_fp16 = const()[name = string("op_3292_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81684480)))]; - tensor q_5_cast_fp16 = mul(x = normed_55, y = var_3292_to_fp16)[name = string("q_5_cast_fp16")]; - int32 var_3303 = const()[name = string("op_3303"), val = int32(-1)]; - fp16 const_94_promoted = const()[name = string("const_94_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_39 = transpose(perm = var_3254, x = var_3249)[name = string("transpose_215")]; - tensor var_3305 = mul(x = hidden_states_39, y = const_94_promoted)[name = string("op_3305")]; - bool input_47_interleave_0 = const()[name = string("input_47_interleave_0"), val = bool(false)]; - tensor input_47 = concat(axis = var_3303, interleave = input_47_interleave_0, values = (hidden_states_39, var_3305))[name = string("input_47")]; - tensor normed_57_axes_0 = const()[name = string("normed_57_axes_0"), val = tensor([-1])]; - fp16 var_3300_to_fp16 = const()[name = string("op_3300_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_57_cast_fp16 = layer_norm(axes = normed_57_axes_0, epsilon = var_3300_to_fp16, x = input_47)[name = string("normed_57_cast_fp16")]; - tensor normed_59_begin_0 = const()[name = string("normed_59_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_59_end_0 = const()[name = string("normed_59_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_59_end_mask_0 = const()[name = string("normed_59_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_59 = slice_by_index(begin = normed_59_begin_0, end = normed_59_end_0, end_mask = normed_59_end_mask_0, x = normed_57_cast_fp16)[name = string("normed_59")]; - tensor var_3319_to_fp16 = const()[name = string("op_3319_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81685056)))]; - tensor k_5_cast_fp16 = mul(x = normed_59, y = var_3319_to_fp16)[name = string("k_5_cast_fp16")]; - tensor var_3333_cast_fp16 = mul(x = q_5_cast_fp16, y = cos_5)[name = string("op_3333_cast_fp16")]; - tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_9_cast_fp16 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = q_5_cast_fp16)[name = string("x1_9_cast_fp16")]; - tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_9_cast_fp16 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = q_5_cast_fp16)[name = string("x2_9_cast_fp16")]; - fp16 const_100_promoted_to_fp16 = const()[name = string("const_100_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_3354_cast_fp16 = mul(x = x2_9_cast_fp16, y = const_100_promoted_to_fp16)[name = string("op_3354_cast_fp16")]; - int32 var_3356 = const()[name = string("op_3356"), val = int32(-1)]; - bool var_3357_interleave_0 = const()[name = string("op_3357_interleave_0"), val = bool(false)]; - tensor var_3357_cast_fp16 = concat(axis = var_3356, interleave = var_3357_interleave_0, values = (var_3354_cast_fp16, x1_9_cast_fp16))[name = string("op_3357_cast_fp16")]; - tensor var_3358_cast_fp16 = mul(x = var_3357_cast_fp16, y = sin_5)[name = string("op_3358_cast_fp16")]; - tensor query_states_19_cast_fp16 = add(x = var_3333_cast_fp16, y = var_3358_cast_fp16)[name = string("query_states_19_cast_fp16")]; - tensor var_3361_cast_fp16 = mul(x = k_5_cast_fp16, y = cos_5)[name = string("op_3361_cast_fp16")]; - tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_11_cast_fp16 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = k_5_cast_fp16)[name = string("x1_11_cast_fp16")]; - tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_11_cast_fp16 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = k_5_cast_fp16)[name = string("x2_11_cast_fp16")]; - fp16 const_103_promoted_to_fp16 = const()[name = string("const_103_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_3382_cast_fp16 = mul(x = x2_11_cast_fp16, y = const_103_promoted_to_fp16)[name = string("op_3382_cast_fp16")]; - int32 var_3384 = const()[name = string("op_3384"), val = int32(-1)]; - bool var_3385_interleave_0 = const()[name = string("op_3385_interleave_0"), val = bool(false)]; - tensor var_3385_cast_fp16 = concat(axis = var_3384, interleave = var_3385_interleave_0, values = (var_3382_cast_fp16, x1_11_cast_fp16))[name = string("op_3385_cast_fp16")]; - tensor var_3386_cast_fp16 = mul(x = var_3385_cast_fp16, y = sin_5)[name = string("op_3386_cast_fp16")]; - tensor key_states_23_cast_fp16 = add(x = var_3361_cast_fp16, y = var_3386_cast_fp16)[name = string("key_states_23_cast_fp16")]; - tensor key_slice_5_begin_0 = const()[name = string("key_slice_5_begin_0"), val = tensor([2, 0, 0, 0])]; - tensor key_slice_5_end_0 = const()[name = string("key_slice_5_end_0"), val = tensor([3, 1, 512, 256])]; - tensor key_slice_5_end_mask_0 = const()[name = string("key_slice_5_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_5_cast_fp16 = slice_by_index(begin = key_slice_5_begin_0, end = key_slice_5_end_0, end_mask = key_slice_5_end_mask_0, x = coreml_update_state_55)[name = string("key_slice_5_cast_fp16")]; - tensor var_3423_begin_0 = const()[name = string("op_3423_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_3423_end_0 = const()[name = string("op_3423_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_3423_end_mask_0 = const()[name = string("op_3423_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_3423_cast_fp16 = slice_by_index(begin = var_3423_begin_0, end = var_3423_end_0, end_mask = var_3423_end_mask_0, x = key_slice_5_cast_fp16)[name = string("op_3423_cast_fp16")]; - int32 var_3450 = const()[name = string("op_3450"), val = int32(2)]; - bool shifted_key_5_interleave_0 = const()[name = string("shifted_key_5_interleave_0"), val = bool(false)]; - tensor shifted_key_5_cast_fp16 = concat(axis = var_3450, interleave = shifted_key_5_interleave_0, values = (var_3423_cast_fp16, key_states_23_cast_fp16))[name = string("shifted_key_5_cast_fp16")]; - tensor concat_28 = const()[name = string("concat_28"), val = tensor([2, 0, 0, 0])]; - tensor concat_29 = const()[name = string("concat_29"), val = tensor([3, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_28, begin_mask = model_model_kv_cache_local_internal_tensor_assign_5_begin_mask_0, end = concat_29, end_mask = model_model_kv_cache_local_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_5_stride_0, update = shifted_key_5_cast_fp16, x = coreml_update_state_55)[name = string("model_model_kv_cache_local_internal_tensor_assign_5_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_160_write_state")]; - tensor coreml_update_state_56 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_160")]; - tensor value_slice_5_begin_0 = const()[name = string("value_slice_5_begin_0"), val = tensor([24, 0, 0, 0])]; - tensor value_slice_5_end_0 = const()[name = string("value_slice_5_end_0"), val = tensor([25, 1, 512, 256])]; - tensor value_slice_5_end_mask_0 = const()[name = string("value_slice_5_end_mask_0"), val = tensor([false, true, true, true])]; - tensor value_slice_5_cast_fp16 = slice_by_index(begin = value_slice_5_begin_0, end = value_slice_5_end_0, end_mask = value_slice_5_end_mask_0, x = coreml_update_state_56)[name = string("value_slice_5_cast_fp16")]; - tensor var_3493_begin_0 = const()[name = string("op_3493_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_3493_end_0 = const()[name = string("op_3493_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_3493_end_mask_0 = const()[name = string("op_3493_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_3493_cast_fp16 = slice_by_index(begin = var_3493_begin_0, end = var_3493_end_0, end_mask = var_3493_end_mask_0, x = value_slice_5_cast_fp16)[name = string("op_3493_cast_fp16")]; - int32 var_3520 = const()[name = string("op_3520"), val = int32(2)]; - bool shifted_value_5_interleave_0 = const()[name = string("shifted_value_5_interleave_0"), val = bool(false)]; - tensor value_states_19 = transpose(perm = var_3265, x = var_3260)[name = string("transpose_214")]; - tensor shifted_value_5_cast_fp16 = concat(axis = var_3520, interleave = shifted_value_5_interleave_0, values = (var_3493_cast_fp16, value_states_19))[name = string("shifted_value_5_cast_fp16")]; - tensor concat_30 = const()[name = string("concat_30"), val = tensor([24, 0, 0, 0])]; - tensor concat_31 = const()[name = string("concat_31"), val = tensor([25, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_30, begin_mask = model_model_kv_cache_local_internal_tensor_assign_6_begin_mask_0, end = concat_31, end_mask = model_model_kv_cache_local_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_6_stride_0, update = shifted_value_5_cast_fp16, x = coreml_update_state_56)[name = string("model_model_kv_cache_local_internal_tensor_assign_6_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_161_write_state")]; - tensor coreml_update_state_57 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_161")]; - tensor var_3548_begin_0 = const()[name = string("op_3548_begin_0"), val = tensor([2, 0, 0, 0])]; - tensor var_3548_end_0 = const()[name = string("op_3548_end_0"), val = tensor([3, 1, 512, 256])]; - tensor var_3548_end_mask_0 = const()[name = string("op_3548_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_3548_cast_fp16 = slice_by_index(begin = var_3548_begin_0, end = var_3548_end_0, end_mask = var_3548_end_mask_0, x = coreml_update_state_57)[name = string("op_3548_cast_fp16")]; - tensor var_3555_begin_0 = const()[name = string("op_3555_begin_0"), val = tensor([24, 0, 0, 0])]; - tensor var_3555_end_0 = const()[name = string("op_3555_end_0"), val = tensor([25, 1, 512, 256])]; - tensor var_3555_end_mask_0 = const()[name = string("op_3555_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_3555_cast_fp16 = slice_by_index(begin = var_3555_begin_0, end = var_3555_end_0, end_mask = var_3555_end_mask_0, x = coreml_update_state_57)[name = string("op_3555_cast_fp16")]; - tensor var_3594 = const()[name = string("op_3594"), val = tensor([1, 4, 1, 1])]; - tensor x_37_cast_fp16 = tile(reps = var_3594, x = var_3548_cast_fp16)[name = string("x_37_cast_fp16")]; - tensor var_3614 = const()[name = string("op_3614"), val = tensor([1, 4, 1, 1])]; - tensor x_43_cast_fp16 = tile(reps = var_3614, x = var_3555_cast_fp16)[name = string("x_43_cast_fp16")]; - bool var_3641_transpose_x_0 = const()[name = string("op_3641_transpose_x_0"), val = bool(false)]; - bool var_3641_transpose_y_0 = const()[name = string("op_3641_transpose_y_0"), val = bool(true)]; - tensor var_3641 = matmul(transpose_x = var_3641_transpose_x_0, transpose_y = var_3641_transpose_y_0, x = query_states_19_cast_fp16, y = x_37_cast_fp16)[name = string("op_3641")]; - fp16 var_3642_to_fp16 = const()[name = string("op_3642_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_9_cast_fp16 = mul(x = var_3641, y = var_3642_to_fp16)[name = string("attn_weights_9_cast_fp16")]; - tensor attn_weights_11_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = mask_slice_1)[name = string("attn_weights_11_cast_fp16")]; - int32 var_3677 = const()[name = string("op_3677"), val = int32(-1)]; - tensor var_3679_cast_fp16 = softmax(axis = var_3677, x = attn_weights_11_cast_fp16)[name = string("op_3679_cast_fp16")]; - tensor concat_36 = const()[name = string("concat_36"), val = tensor([4, 64, 512])]; - tensor reshape_6_cast_fp16 = reshape(shape = concat_36, x = var_3679_cast_fp16)[name = string("reshape_6_cast_fp16")]; - tensor concat_37 = const()[name = string("concat_37"), val = tensor([4, 512, 256])]; - tensor reshape_7_cast_fp16 = reshape(shape = concat_37, x = x_43_cast_fp16)[name = string("reshape_7_cast_fp16")]; - bool matmul_2_transpose_x_0 = const()[name = string("matmul_2_transpose_x_0"), val = bool(false)]; - bool matmul_2_transpose_y_0 = const()[name = string("matmul_2_transpose_y_0"), val = bool(false)]; - tensor matmul_2_cast_fp16 = matmul(transpose_x = matmul_2_transpose_x_0, transpose_y = matmul_2_transpose_y_0, x = reshape_6_cast_fp16, y = reshape_7_cast_fp16)[name = string("matmul_2_cast_fp16")]; - tensor concat_41 = const()[name = string("concat_41"), val = tensor([1, 4, 64, 256])]; - tensor reshape_8_cast_fp16 = reshape(shape = concat_41, x = matmul_2_cast_fp16)[name = string("reshape_8_cast_fp16")]; - tensor var_3691_perm_0 = const()[name = string("op_3691_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_3710 = const()[name = string("op_3710"), val = tensor([1, 64, 1024])]; - tensor var_3691_cast_fp16 = transpose(perm = var_3691_perm_0, x = reshape_8_cast_fp16)[name = string("transpose_213")]; - tensor attn_output_25_cast_fp16 = reshape(shape = var_3710, x = var_3691_cast_fp16)[name = string("attn_output_25_cast_fp16")]; - tensor var_3715 = const()[name = string("op_3715"), val = tensor([0, 2, 1])]; - string var_3731_pad_type_0 = const()[name = string("op_3731_pad_type_0"), val = string("valid")]; - int32 var_3731_groups_0 = const()[name = string("op_3731_groups_0"), val = int32(1)]; - tensor var_3731_strides_0 = const()[name = string("op_3731_strides_0"), val = tensor([1])]; - tensor var_3731_pad_0 = const()[name = string("op_3731_pad_0"), val = tensor([0, 0])]; - tensor var_3731_dilations_0 = const()[name = string("op_3731_dilations_0"), val = tensor([1])]; - tensor squeeze_2_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81685632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82570432))))[name = string("squeeze_2_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_3716_cast_fp16 = transpose(perm = var_3715, x = attn_output_25_cast_fp16)[name = string("transpose_212")]; - tensor var_3731_cast_fp16 = conv(dilations = var_3731_dilations_0, groups = var_3731_groups_0, pad = var_3731_pad_0, pad_type = var_3731_pad_type_0, strides = var_3731_strides_0, weight = squeeze_2_cast_fp16_to_fp32_to_fp16_palettized, x = var_3716_cast_fp16)[name = string("op_3731_cast_fp16")]; - tensor var_3735 = const()[name = string("op_3735"), val = tensor([0, 2, 1])]; - int32 var_3746 = const()[name = string("op_3746"), val = int32(-1)]; - fp16 const_114_promoted_to_fp16 = const()[name = string("const_114_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_41_cast_fp16 = transpose(perm = var_3735, x = var_3731_cast_fp16)[name = string("transpose_211")]; - tensor var_3748_cast_fp16 = mul(x = hidden_states_41_cast_fp16, y = const_114_promoted_to_fp16)[name = string("op_3748_cast_fp16")]; - bool input_51_interleave_0 = const()[name = string("input_51_interleave_0"), val = bool(false)]; - tensor input_51_cast_fp16 = concat(axis = var_3746, interleave = input_51_interleave_0, values = (hidden_states_41_cast_fp16, var_3748_cast_fp16))[name = string("input_51_cast_fp16")]; - tensor normed_61_axes_0 = const()[name = string("normed_61_axes_0"), val = tensor([-1])]; - fp16 var_3743_to_fp16 = const()[name = string("op_3743_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_61_cast_fp16 = layer_norm(axes = normed_61_axes_0, epsilon = var_3743_to_fp16, x = input_51_cast_fp16)[name = string("normed_61_cast_fp16")]; - tensor normed_63_begin_0 = const()[name = string("normed_63_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_63_end_0 = const()[name = string("normed_63_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_63_end_mask_0 = const()[name = string("normed_63_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_63_cast_fp16 = slice_by_index(begin = normed_63_begin_0, end = normed_63_end_0, end_mask = normed_63_end_mask_0, x = normed_61_cast_fp16)[name = string("normed_63_cast_fp16")]; - tensor var_3762_to_fp16 = const()[name = string("op_3762_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82588928)))]; - tensor attn_output_29_cast_fp16 = mul(x = normed_63_cast_fp16, y = var_3762_to_fp16)[name = string("attn_output_29_cast_fp16")]; - tensor hidden_states_43_cast_fp16 = add(x = hidden_states_33_cast_fp16, y = attn_output_29_cast_fp16)[name = string("hidden_states_43_cast_fp16")]; - int32 var_3775 = const()[name = string("op_3775"), val = int32(-1)]; - fp16 const_118_promoted_to_fp16 = const()[name = string("const_118_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_3777_cast_fp16 = mul(x = hidden_states_43_cast_fp16, y = const_118_promoted_to_fp16)[name = string("op_3777_cast_fp16")]; - bool input_53_interleave_0 = const()[name = string("input_53_interleave_0"), val = bool(false)]; - tensor input_53_cast_fp16 = concat(axis = var_3775, interleave = input_53_interleave_0, values = (hidden_states_43_cast_fp16, var_3777_cast_fp16))[name = string("input_53_cast_fp16")]; - tensor normed_65_axes_0 = const()[name = string("normed_65_axes_0"), val = tensor([-1])]; - fp16 var_3772_to_fp16 = const()[name = string("op_3772_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_65_cast_fp16 = layer_norm(axes = normed_65_axes_0, epsilon = var_3772_to_fp16, x = input_53_cast_fp16)[name = string("normed_65_cast_fp16")]; - tensor normed_67_begin_0 = const()[name = string("normed_67_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_67_end_0 = const()[name = string("normed_67_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_67_end_mask_0 = const()[name = string("normed_67_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_67_cast_fp16 = slice_by_index(begin = normed_67_begin_0, end = normed_67_end_0, end_mask = normed_67_end_mask_0, x = normed_65_cast_fp16)[name = string("normed_67_cast_fp16")]; - tensor var_3791_to_fp16 = const()[name = string("op_3791_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82591296)))]; - tensor x_45_cast_fp16 = mul(x = normed_67_cast_fp16, y = var_3791_to_fp16)[name = string("x_45_cast_fp16")]; - tensor var_3803 = const()[name = string("op_3803"), val = tensor([0, 2, 1])]; - tensor input_55_axes_0 = const()[name = string("input_55_axes_0"), val = tensor([2])]; - tensor var_3804_cast_fp16 = transpose(perm = var_3803, x = x_45_cast_fp16)[name = string("transpose_210")]; - tensor input_55_cast_fp16 = expand_dims(axes = input_55_axes_0, x = var_3804_cast_fp16)[name = string("input_55_cast_fp16")]; - string x_47_pad_type_0 = const()[name = string("x_47_pad_type_0"), val = string("valid")]; - tensor x_47_strides_0 = const()[name = string("x_47_strides_0"), val = tensor([1, 1])]; - tensor x_47_pad_0 = const()[name = string("x_47_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_47_dilations_0 = const()[name = string("x_47_dilations_0"), val = tensor([1, 1])]; - int32 x_47_groups_0 = const()[name = string("x_47_groups_0"), val = int32(1)]; - tensor model_model_layers_2_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1221737984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1227710016))))[name = string("model_model_layers_2_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_47_cast_fp16 = conv(dilations = x_47_dilations_0, groups = x_47_groups_0, pad = x_47_pad_0, pad_type = x_47_pad_type_0, strides = x_47_strides_0, weight = model_model_layers_2_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_55_cast_fp16)[name = string("x_47_cast_fp16")]; - string b_5_pad_type_0 = const()[name = string("b_5_pad_type_0"), val = string("valid")]; - tensor b_5_strides_0 = const()[name = string("b_5_strides_0"), val = tensor([1, 1])]; - tensor b_5_pad_0 = const()[name = string("b_5_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_5_dilations_0 = const()[name = string("b_5_dilations_0"), val = tensor([1, 1])]; - int32 b_5_groups_0 = const()[name = string("b_5_groups_0"), val = int32(1)]; - tensor model_model_layers_2_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1227820672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1233792704))))[name = string("model_model_layers_2_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_5_cast_fp16 = conv(dilations = b_5_dilations_0, groups = b_5_groups_0, pad = b_5_pad_0, pad_type = b_5_pad_type_0, strides = b_5_strides_0, weight = model_model_layers_2_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_55_cast_fp16)[name = string("b_5_cast_fp16")]; - string var_3829_mode_0 = const()[name = string("op_3829_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_3829_cast_fp16 = gelu(mode = var_3829_mode_0, x = x_47_cast_fp16)[name = string("op_3829_cast_fp16")]; - tensor input_57_cast_fp16 = mul(x = var_3829_cast_fp16, y = b_5_cast_fp16)[name = string("input_57_cast_fp16")]; - string e_5_pad_type_0 = const()[name = string("e_5_pad_type_0"), val = string("valid")]; - tensor e_5_strides_0 = const()[name = string("e_5_strides_0"), val = tensor([1, 1])]; - tensor e_5_pad_0 = const()[name = string("e_5_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_5_dilations_0 = const()[name = string("e_5_dilations_0"), val = tensor([1, 1])]; - int32 e_5_groups_0 = const()[name = string("e_5_groups_0"), val = int32(1)]; - tensor model_model_layers_2_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94759040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100731072))))[name = string("model_model_layers_2_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_5_cast_fp16 = conv(dilations = e_5_dilations_0, groups = e_5_groups_0, pad = e_5_pad_0, pad_type = e_5_pad_type_0, strides = e_5_strides_0, weight = model_model_layers_2_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_57_cast_fp16)[name = string("e_5_cast_fp16")]; - tensor var_3837_axes_0 = const()[name = string("op_3837_axes_0"), val = tensor([2])]; - tensor var_3837_cast_fp16 = squeeze(axes = var_3837_axes_0, x = e_5_cast_fp16)[name = string("op_3837_cast_fp16")]; - tensor var_3838 = const()[name = string("op_3838"), val = tensor([0, 2, 1])]; - int32 var_3849 = const()[name = string("op_3849"), val = int32(-1)]; - fp16 const_122_promoted_to_fp16 = const()[name = string("const_122_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_45_cast_fp16 = transpose(perm = var_3838, x = var_3837_cast_fp16)[name = string("transpose_209")]; - tensor var_3851_cast_fp16 = mul(x = hidden_states_45_cast_fp16, y = const_122_promoted_to_fp16)[name = string("op_3851_cast_fp16")]; - bool input_59_interleave_0 = const()[name = string("input_59_interleave_0"), val = bool(false)]; - tensor input_59_cast_fp16 = concat(axis = var_3849, interleave = input_59_interleave_0, values = (hidden_states_45_cast_fp16, var_3851_cast_fp16))[name = string("input_59_cast_fp16")]; - tensor normed_69_axes_0 = const()[name = string("normed_69_axes_0"), val = tensor([-1])]; - fp16 var_3846_to_fp16 = const()[name = string("op_3846_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_69_cast_fp16 = layer_norm(axes = normed_69_axes_0, epsilon = var_3846_to_fp16, x = input_59_cast_fp16)[name = string("normed_69_cast_fp16")]; - tensor normed_71_begin_0 = const()[name = string("normed_71_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_71_end_0 = const()[name = string("normed_71_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_71_end_mask_0 = const()[name = string("normed_71_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_71_cast_fp16 = slice_by_index(begin = normed_71_begin_0, end = normed_71_end_0, end_mask = normed_71_end_mask_0, x = normed_69_cast_fp16)[name = string("normed_71_cast_fp16")]; - tensor var_3865_to_fp16 = const()[name = string("op_3865_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100749568)))]; - tensor hidden_states_47_cast_fp16 = mul(x = normed_71_cast_fp16, y = var_3865_to_fp16)[name = string("hidden_states_47_cast_fp16")]; - tensor hidden_states_49_cast_fp16 = add(x = hidden_states_43_cast_fp16, y = hidden_states_47_cast_fp16)[name = string("hidden_states_49_cast_fp16")]; - int32 var_3919 = const()[name = string("op_3919"), val = int32(-1)]; - fp16 const_127_promoted_to_fp16 = const()[name = string("const_127_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_3921_cast_fp16 = mul(x = hidden_states_49_cast_fp16, y = const_127_promoted_to_fp16)[name = string("op_3921_cast_fp16")]; - bool input_61_interleave_0 = const()[name = string("input_61_interleave_0"), val = bool(false)]; - tensor input_61_cast_fp16 = concat(axis = var_3919, interleave = input_61_interleave_0, values = (hidden_states_49_cast_fp16, var_3921_cast_fp16))[name = string("input_61_cast_fp16")]; - tensor normed_73_axes_0 = const()[name = string("normed_73_axes_0"), val = tensor([-1])]; - fp16 var_3916_to_fp16 = const()[name = string("op_3916_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_73_cast_fp16 = layer_norm(axes = normed_73_axes_0, epsilon = var_3916_to_fp16, x = input_61_cast_fp16)[name = string("normed_73_cast_fp16")]; - tensor normed_75_begin_0 = const()[name = string("normed_75_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_75_end_0 = const()[name = string("normed_75_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_75_end_mask_0 = const()[name = string("normed_75_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_75_cast_fp16 = slice_by_index(begin = normed_75_begin_0, end = normed_75_end_0, end_mask = normed_75_end_mask_0, x = normed_73_cast_fp16)[name = string("normed_75_cast_fp16")]; - tensor var_3935_to_fp16 = const()[name = string("op_3935_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100751936)))]; - tensor hidden_states_51_cast_fp16 = mul(x = normed_75_cast_fp16, y = var_3935_to_fp16)[name = string("hidden_states_51_cast_fp16")]; - tensor var_3946 = const()[name = string("op_3946"), val = tensor([0, 2, 1])]; - tensor var_3949_axes_0 = const()[name = string("op_3949_axes_0"), val = tensor([2])]; - tensor var_3947_cast_fp16 = transpose(perm = var_3946, x = hidden_states_51_cast_fp16)[name = string("transpose_208")]; - tensor var_3949_cast_fp16 = expand_dims(axes = var_3949_axes_0, x = var_3947_cast_fp16)[name = string("op_3949_cast_fp16")]; - string query_states_25_pad_type_0 = const()[name = string("query_states_25_pad_type_0"), val = string("valid")]; - tensor query_states_25_strides_0 = const()[name = string("query_states_25_strides_0"), val = tensor([1, 1])]; - tensor query_states_25_pad_0 = const()[name = string("query_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_25_dilations_0 = const()[name = string("query_states_25_dilations_0"), val = tensor([1, 1])]; - int32 query_states_25_groups_0 = const()[name = string("query_states_25_groups_0"), val = int32(1)]; - tensor query_states_25 = conv(dilations = query_states_25_dilations_0, groups = query_states_25_groups_0, pad = query_states_25_pad_0, pad_type = query_states_25_pad_type_0, strides = query_states_25_strides_0, weight = model_model_layers_3_self_attn_q_proj_weight_palettized, x = var_3949_cast_fp16)[name = string("query_states_25")]; - string key_states_31_pad_type_0 = const()[name = string("key_states_31_pad_type_0"), val = string("valid")]; - tensor key_states_31_strides_0 = const()[name = string("key_states_31_strides_0"), val = tensor([1, 1])]; - tensor key_states_31_pad_0 = const()[name = string("key_states_31_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_31_dilations_0 = const()[name = string("key_states_31_dilations_0"), val = tensor([1, 1])]; - int32 key_states_31_groups_0 = const()[name = string("key_states_31_groups_0"), val = int32(1)]; - tensor key_states_31 = conv(dilations = key_states_31_dilations_0, groups = key_states_31_groups_0, pad = key_states_31_pad_0, pad_type = key_states_31_pad_type_0, strides = key_states_31_strides_0, weight = model_model_layers_3_self_attn_k_proj_weight_palettized, x = var_3949_cast_fp16)[name = string("key_states_31")]; - string value_states_25_pad_type_0 = const()[name = string("value_states_25_pad_type_0"), val = string("valid")]; - tensor value_states_25_strides_0 = const()[name = string("value_states_25_strides_0"), val = tensor([1, 1])]; - tensor value_states_25_pad_0 = const()[name = string("value_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_25_dilations_0 = const()[name = string("value_states_25_dilations_0"), val = tensor([1, 1])]; - int32 value_states_25_groups_0 = const()[name = string("value_states_25_groups_0"), val = int32(1)]; - tensor value_states_25 = conv(dilations = value_states_25_dilations_0, groups = value_states_25_groups_0, pad = value_states_25_pad_0, pad_type = value_states_25_pad_type_0, strides = value_states_25_strides_0, weight = model_model_layers_3_self_attn_v_proj_weight_palettized, x = var_3949_cast_fp16)[name = string("value_states_25")]; - tensor var_3991 = const()[name = string("op_3991"), val = tensor([1, 4, 256, 64])]; - tensor var_3992 = reshape(shape = var_3991, x = query_states_25)[name = string("op_3992")]; - tensor var_3997 = const()[name = string("op_3997"), val = tensor([0, 1, 3, 2])]; - tensor var_4002 = const()[name = string("op_4002"), val = tensor([1, 1, 256, 64])]; - tensor var_4003 = reshape(shape = var_4002, x = key_states_31)[name = string("op_4003")]; - tensor var_4008 = const()[name = string("op_4008"), val = tensor([0, 1, 3, 2])]; - tensor var_4013 = const()[name = string("op_4013"), val = tensor([1, 1, 256, 64])]; - tensor var_4014 = reshape(shape = var_4013, x = value_states_25)[name = string("op_4014")]; - tensor var_4019 = const()[name = string("op_4019"), val = tensor([0, 1, 3, 2])]; - int32 var_4030 = const()[name = string("op_4030"), val = int32(-1)]; - fp16 const_132_promoted = const()[name = string("const_132_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_53 = transpose(perm = var_3997, x = var_3992)[name = string("transpose_207")]; - tensor var_4032 = mul(x = hidden_states_53, y = const_132_promoted)[name = string("op_4032")]; - bool input_65_interleave_0 = const()[name = string("input_65_interleave_0"), val = bool(false)]; - tensor input_65 = concat(axis = var_4030, interleave = input_65_interleave_0, values = (hidden_states_53, var_4032))[name = string("input_65")]; - tensor normed_77_axes_0 = const()[name = string("normed_77_axes_0"), val = tensor([-1])]; - fp16 var_4027_to_fp16 = const()[name = string("op_4027_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_77_cast_fp16 = layer_norm(axes = normed_77_axes_0, epsilon = var_4027_to_fp16, x = input_65)[name = string("normed_77_cast_fp16")]; - tensor normed_79_begin_0 = const()[name = string("normed_79_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_79_end_0 = const()[name = string("normed_79_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_79_end_mask_0 = const()[name = string("normed_79_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_79 = slice_by_index(begin = normed_79_begin_0, end = normed_79_end_0, end_mask = normed_79_end_mask_0, x = normed_77_cast_fp16)[name = string("normed_79")]; - tensor var_4046_to_fp16 = const()[name = string("op_4046_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100754304)))]; - tensor q_7_cast_fp16 = mul(x = normed_79, y = var_4046_to_fp16)[name = string("q_7_cast_fp16")]; - int32 var_4057 = const()[name = string("op_4057"), val = int32(-1)]; - fp16 const_136_promoted = const()[name = string("const_136_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_55 = transpose(perm = var_4008, x = var_4003)[name = string("transpose_206")]; - tensor var_4059 = mul(x = hidden_states_55, y = const_136_promoted)[name = string("op_4059")]; - bool input_67_interleave_0 = const()[name = string("input_67_interleave_0"), val = bool(false)]; - tensor input_67 = concat(axis = var_4057, interleave = input_67_interleave_0, values = (hidden_states_55, var_4059))[name = string("input_67")]; - tensor normed_81_axes_0 = const()[name = string("normed_81_axes_0"), val = tensor([-1])]; - fp16 var_4054_to_fp16 = const()[name = string("op_4054_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_81_cast_fp16 = layer_norm(axes = normed_81_axes_0, epsilon = var_4054_to_fp16, x = input_67)[name = string("normed_81_cast_fp16")]; - tensor normed_83_begin_0 = const()[name = string("normed_83_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_83_end_0 = const()[name = string("normed_83_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_83_end_mask_0 = const()[name = string("normed_83_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_83 = slice_by_index(begin = normed_83_begin_0, end = normed_83_end_0, end_mask = normed_83_end_mask_0, x = normed_81_cast_fp16)[name = string("normed_83")]; - tensor var_4073_to_fp16 = const()[name = string("op_4073_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100754880)))]; - tensor k_7_cast_fp16 = mul(x = normed_83, y = var_4073_to_fp16)[name = string("k_7_cast_fp16")]; - tensor var_4087_cast_fp16 = mul(x = q_7_cast_fp16, y = cos_5)[name = string("op_4087_cast_fp16")]; - tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_13_cast_fp16 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = q_7_cast_fp16)[name = string("x1_13_cast_fp16")]; - tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_13_cast_fp16 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = q_7_cast_fp16)[name = string("x2_13_cast_fp16")]; - fp16 const_142_promoted_to_fp16 = const()[name = string("const_142_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_4108_cast_fp16 = mul(x = x2_13_cast_fp16, y = const_142_promoted_to_fp16)[name = string("op_4108_cast_fp16")]; - int32 var_4110 = const()[name = string("op_4110"), val = int32(-1)]; - bool var_4111_interleave_0 = const()[name = string("op_4111_interleave_0"), val = bool(false)]; - tensor var_4111_cast_fp16 = concat(axis = var_4110, interleave = var_4111_interleave_0, values = (var_4108_cast_fp16, x1_13_cast_fp16))[name = string("op_4111_cast_fp16")]; - tensor var_4112_cast_fp16 = mul(x = var_4111_cast_fp16, y = sin_5)[name = string("op_4112_cast_fp16")]; - tensor query_states_27_cast_fp16 = add(x = var_4087_cast_fp16, y = var_4112_cast_fp16)[name = string("query_states_27_cast_fp16")]; - tensor var_4115_cast_fp16 = mul(x = k_7_cast_fp16, y = cos_5)[name = string("op_4115_cast_fp16")]; - tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_15_cast_fp16 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = k_7_cast_fp16)[name = string("x1_15_cast_fp16")]; - tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_15_cast_fp16 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = k_7_cast_fp16)[name = string("x2_15_cast_fp16")]; - fp16 const_145_promoted_to_fp16 = const()[name = string("const_145_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_4136_cast_fp16 = mul(x = x2_15_cast_fp16, y = const_145_promoted_to_fp16)[name = string("op_4136_cast_fp16")]; - int32 var_4138 = const()[name = string("op_4138"), val = int32(-1)]; - bool var_4139_interleave_0 = const()[name = string("op_4139_interleave_0"), val = bool(false)]; - tensor var_4139_cast_fp16 = concat(axis = var_4138, interleave = var_4139_interleave_0, values = (var_4136_cast_fp16, x1_15_cast_fp16))[name = string("op_4139_cast_fp16")]; - tensor var_4140_cast_fp16 = mul(x = var_4139_cast_fp16, y = sin_5)[name = string("op_4140_cast_fp16")]; - tensor key_states_33_cast_fp16 = add(x = var_4115_cast_fp16, y = var_4140_cast_fp16)[name = string("key_states_33_cast_fp16")]; - tensor key_slice_7_begin_0 = const()[name = string("key_slice_7_begin_0"), val = tensor([3, 0, 0, 0])]; - tensor key_slice_7_end_0 = const()[name = string("key_slice_7_end_0"), val = tensor([4, 1, 512, 256])]; - tensor key_slice_7_end_mask_0 = const()[name = string("key_slice_7_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_7_cast_fp16 = slice_by_index(begin = key_slice_7_begin_0, end = key_slice_7_end_0, end_mask = key_slice_7_end_mask_0, x = coreml_update_state_57)[name = string("key_slice_7_cast_fp16")]; - tensor var_4177_begin_0 = const()[name = string("op_4177_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_4177_end_0 = const()[name = string("op_4177_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_4177_end_mask_0 = const()[name = string("op_4177_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_4177_cast_fp16 = slice_by_index(begin = var_4177_begin_0, end = var_4177_end_0, end_mask = var_4177_end_mask_0, x = key_slice_7_cast_fp16)[name = string("op_4177_cast_fp16")]; - int32 var_4204 = const()[name = string("op_4204"), val = int32(2)]; - bool shifted_key_7_interleave_0 = const()[name = string("shifted_key_7_interleave_0"), val = bool(false)]; - tensor shifted_key_7_cast_fp16 = concat(axis = var_4204, interleave = shifted_key_7_interleave_0, values = (var_4177_cast_fp16, key_states_33_cast_fp16))[name = string("shifted_key_7_cast_fp16")]; - tensor concat_42 = const()[name = string("concat_42"), val = tensor([3, 0, 0, 0])]; - tensor concat_43 = const()[name = string("concat_43"), val = tensor([4, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_42, begin_mask = model_model_kv_cache_local_internal_tensor_assign_7_begin_mask_0, end = concat_43, end_mask = model_model_kv_cache_local_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_7_stride_0, update = shifted_key_7_cast_fp16, x = coreml_update_state_57)[name = string("model_model_kv_cache_local_internal_tensor_assign_7_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_162_write_state")]; - tensor coreml_update_state_58 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_162")]; - tensor value_slice_7_begin_0 = const()[name = string("value_slice_7_begin_0"), val = tensor([25, 0, 0, 0])]; - tensor value_slice_7_end_0 = const()[name = string("value_slice_7_end_0"), val = tensor([26, 1, 512, 256])]; - tensor value_slice_7_end_mask_0 = const()[name = string("value_slice_7_end_mask_0"), val = tensor([false, true, true, true])]; - tensor value_slice_7_cast_fp16 = slice_by_index(begin = value_slice_7_begin_0, end = value_slice_7_end_0, end_mask = value_slice_7_end_mask_0, x = coreml_update_state_58)[name = string("value_slice_7_cast_fp16")]; - tensor var_4247_begin_0 = const()[name = string("op_4247_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_4247_end_0 = const()[name = string("op_4247_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_4247_end_mask_0 = const()[name = string("op_4247_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_4247_cast_fp16 = slice_by_index(begin = var_4247_begin_0, end = var_4247_end_0, end_mask = var_4247_end_mask_0, x = value_slice_7_cast_fp16)[name = string("op_4247_cast_fp16")]; - int32 var_4274 = const()[name = string("op_4274"), val = int32(2)]; - bool shifted_value_7_interleave_0 = const()[name = string("shifted_value_7_interleave_0"), val = bool(false)]; - tensor value_states_27 = transpose(perm = var_4019, x = var_4014)[name = string("transpose_205")]; - tensor shifted_value_7_cast_fp16 = concat(axis = var_4274, interleave = shifted_value_7_interleave_0, values = (var_4247_cast_fp16, value_states_27))[name = string("shifted_value_7_cast_fp16")]; - tensor concat_44 = const()[name = string("concat_44"), val = tensor([25, 0, 0, 0])]; - tensor concat_45 = const()[name = string("concat_45"), val = tensor([26, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_44, begin_mask = model_model_kv_cache_local_internal_tensor_assign_8_begin_mask_0, end = concat_45, end_mask = model_model_kv_cache_local_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_8_stride_0, update = shifted_value_7_cast_fp16, x = coreml_update_state_58)[name = string("model_model_kv_cache_local_internal_tensor_assign_8_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_163_write_state")]; - tensor coreml_update_state_59 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_163")]; - tensor var_4302_begin_0 = const()[name = string("op_4302_begin_0"), val = tensor([3, 0, 0, 0])]; - tensor var_4302_end_0 = const()[name = string("op_4302_end_0"), val = tensor([4, 1, 512, 256])]; - tensor var_4302_end_mask_0 = const()[name = string("op_4302_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_4302_cast_fp16 = slice_by_index(begin = var_4302_begin_0, end = var_4302_end_0, end_mask = var_4302_end_mask_0, x = coreml_update_state_59)[name = string("op_4302_cast_fp16")]; - tensor var_4309_begin_0 = const()[name = string("op_4309_begin_0"), val = tensor([25, 0, 0, 0])]; - tensor var_4309_end_0 = const()[name = string("op_4309_end_0"), val = tensor([26, 1, 512, 256])]; - tensor var_4309_end_mask_0 = const()[name = string("op_4309_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_4309_cast_fp16 = slice_by_index(begin = var_4309_begin_0, end = var_4309_end_0, end_mask = var_4309_end_mask_0, x = coreml_update_state_59)[name = string("op_4309_cast_fp16")]; - tensor var_4348 = const()[name = string("op_4348"), val = tensor([1, 4, 1, 1])]; - tensor x_53_cast_fp16 = tile(reps = var_4348, x = var_4302_cast_fp16)[name = string("x_53_cast_fp16")]; - tensor var_4368 = const()[name = string("op_4368"), val = tensor([1, 4, 1, 1])]; - tensor x_59_cast_fp16 = tile(reps = var_4368, x = var_4309_cast_fp16)[name = string("x_59_cast_fp16")]; - bool var_4395_transpose_x_0 = const()[name = string("op_4395_transpose_x_0"), val = bool(false)]; - bool var_4395_transpose_y_0 = const()[name = string("op_4395_transpose_y_0"), val = bool(true)]; - tensor var_4395 = matmul(transpose_x = var_4395_transpose_x_0, transpose_y = var_4395_transpose_y_0, x = query_states_27_cast_fp16, y = x_53_cast_fp16)[name = string("op_4395")]; - fp16 var_4396_to_fp16 = const()[name = string("op_4396_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_13_cast_fp16 = mul(x = var_4395, y = var_4396_to_fp16)[name = string("attn_weights_13_cast_fp16")]; - tensor attn_weights_15_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = mask_slice_1)[name = string("attn_weights_15_cast_fp16")]; - int32 var_4431 = const()[name = string("op_4431"), val = int32(-1)]; - tensor var_4433_cast_fp16 = softmax(axis = var_4431, x = attn_weights_15_cast_fp16)[name = string("op_4433_cast_fp16")]; - tensor concat_50 = const()[name = string("concat_50"), val = tensor([4, 64, 512])]; - tensor reshape_9_cast_fp16 = reshape(shape = concat_50, x = var_4433_cast_fp16)[name = string("reshape_9_cast_fp16")]; - tensor concat_51 = const()[name = string("concat_51"), val = tensor([4, 512, 256])]; - tensor reshape_10_cast_fp16 = reshape(shape = concat_51, x = x_59_cast_fp16)[name = string("reshape_10_cast_fp16")]; - bool matmul_3_transpose_x_0 = const()[name = string("matmul_3_transpose_x_0"), val = bool(false)]; - bool matmul_3_transpose_y_0 = const()[name = string("matmul_3_transpose_y_0"), val = bool(false)]; - tensor matmul_3_cast_fp16 = matmul(transpose_x = matmul_3_transpose_x_0, transpose_y = matmul_3_transpose_y_0, x = reshape_9_cast_fp16, y = reshape_10_cast_fp16)[name = string("matmul_3_cast_fp16")]; - tensor concat_55 = const()[name = string("concat_55"), val = tensor([1, 4, 64, 256])]; - tensor reshape_11_cast_fp16 = reshape(shape = concat_55, x = matmul_3_cast_fp16)[name = string("reshape_11_cast_fp16")]; - tensor var_4445_perm_0 = const()[name = string("op_4445_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_4464 = const()[name = string("op_4464"), val = tensor([1, 64, 1024])]; - tensor var_4445_cast_fp16 = transpose(perm = var_4445_perm_0, x = reshape_11_cast_fp16)[name = string("transpose_204")]; - tensor attn_output_35_cast_fp16 = reshape(shape = var_4464, x = var_4445_cast_fp16)[name = string("attn_output_35_cast_fp16")]; - tensor var_4469 = const()[name = string("op_4469"), val = tensor([0, 2, 1])]; - string var_4485_pad_type_0 = const()[name = string("op_4485_pad_type_0"), val = string("valid")]; - int32 var_4485_groups_0 = const()[name = string("op_4485_groups_0"), val = int32(1)]; - tensor var_4485_strides_0 = const()[name = string("op_4485_strides_0"), val = tensor([1])]; - tensor var_4485_pad_0 = const()[name = string("op_4485_pad_0"), val = tensor([0, 0])]; - tensor var_4485_dilations_0 = const()[name = string("op_4485_dilations_0"), val = tensor([1])]; - tensor squeeze_3_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100755456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101640256))))[name = string("squeeze_3_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_4470_cast_fp16 = transpose(perm = var_4469, x = attn_output_35_cast_fp16)[name = string("transpose_203")]; - tensor var_4485_cast_fp16 = conv(dilations = var_4485_dilations_0, groups = var_4485_groups_0, pad = var_4485_pad_0, pad_type = var_4485_pad_type_0, strides = var_4485_strides_0, weight = squeeze_3_cast_fp16_to_fp32_to_fp16_palettized, x = var_4470_cast_fp16)[name = string("op_4485_cast_fp16")]; - tensor var_4489 = const()[name = string("op_4489"), val = tensor([0, 2, 1])]; - int32 var_4500 = const()[name = string("op_4500"), val = int32(-1)]; - fp16 const_156_promoted_to_fp16 = const()[name = string("const_156_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_57_cast_fp16 = transpose(perm = var_4489, x = var_4485_cast_fp16)[name = string("transpose_202")]; - tensor var_4502_cast_fp16 = mul(x = hidden_states_57_cast_fp16, y = const_156_promoted_to_fp16)[name = string("op_4502_cast_fp16")]; - bool input_71_interleave_0 = const()[name = string("input_71_interleave_0"), val = bool(false)]; - tensor input_71_cast_fp16 = concat(axis = var_4500, interleave = input_71_interleave_0, values = (hidden_states_57_cast_fp16, var_4502_cast_fp16))[name = string("input_71_cast_fp16")]; - tensor normed_85_axes_0 = const()[name = string("normed_85_axes_0"), val = tensor([-1])]; - fp16 var_4497_to_fp16 = const()[name = string("op_4497_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_85_cast_fp16 = layer_norm(axes = normed_85_axes_0, epsilon = var_4497_to_fp16, x = input_71_cast_fp16)[name = string("normed_85_cast_fp16")]; - tensor normed_87_begin_0 = const()[name = string("normed_87_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_87_end_0 = const()[name = string("normed_87_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_87_end_mask_0 = const()[name = string("normed_87_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_87_cast_fp16 = slice_by_index(begin = normed_87_begin_0, end = normed_87_end_0, end_mask = normed_87_end_mask_0, x = normed_85_cast_fp16)[name = string("normed_87_cast_fp16")]; - tensor var_4516_to_fp16 = const()[name = string("op_4516_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101658752)))]; - tensor attn_output_39_cast_fp16 = mul(x = normed_87_cast_fp16, y = var_4516_to_fp16)[name = string("attn_output_39_cast_fp16")]; - tensor hidden_states_59_cast_fp16 = add(x = hidden_states_49_cast_fp16, y = attn_output_39_cast_fp16)[name = string("hidden_states_59_cast_fp16")]; - int32 var_4529 = const()[name = string("op_4529"), val = int32(-1)]; - fp16 const_160_promoted_to_fp16 = const()[name = string("const_160_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_4531_cast_fp16 = mul(x = hidden_states_59_cast_fp16, y = const_160_promoted_to_fp16)[name = string("op_4531_cast_fp16")]; - bool input_73_interleave_0 = const()[name = string("input_73_interleave_0"), val = bool(false)]; - tensor input_73_cast_fp16 = concat(axis = var_4529, interleave = input_73_interleave_0, values = (hidden_states_59_cast_fp16, var_4531_cast_fp16))[name = string("input_73_cast_fp16")]; - tensor normed_89_axes_0 = const()[name = string("normed_89_axes_0"), val = tensor([-1])]; - fp16 var_4526_to_fp16 = const()[name = string("op_4526_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_89_cast_fp16 = layer_norm(axes = normed_89_axes_0, epsilon = var_4526_to_fp16, x = input_73_cast_fp16)[name = string("normed_89_cast_fp16")]; - tensor normed_91_begin_0 = const()[name = string("normed_91_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_91_end_0 = const()[name = string("normed_91_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_91_end_mask_0 = const()[name = string("normed_91_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_91_cast_fp16 = slice_by_index(begin = normed_91_begin_0, end = normed_91_end_0, end_mask = normed_91_end_mask_0, x = normed_89_cast_fp16)[name = string("normed_91_cast_fp16")]; - tensor var_4545_to_fp16 = const()[name = string("op_4545_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101661120)))]; - tensor x_61_cast_fp16 = mul(x = normed_91_cast_fp16, y = var_4545_to_fp16)[name = string("x_61_cast_fp16")]; - tensor var_4557 = const()[name = string("op_4557"), val = tensor([0, 2, 1])]; - tensor input_75_axes_0 = const()[name = string("input_75_axes_0"), val = tensor([2])]; - tensor var_4558_cast_fp16 = transpose(perm = var_4557, x = x_61_cast_fp16)[name = string("transpose_201")]; - tensor input_75_cast_fp16 = expand_dims(axes = input_75_axes_0, x = var_4558_cast_fp16)[name = string("input_75_cast_fp16")]; - string x_63_pad_type_0 = const()[name = string("x_63_pad_type_0"), val = string("valid")]; - tensor x_63_strides_0 = const()[name = string("x_63_strides_0"), val = tensor([1, 1])]; - tensor x_63_pad_0 = const()[name = string("x_63_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_63_dilations_0 = const()[name = string("x_63_dilations_0"), val = tensor([1, 1])]; - int32 x_63_groups_0 = const()[name = string("x_63_groups_0"), val = int32(1)]; - tensor model_model_layers_3_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1233903360))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1239875392))))[name = string("model_model_layers_3_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_63_cast_fp16 = conv(dilations = x_63_dilations_0, groups = x_63_groups_0, pad = x_63_pad_0, pad_type = x_63_pad_type_0, strides = x_63_strides_0, weight = model_model_layers_3_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_75_cast_fp16)[name = string("x_63_cast_fp16")]; - string b_7_pad_type_0 = const()[name = string("b_7_pad_type_0"), val = string("valid")]; - tensor b_7_strides_0 = const()[name = string("b_7_strides_0"), val = tensor([1, 1])]; - tensor b_7_pad_0 = const()[name = string("b_7_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_7_dilations_0 = const()[name = string("b_7_dilations_0"), val = tensor([1, 1])]; - int32 b_7_groups_0 = const()[name = string("b_7_groups_0"), val = int32(1)]; - tensor model_model_layers_3_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(603547136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1239986048))))[name = string("model_model_layers_3_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_7_cast_fp16 = conv(dilations = b_7_dilations_0, groups = b_7_groups_0, pad = b_7_pad_0, pad_type = b_7_pad_type_0, strides = b_7_strides_0, weight = model_model_layers_3_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_75_cast_fp16)[name = string("b_7_cast_fp16")]; - string var_4583_mode_0 = const()[name = string("op_4583_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_4583_cast_fp16 = gelu(mode = var_4583_mode_0, x = x_63_cast_fp16)[name = string("op_4583_cast_fp16")]; - tensor input_77_cast_fp16 = mul(x = var_4583_cast_fp16, y = b_7_cast_fp16)[name = string("input_77_cast_fp16")]; - string e_7_pad_type_0 = const()[name = string("e_7_pad_type_0"), val = string("valid")]; - tensor e_7_strides_0 = const()[name = string("e_7_strides_0"), val = tensor([1, 1])]; - tensor e_7_pad_0 = const()[name = string("e_7_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_7_dilations_0 = const()[name = string("e_7_dilations_0"), val = tensor([1, 1])]; - int32 e_7_groups_0 = const()[name = string("e_7_groups_0"), val = int32(1)]; - tensor model_model_layers_3_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113828864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119800896))))[name = string("model_model_layers_3_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_7_cast_fp16 = conv(dilations = e_7_dilations_0, groups = e_7_groups_0, pad = e_7_pad_0, pad_type = e_7_pad_type_0, strides = e_7_strides_0, weight = model_model_layers_3_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_77_cast_fp16)[name = string("e_7_cast_fp16")]; - tensor var_4591_axes_0 = const()[name = string("op_4591_axes_0"), val = tensor([2])]; - tensor var_4591_cast_fp16 = squeeze(axes = var_4591_axes_0, x = e_7_cast_fp16)[name = string("op_4591_cast_fp16")]; - tensor var_4592 = const()[name = string("op_4592"), val = tensor([0, 2, 1])]; - int32 var_4603 = const()[name = string("op_4603"), val = int32(-1)]; - fp16 const_164_promoted_to_fp16 = const()[name = string("const_164_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_61_cast_fp16 = transpose(perm = var_4592, x = var_4591_cast_fp16)[name = string("transpose_200")]; - tensor var_4605_cast_fp16 = mul(x = hidden_states_61_cast_fp16, y = const_164_promoted_to_fp16)[name = string("op_4605_cast_fp16")]; - bool input_79_interleave_0 = const()[name = string("input_79_interleave_0"), val = bool(false)]; - tensor input_79_cast_fp16 = concat(axis = var_4603, interleave = input_79_interleave_0, values = (hidden_states_61_cast_fp16, var_4605_cast_fp16))[name = string("input_79_cast_fp16")]; - tensor normed_93_axes_0 = const()[name = string("normed_93_axes_0"), val = tensor([-1])]; - fp16 var_4600_to_fp16 = const()[name = string("op_4600_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_93_cast_fp16 = layer_norm(axes = normed_93_axes_0, epsilon = var_4600_to_fp16, x = input_79_cast_fp16)[name = string("normed_93_cast_fp16")]; - tensor normed_95_begin_0 = const()[name = string("normed_95_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_95_end_0 = const()[name = string("normed_95_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_95_end_mask_0 = const()[name = string("normed_95_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_95_cast_fp16 = slice_by_index(begin = normed_95_begin_0, end = normed_95_end_0, end_mask = normed_95_end_mask_0, x = normed_93_cast_fp16)[name = string("normed_95_cast_fp16")]; - tensor var_4619_to_fp16 = const()[name = string("op_4619_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119819392)))]; - tensor hidden_states_63_cast_fp16 = mul(x = normed_95_cast_fp16, y = var_4619_to_fp16)[name = string("hidden_states_63_cast_fp16")]; - tensor hidden_states_65_cast_fp16 = add(x = hidden_states_59_cast_fp16, y = hidden_states_63_cast_fp16)[name = string("hidden_states_65_cast_fp16")]; - int32 var_4673 = const()[name = string("op_4673"), val = int32(-1)]; - fp16 const_169_promoted_to_fp16 = const()[name = string("const_169_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_4675_cast_fp16 = mul(x = hidden_states_65_cast_fp16, y = const_169_promoted_to_fp16)[name = string("op_4675_cast_fp16")]; - bool input_81_interleave_0 = const()[name = string("input_81_interleave_0"), val = bool(false)]; - tensor input_81_cast_fp16 = concat(axis = var_4673, interleave = input_81_interleave_0, values = (hidden_states_65_cast_fp16, var_4675_cast_fp16))[name = string("input_81_cast_fp16")]; - tensor normed_97_axes_0 = const()[name = string("normed_97_axes_0"), val = tensor([-1])]; - fp16 var_4670_to_fp16 = const()[name = string("op_4670_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_97_cast_fp16 = layer_norm(axes = normed_97_axes_0, epsilon = var_4670_to_fp16, x = input_81_cast_fp16)[name = string("normed_97_cast_fp16")]; - tensor normed_99_begin_0 = const()[name = string("normed_99_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_99_end_0 = const()[name = string("normed_99_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_99_end_mask_0 = const()[name = string("normed_99_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_99_cast_fp16 = slice_by_index(begin = normed_99_begin_0, end = normed_99_end_0, end_mask = normed_99_end_mask_0, x = normed_97_cast_fp16)[name = string("normed_99_cast_fp16")]; - tensor var_4689_to_fp16 = const()[name = string("op_4689_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119821760)))]; - tensor hidden_states_67_cast_fp16 = mul(x = normed_99_cast_fp16, y = var_4689_to_fp16)[name = string("hidden_states_67_cast_fp16")]; - tensor var_4700 = const()[name = string("op_4700"), val = tensor([0, 2, 1])]; - tensor var_4703_axes_0 = const()[name = string("op_4703_axes_0"), val = tensor([2])]; - tensor var_4701_cast_fp16 = transpose(perm = var_4700, x = hidden_states_67_cast_fp16)[name = string("transpose_199")]; - tensor var_4703_cast_fp16 = expand_dims(axes = var_4703_axes_0, x = var_4701_cast_fp16)[name = string("op_4703_cast_fp16")]; - string query_states_33_pad_type_0 = const()[name = string("query_states_33_pad_type_0"), val = string("valid")]; - tensor query_states_33_strides_0 = const()[name = string("query_states_33_strides_0"), val = tensor([1, 1])]; - tensor query_states_33_pad_0 = const()[name = string("query_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_33_dilations_0 = const()[name = string("query_states_33_dilations_0"), val = tensor([1, 1])]; - int32 query_states_33_groups_0 = const()[name = string("query_states_33_groups_0"), val = int32(1)]; - tensor query_states_33 = conv(dilations = query_states_33_dilations_0, groups = query_states_33_groups_0, pad = query_states_33_pad_0, pad_type = query_states_33_pad_type_0, strides = query_states_33_strides_0, weight = model_model_layers_4_self_attn_q_proj_weight_palettized, x = var_4703_cast_fp16)[name = string("query_states_33")]; - string key_states_41_pad_type_0 = const()[name = string("key_states_41_pad_type_0"), val = string("valid")]; - tensor key_states_41_strides_0 = const()[name = string("key_states_41_strides_0"), val = tensor([1, 1])]; - tensor key_states_41_pad_0 = const()[name = string("key_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_41_dilations_0 = const()[name = string("key_states_41_dilations_0"), val = tensor([1, 1])]; - int32 key_states_41_groups_0 = const()[name = string("key_states_41_groups_0"), val = int32(1)]; - tensor key_states_41 = conv(dilations = key_states_41_dilations_0, groups = key_states_41_groups_0, pad = key_states_41_pad_0, pad_type = key_states_41_pad_type_0, strides = key_states_41_strides_0, weight = model_model_layers_4_self_attn_k_proj_weight_palettized, x = var_4703_cast_fp16)[name = string("key_states_41")]; - string value_states_33_pad_type_0 = const()[name = string("value_states_33_pad_type_0"), val = string("valid")]; - tensor value_states_33_strides_0 = const()[name = string("value_states_33_strides_0"), val = tensor([1, 1])]; - tensor value_states_33_pad_0 = const()[name = string("value_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_33_dilations_0 = const()[name = string("value_states_33_dilations_0"), val = tensor([1, 1])]; - int32 value_states_33_groups_0 = const()[name = string("value_states_33_groups_0"), val = int32(1)]; - tensor value_states_33 = conv(dilations = value_states_33_dilations_0, groups = value_states_33_groups_0, pad = value_states_33_pad_0, pad_type = value_states_33_pad_type_0, strides = value_states_33_strides_0, weight = model_model_layers_4_self_attn_v_proj_weight_palettized, x = var_4703_cast_fp16)[name = string("value_states_33")]; - tensor var_4745 = const()[name = string("op_4745"), val = tensor([1, 4, 256, 64])]; - tensor var_4746 = reshape(shape = var_4745, x = query_states_33)[name = string("op_4746")]; - tensor var_4751 = const()[name = string("op_4751"), val = tensor([0, 1, 3, 2])]; - tensor var_4756 = const()[name = string("op_4756"), val = tensor([1, 1, 256, 64])]; - tensor var_4757 = reshape(shape = var_4756, x = key_states_41)[name = string("op_4757")]; - tensor var_4762 = const()[name = string("op_4762"), val = tensor([0, 1, 3, 2])]; - tensor var_4767 = const()[name = string("op_4767"), val = tensor([1, 1, 256, 64])]; - tensor var_4768 = reshape(shape = var_4767, x = value_states_33)[name = string("op_4768")]; - tensor var_4773 = const()[name = string("op_4773"), val = tensor([0, 1, 3, 2])]; - int32 var_4784 = const()[name = string("op_4784"), val = int32(-1)]; - fp16 const_174_promoted = const()[name = string("const_174_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_69 = transpose(perm = var_4751, x = var_4746)[name = string("transpose_198")]; - tensor var_4786 = mul(x = hidden_states_69, y = const_174_promoted)[name = string("op_4786")]; - bool input_85_interleave_0 = const()[name = string("input_85_interleave_0"), val = bool(false)]; - tensor input_85 = concat(axis = var_4784, interleave = input_85_interleave_0, values = (hidden_states_69, var_4786))[name = string("input_85")]; - tensor normed_101_axes_0 = const()[name = string("normed_101_axes_0"), val = tensor([-1])]; - fp16 var_4781_to_fp16 = const()[name = string("op_4781_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_101_cast_fp16 = layer_norm(axes = normed_101_axes_0, epsilon = var_4781_to_fp16, x = input_85)[name = string("normed_101_cast_fp16")]; - tensor normed_103_begin_0 = const()[name = string("normed_103_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_103_end_0 = const()[name = string("normed_103_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_103_end_mask_0 = const()[name = string("normed_103_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_103 = slice_by_index(begin = normed_103_begin_0, end = normed_103_end_0, end_mask = normed_103_end_mask_0, x = normed_101_cast_fp16)[name = string("normed_103")]; - tensor var_4800_to_fp16 = const()[name = string("op_4800_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119824128)))]; - tensor q_9_cast_fp16 = mul(x = normed_103, y = var_4800_to_fp16)[name = string("q_9_cast_fp16")]; - int32 var_4811 = const()[name = string("op_4811"), val = int32(-1)]; - fp16 const_178_promoted = const()[name = string("const_178_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_71 = transpose(perm = var_4762, x = var_4757)[name = string("transpose_197")]; - tensor var_4813 = mul(x = hidden_states_71, y = const_178_promoted)[name = string("op_4813")]; - bool input_87_interleave_0 = const()[name = string("input_87_interleave_0"), val = bool(false)]; - tensor input_87 = concat(axis = var_4811, interleave = input_87_interleave_0, values = (hidden_states_71, var_4813))[name = string("input_87")]; - tensor normed_105_axes_0 = const()[name = string("normed_105_axes_0"), val = tensor([-1])]; - fp16 var_4808_to_fp16 = const()[name = string("op_4808_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_105_cast_fp16 = layer_norm(axes = normed_105_axes_0, epsilon = var_4808_to_fp16, x = input_87)[name = string("normed_105_cast_fp16")]; - tensor normed_107_begin_0 = const()[name = string("normed_107_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_107_end_0 = const()[name = string("normed_107_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_107_end_mask_0 = const()[name = string("normed_107_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_107 = slice_by_index(begin = normed_107_begin_0, end = normed_107_end_0, end_mask = normed_107_end_mask_0, x = normed_105_cast_fp16)[name = string("normed_107")]; - tensor var_4827_to_fp16 = const()[name = string("op_4827_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119824704)))]; - tensor k_9_cast_fp16 = mul(x = normed_107, y = var_4827_to_fp16)[name = string("k_9_cast_fp16")]; - tensor var_4841_cast_fp16 = mul(x = q_9_cast_fp16, y = cos_5)[name = string("op_4841_cast_fp16")]; - tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_17_cast_fp16 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = q_9_cast_fp16)[name = string("x1_17_cast_fp16")]; - tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_17_cast_fp16 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = q_9_cast_fp16)[name = string("x2_17_cast_fp16")]; - fp16 const_184_promoted_to_fp16 = const()[name = string("const_184_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_4862_cast_fp16 = mul(x = x2_17_cast_fp16, y = const_184_promoted_to_fp16)[name = string("op_4862_cast_fp16")]; - int32 var_4864 = const()[name = string("op_4864"), val = int32(-1)]; - bool var_4865_interleave_0 = const()[name = string("op_4865_interleave_0"), val = bool(false)]; - tensor var_4865_cast_fp16 = concat(axis = var_4864, interleave = var_4865_interleave_0, values = (var_4862_cast_fp16, x1_17_cast_fp16))[name = string("op_4865_cast_fp16")]; - tensor var_4866_cast_fp16 = mul(x = var_4865_cast_fp16, y = sin_5)[name = string("op_4866_cast_fp16")]; - tensor query_states_35_cast_fp16 = add(x = var_4841_cast_fp16, y = var_4866_cast_fp16)[name = string("query_states_35_cast_fp16")]; - tensor var_4869_cast_fp16 = mul(x = k_9_cast_fp16, y = cos_5)[name = string("op_4869_cast_fp16")]; - tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_19_cast_fp16 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = k_9_cast_fp16)[name = string("x1_19_cast_fp16")]; - tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_19_cast_fp16 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = k_9_cast_fp16)[name = string("x2_19_cast_fp16")]; - fp16 const_187_promoted_to_fp16 = const()[name = string("const_187_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_4890_cast_fp16 = mul(x = x2_19_cast_fp16, y = const_187_promoted_to_fp16)[name = string("op_4890_cast_fp16")]; - int32 var_4892 = const()[name = string("op_4892"), val = int32(-1)]; - bool var_4893_interleave_0 = const()[name = string("op_4893_interleave_0"), val = bool(false)]; - tensor var_4893_cast_fp16 = concat(axis = var_4892, interleave = var_4893_interleave_0, values = (var_4890_cast_fp16, x1_19_cast_fp16))[name = string("op_4893_cast_fp16")]; - tensor var_4894_cast_fp16 = mul(x = var_4893_cast_fp16, y = sin_5)[name = string("op_4894_cast_fp16")]; - tensor key_states_43_cast_fp16 = add(x = var_4869_cast_fp16, y = var_4894_cast_fp16)[name = string("key_states_43_cast_fp16")]; - tensor key_slice_9_begin_0 = const()[name = string("key_slice_9_begin_0"), val = tensor([4, 0, 0, 0])]; - tensor key_slice_9_end_0 = const()[name = string("key_slice_9_end_0"), val = tensor([5, 1, 512, 256])]; - tensor key_slice_9_end_mask_0 = const()[name = string("key_slice_9_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_9_cast_fp16 = slice_by_index(begin = key_slice_9_begin_0, end = key_slice_9_end_0, end_mask = key_slice_9_end_mask_0, x = coreml_update_state_59)[name = string("key_slice_9_cast_fp16")]; - tensor var_4931_begin_0 = const()[name = string("op_4931_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_4931_end_0 = const()[name = string("op_4931_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_4931_end_mask_0 = const()[name = string("op_4931_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_4931_cast_fp16 = slice_by_index(begin = var_4931_begin_0, end = var_4931_end_0, end_mask = var_4931_end_mask_0, x = key_slice_9_cast_fp16)[name = string("op_4931_cast_fp16")]; - int32 var_4958 = const()[name = string("op_4958"), val = int32(2)]; - bool shifted_key_9_interleave_0 = const()[name = string("shifted_key_9_interleave_0"), val = bool(false)]; - tensor shifted_key_9_cast_fp16 = concat(axis = var_4958, interleave = shifted_key_9_interleave_0, values = (var_4931_cast_fp16, key_states_43_cast_fp16))[name = string("shifted_key_9_cast_fp16")]; - tensor concat_56 = const()[name = string("concat_56"), val = tensor([4, 0, 0, 0])]; - tensor concat_57 = const()[name = string("concat_57"), val = tensor([5, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_9_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_9_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_9_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_56, begin_mask = model_model_kv_cache_local_internal_tensor_assign_9_begin_mask_0, end = concat_57, end_mask = model_model_kv_cache_local_internal_tensor_assign_9_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_9_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_9_stride_0, update = shifted_key_9_cast_fp16, x = coreml_update_state_59)[name = string("model_model_kv_cache_local_internal_tensor_assign_9_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_9_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_164_write_state")]; - tensor coreml_update_state_60 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_164")]; - tensor value_slice_9_begin_0 = const()[name = string("value_slice_9_begin_0"), val = tensor([26, 0, 0, 0])]; - tensor value_slice_9_end_0 = const()[name = string("value_slice_9_end_0"), val = tensor([27, 1, 512, 256])]; - tensor value_slice_9_end_mask_0 = const()[name = string("value_slice_9_end_mask_0"), val = tensor([false, true, true, true])]; - tensor value_slice_9_cast_fp16 = slice_by_index(begin = value_slice_9_begin_0, end = value_slice_9_end_0, end_mask = value_slice_9_end_mask_0, x = coreml_update_state_60)[name = string("value_slice_9_cast_fp16")]; - tensor var_5001_begin_0 = const()[name = string("op_5001_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_5001_end_0 = const()[name = string("op_5001_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_5001_end_mask_0 = const()[name = string("op_5001_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_5001_cast_fp16 = slice_by_index(begin = var_5001_begin_0, end = var_5001_end_0, end_mask = var_5001_end_mask_0, x = value_slice_9_cast_fp16)[name = string("op_5001_cast_fp16")]; - int32 var_5028 = const()[name = string("op_5028"), val = int32(2)]; - bool shifted_value_9_interleave_0 = const()[name = string("shifted_value_9_interleave_0"), val = bool(false)]; - tensor value_states_35 = transpose(perm = var_4773, x = var_4768)[name = string("transpose_196")]; - tensor shifted_value_9_cast_fp16 = concat(axis = var_5028, interleave = shifted_value_9_interleave_0, values = (var_5001_cast_fp16, value_states_35))[name = string("shifted_value_9_cast_fp16")]; - tensor concat_58 = const()[name = string("concat_58"), val = tensor([26, 0, 0, 0])]; - tensor concat_59 = const()[name = string("concat_59"), val = tensor([27, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_10_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_10_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_10_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_58, begin_mask = model_model_kv_cache_local_internal_tensor_assign_10_begin_mask_0, end = concat_59, end_mask = model_model_kv_cache_local_internal_tensor_assign_10_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_10_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_10_stride_0, update = shifted_value_9_cast_fp16, x = coreml_update_state_60)[name = string("model_model_kv_cache_local_internal_tensor_assign_10_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_10_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_165_write_state")]; - tensor coreml_update_state_61 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_165")]; - tensor var_5056_begin_0 = const()[name = string("op_5056_begin_0"), val = tensor([4, 0, 0, 0])]; - tensor var_5056_end_0 = const()[name = string("op_5056_end_0"), val = tensor([5, 1, 512, 256])]; - tensor var_5056_end_mask_0 = const()[name = string("op_5056_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_5056_cast_fp16 = slice_by_index(begin = var_5056_begin_0, end = var_5056_end_0, end_mask = var_5056_end_mask_0, x = coreml_update_state_61)[name = string("op_5056_cast_fp16")]; - tensor var_5063_begin_0 = const()[name = string("op_5063_begin_0"), val = tensor([26, 0, 0, 0])]; - tensor var_5063_end_0 = const()[name = string("op_5063_end_0"), val = tensor([27, 1, 512, 256])]; - tensor var_5063_end_mask_0 = const()[name = string("op_5063_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_5063_cast_fp16 = slice_by_index(begin = var_5063_begin_0, end = var_5063_end_0, end_mask = var_5063_end_mask_0, x = coreml_update_state_61)[name = string("op_5063_cast_fp16")]; - tensor var_5102 = const()[name = string("op_5102"), val = tensor([1, 4, 1, 1])]; - tensor x_69_cast_fp16 = tile(reps = var_5102, x = var_5056_cast_fp16)[name = string("x_69_cast_fp16")]; - tensor var_5122 = const()[name = string("op_5122"), val = tensor([1, 4, 1, 1])]; - tensor x_75_cast_fp16 = tile(reps = var_5122, x = var_5063_cast_fp16)[name = string("x_75_cast_fp16")]; - bool var_5149_transpose_x_0 = const()[name = string("op_5149_transpose_x_0"), val = bool(false)]; - bool var_5149_transpose_y_0 = const()[name = string("op_5149_transpose_y_0"), val = bool(true)]; - tensor var_5149 = matmul(transpose_x = var_5149_transpose_x_0, transpose_y = var_5149_transpose_y_0, x = query_states_35_cast_fp16, y = x_69_cast_fp16)[name = string("op_5149")]; - fp16 var_5150_to_fp16 = const()[name = string("op_5150_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_17_cast_fp16 = mul(x = var_5149, y = var_5150_to_fp16)[name = string("attn_weights_17_cast_fp16")]; - tensor attn_weights_19_cast_fp16 = add(x = attn_weights_17_cast_fp16, y = mask_slice_1)[name = string("attn_weights_19_cast_fp16")]; - int32 var_5185 = const()[name = string("op_5185"), val = int32(-1)]; - tensor var_5187_cast_fp16 = softmax(axis = var_5185, x = attn_weights_19_cast_fp16)[name = string("op_5187_cast_fp16")]; - tensor concat_64 = const()[name = string("concat_64"), val = tensor([4, 64, 512])]; - tensor reshape_12_cast_fp16 = reshape(shape = concat_64, x = var_5187_cast_fp16)[name = string("reshape_12_cast_fp16")]; - tensor concat_65 = const()[name = string("concat_65"), val = tensor([4, 512, 256])]; - tensor reshape_13_cast_fp16 = reshape(shape = concat_65, x = x_75_cast_fp16)[name = string("reshape_13_cast_fp16")]; - bool matmul_4_transpose_x_0 = const()[name = string("matmul_4_transpose_x_0"), val = bool(false)]; - bool matmul_4_transpose_y_0 = const()[name = string("matmul_4_transpose_y_0"), val = bool(false)]; - tensor matmul_4_cast_fp16 = matmul(transpose_x = matmul_4_transpose_x_0, transpose_y = matmul_4_transpose_y_0, x = reshape_12_cast_fp16, y = reshape_13_cast_fp16)[name = string("matmul_4_cast_fp16")]; - tensor concat_69 = const()[name = string("concat_69"), val = tensor([1, 4, 64, 256])]; - tensor reshape_14_cast_fp16 = reshape(shape = concat_69, x = matmul_4_cast_fp16)[name = string("reshape_14_cast_fp16")]; - tensor var_5199_perm_0 = const()[name = string("op_5199_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_5218 = const()[name = string("op_5218"), val = tensor([1, 64, 1024])]; - tensor var_5199_cast_fp16 = transpose(perm = var_5199_perm_0, x = reshape_14_cast_fp16)[name = string("transpose_195")]; - tensor attn_output_45_cast_fp16 = reshape(shape = var_5218, x = var_5199_cast_fp16)[name = string("attn_output_45_cast_fp16")]; - tensor var_5223 = const()[name = string("op_5223"), val = tensor([0, 2, 1])]; - string var_5239_pad_type_0 = const()[name = string("op_5239_pad_type_0"), val = string("valid")]; - int32 var_5239_groups_0 = const()[name = string("op_5239_groups_0"), val = int32(1)]; - tensor var_5239_strides_0 = const()[name = string("op_5239_strides_0"), val = tensor([1])]; - tensor var_5239_pad_0 = const()[name = string("op_5239_pad_0"), val = tensor([0, 0])]; - tensor var_5239_dilations_0 = const()[name = string("op_5239_dilations_0"), val = tensor([1])]; - tensor squeeze_4_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119825280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120710080))))[name = string("squeeze_4_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_5224_cast_fp16 = transpose(perm = var_5223, x = attn_output_45_cast_fp16)[name = string("transpose_194")]; - tensor var_5239_cast_fp16 = conv(dilations = var_5239_dilations_0, groups = var_5239_groups_0, pad = var_5239_pad_0, pad_type = var_5239_pad_type_0, strides = var_5239_strides_0, weight = squeeze_4_cast_fp16_to_fp32_to_fp16_palettized, x = var_5224_cast_fp16)[name = string("op_5239_cast_fp16")]; - tensor var_5243 = const()[name = string("op_5243"), val = tensor([0, 2, 1])]; - int32 var_5254 = const()[name = string("op_5254"), val = int32(-1)]; - fp16 const_198_promoted_to_fp16 = const()[name = string("const_198_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_73_cast_fp16 = transpose(perm = var_5243, x = var_5239_cast_fp16)[name = string("transpose_193")]; - tensor var_5256_cast_fp16 = mul(x = hidden_states_73_cast_fp16, y = const_198_promoted_to_fp16)[name = string("op_5256_cast_fp16")]; - bool input_91_interleave_0 = const()[name = string("input_91_interleave_0"), val = bool(false)]; - tensor input_91_cast_fp16 = concat(axis = var_5254, interleave = input_91_interleave_0, values = (hidden_states_73_cast_fp16, var_5256_cast_fp16))[name = string("input_91_cast_fp16")]; - tensor normed_109_axes_0 = const()[name = string("normed_109_axes_0"), val = tensor([-1])]; - fp16 var_5251_to_fp16 = const()[name = string("op_5251_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_109_cast_fp16 = layer_norm(axes = normed_109_axes_0, epsilon = var_5251_to_fp16, x = input_91_cast_fp16)[name = string("normed_109_cast_fp16")]; - tensor normed_111_begin_0 = const()[name = string("normed_111_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_111_end_0 = const()[name = string("normed_111_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_111_end_mask_0 = const()[name = string("normed_111_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_111_cast_fp16 = slice_by_index(begin = normed_111_begin_0, end = normed_111_end_0, end_mask = normed_111_end_mask_0, x = normed_109_cast_fp16)[name = string("normed_111_cast_fp16")]; - tensor var_5270_to_fp16 = const()[name = string("op_5270_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120728576)))]; - tensor attn_output_49_cast_fp16 = mul(x = normed_111_cast_fp16, y = var_5270_to_fp16)[name = string("attn_output_49_cast_fp16")]; - tensor hidden_states_75_cast_fp16 = add(x = hidden_states_65_cast_fp16, y = attn_output_49_cast_fp16)[name = string("hidden_states_75_cast_fp16")]; - int32 var_5283 = const()[name = string("op_5283"), val = int32(-1)]; - fp16 const_202_promoted_to_fp16 = const()[name = string("const_202_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_5285_cast_fp16 = mul(x = hidden_states_75_cast_fp16, y = const_202_promoted_to_fp16)[name = string("op_5285_cast_fp16")]; - bool input_93_interleave_0 = const()[name = string("input_93_interleave_0"), val = bool(false)]; - tensor input_93_cast_fp16 = concat(axis = var_5283, interleave = input_93_interleave_0, values = (hidden_states_75_cast_fp16, var_5285_cast_fp16))[name = string("input_93_cast_fp16")]; - tensor normed_113_axes_0 = const()[name = string("normed_113_axes_0"), val = tensor([-1])]; - fp16 var_5280_to_fp16 = const()[name = string("op_5280_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_113_cast_fp16 = layer_norm(axes = normed_113_axes_0, epsilon = var_5280_to_fp16, x = input_93_cast_fp16)[name = string("normed_113_cast_fp16")]; - tensor normed_115_begin_0 = const()[name = string("normed_115_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_115_end_0 = const()[name = string("normed_115_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_115_end_mask_0 = const()[name = string("normed_115_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_115_cast_fp16 = slice_by_index(begin = normed_115_begin_0, end = normed_115_end_0, end_mask = normed_115_end_mask_0, x = normed_113_cast_fp16)[name = string("normed_115_cast_fp16")]; - tensor var_5299_to_fp16 = const()[name = string("op_5299_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120730944)))]; - tensor x_77_cast_fp16 = mul(x = normed_115_cast_fp16, y = var_5299_to_fp16)[name = string("x_77_cast_fp16")]; - tensor var_5311 = const()[name = string("op_5311"), val = tensor([0, 2, 1])]; - tensor input_95_axes_0 = const()[name = string("input_95_axes_0"), val = tensor([2])]; - tensor var_5312_cast_fp16 = transpose(perm = var_5311, x = x_77_cast_fp16)[name = string("transpose_192")]; - tensor input_95_cast_fp16 = expand_dims(axes = input_95_axes_0, x = var_5312_cast_fp16)[name = string("input_95_cast_fp16")]; - string x_79_pad_type_0 = const()[name = string("x_79_pad_type_0"), val = string("valid")]; - tensor x_79_strides_0 = const()[name = string("x_79_strides_0"), val = tensor([1, 1])]; - tensor x_79_pad_0 = const()[name = string("x_79_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_79_dilations_0 = const()[name = string("x_79_dilations_0"), val = tensor([1, 1])]; - int32 x_79_groups_0 = const()[name = string("x_79_groups_0"), val = int32(1)]; - tensor model_model_layers_4_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1240096704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1246068736))))[name = string("model_model_layers_4_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_79_cast_fp16 = conv(dilations = x_79_dilations_0, groups = x_79_groups_0, pad = x_79_pad_0, pad_type = x_79_pad_type_0, strides = x_79_strides_0, weight = model_model_layers_4_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_95_cast_fp16)[name = string("x_79_cast_fp16")]; - string b_9_pad_type_0 = const()[name = string("b_9_pad_type_0"), val = string("valid")]; - tensor b_9_strides_0 = const()[name = string("b_9_strides_0"), val = tensor([1, 1])]; - tensor b_9_pad_0 = const()[name = string("b_9_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_9_dilations_0 = const()[name = string("b_9_dilations_0"), val = tensor([1, 1])]; - int32 b_9_groups_0 = const()[name = string("b_9_groups_0"), val = int32(1)]; - tensor model_model_layers_4_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1246179392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1252151424))))[name = string("model_model_layers_4_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_9_cast_fp16 = conv(dilations = b_9_dilations_0, groups = b_9_groups_0, pad = b_9_pad_0, pad_type = b_9_pad_type_0, strides = b_9_strides_0, weight = model_model_layers_4_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_95_cast_fp16)[name = string("b_9_cast_fp16")]; - string var_5337_mode_0 = const()[name = string("op_5337_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_5337_cast_fp16 = gelu(mode = var_5337_mode_0, x = x_79_cast_fp16)[name = string("op_5337_cast_fp16")]; - tensor input_97_cast_fp16 = mul(x = var_5337_cast_fp16, y = b_9_cast_fp16)[name = string("input_97_cast_fp16")]; - string e_9_pad_type_0 = const()[name = string("e_9_pad_type_0"), val = string("valid")]; - tensor e_9_strides_0 = const()[name = string("e_9_strides_0"), val = tensor([1, 1])]; - tensor e_9_pad_0 = const()[name = string("e_9_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_9_dilations_0 = const()[name = string("e_9_dilations_0"), val = tensor([1, 1])]; - int32 e_9_groups_0 = const()[name = string("e_9_groups_0"), val = int32(1)]; - tensor model_model_layers_4_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132898688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138870720))))[name = string("model_model_layers_4_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_9_cast_fp16 = conv(dilations = e_9_dilations_0, groups = e_9_groups_0, pad = e_9_pad_0, pad_type = e_9_pad_type_0, strides = e_9_strides_0, weight = model_model_layers_4_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_97_cast_fp16)[name = string("e_9_cast_fp16")]; - tensor var_5345_axes_0 = const()[name = string("op_5345_axes_0"), val = tensor([2])]; - tensor var_5345_cast_fp16 = squeeze(axes = var_5345_axes_0, x = e_9_cast_fp16)[name = string("op_5345_cast_fp16")]; - tensor var_5346 = const()[name = string("op_5346"), val = tensor([0, 2, 1])]; - int32 var_5357 = const()[name = string("op_5357"), val = int32(-1)]; - fp16 const_206_promoted_to_fp16 = const()[name = string("const_206_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_77_cast_fp16 = transpose(perm = var_5346, x = var_5345_cast_fp16)[name = string("transpose_191")]; - tensor var_5359_cast_fp16 = mul(x = hidden_states_77_cast_fp16, y = const_206_promoted_to_fp16)[name = string("op_5359_cast_fp16")]; - bool input_99_interleave_0 = const()[name = string("input_99_interleave_0"), val = bool(false)]; - tensor input_99_cast_fp16 = concat(axis = var_5357, interleave = input_99_interleave_0, values = (hidden_states_77_cast_fp16, var_5359_cast_fp16))[name = string("input_99_cast_fp16")]; - tensor normed_117_axes_0 = const()[name = string("normed_117_axes_0"), val = tensor([-1])]; - fp16 var_5354_to_fp16 = const()[name = string("op_5354_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_117_cast_fp16 = layer_norm(axes = normed_117_axes_0, epsilon = var_5354_to_fp16, x = input_99_cast_fp16)[name = string("normed_117_cast_fp16")]; - tensor normed_119_begin_0 = const()[name = string("normed_119_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_119_end_0 = const()[name = string("normed_119_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_119_end_mask_0 = const()[name = string("normed_119_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_119_cast_fp16 = slice_by_index(begin = normed_119_begin_0, end = normed_119_end_0, end_mask = normed_119_end_mask_0, x = normed_117_cast_fp16)[name = string("normed_119_cast_fp16")]; - tensor var_5373_to_fp16 = const()[name = string("op_5373_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138889216)))]; - tensor hidden_states_79_cast_fp16 = mul(x = normed_119_cast_fp16, y = var_5373_to_fp16)[name = string("hidden_states_79_cast_fp16")]; - tensor hidden_states_81_cast_fp16 = add(x = hidden_states_75_cast_fp16, y = hidden_states_79_cast_fp16)[name = string("hidden_states_81_cast_fp16")]; - int32 var_5390_axis_0 = const()[name = string("op_5390_axis_0"), val = int32(1)]; - int32 var_5390_batch_dims_0 = const()[name = string("op_5390_batch_dims_0"), val = int32(0)]; - bool var_5390_validate_indices_0 = const()[name = string("op_5390_validate_indices_0"), val = bool(false)]; - tensor var_5382_to_fp16 = const()[name = string("op_5382_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143085952)))]; - tensor var_5390_cast_fp16_cast_uint16 = gather(axis = var_5390_axis_0, batch_dims = var_5390_batch_dims_0, indices = position_ids_to_uint16, validate_indices = var_5390_validate_indices_0, x = var_5382_to_fp16)[name = string("op_5390_cast_fp16_cast_uint16")]; - tensor var_5394 = const()[name = string("op_5394"), val = tensor([1, 64, 1, 256])]; - tensor cos_31_cast_fp16 = reshape(shape = var_5394, x = var_5390_cast_fp16_cast_uint16)[name = string("cos_31_cast_fp16")]; - int32 var_5404_axis_0 = const()[name = string("op_5404_axis_0"), val = int32(1)]; - int32 var_5404_batch_dims_0 = const()[name = string("op_5404_batch_dims_0"), val = int32(0)]; - bool var_5404_validate_indices_0 = const()[name = string("op_5404_validate_indices_0"), val = bool(false)]; - tensor var_5396_to_fp16 = const()[name = string("op_5396_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138891584)))]; - tensor var_5404_cast_fp16_cast_uint16 = gather(axis = var_5404_axis_0, batch_dims = var_5404_batch_dims_0, indices = position_ids_to_uint16, validate_indices = var_5404_validate_indices_0, x = var_5396_to_fp16)[name = string("op_5404_cast_fp16_cast_uint16")]; - tensor var_5408 = const()[name = string("op_5408"), val = tensor([1, 64, 1, 256])]; - tensor sin_31_cast_fp16 = reshape(shape = var_5408, x = var_5404_cast_fp16_cast_uint16)[name = string("sin_31_cast_fp16")]; - int32 var_5429 = const()[name = string("op_5429"), val = int32(-1)]; - fp16 const_211_promoted_to_fp16 = const()[name = string("const_211_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_5431_cast_fp16 = mul(x = hidden_states_81_cast_fp16, y = const_211_promoted_to_fp16)[name = string("op_5431_cast_fp16")]; - bool input_101_interleave_0 = const()[name = string("input_101_interleave_0"), val = bool(false)]; - tensor input_101_cast_fp16 = concat(axis = var_5429, interleave = input_101_interleave_0, values = (hidden_states_81_cast_fp16, var_5431_cast_fp16))[name = string("input_101_cast_fp16")]; - tensor normed_121_axes_0 = const()[name = string("normed_121_axes_0"), val = tensor([-1])]; - fp16 var_5426_to_fp16 = const()[name = string("op_5426_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_121_cast_fp16 = layer_norm(axes = normed_121_axes_0, epsilon = var_5426_to_fp16, x = input_101_cast_fp16)[name = string("normed_121_cast_fp16")]; - tensor normed_123_begin_0 = const()[name = string("normed_123_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_123_end_0 = const()[name = string("normed_123_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_123_end_mask_0 = const()[name = string("normed_123_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_123_cast_fp16 = slice_by_index(begin = normed_123_begin_0, end = normed_123_end_0, end_mask = normed_123_end_mask_0, x = normed_121_cast_fp16)[name = string("normed_123_cast_fp16")]; - tensor var_5445_to_fp16 = const()[name = string("op_5445_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147280320)))]; - tensor hidden_states_83_cast_fp16 = mul(x = normed_123_cast_fp16, y = var_5445_to_fp16)[name = string("hidden_states_83_cast_fp16")]; - tensor var_5456 = const()[name = string("op_5456"), val = tensor([0, 2, 1])]; - tensor var_5459_axes_0 = const()[name = string("op_5459_axes_0"), val = tensor([2])]; - tensor var_5457_cast_fp16 = transpose(perm = var_5456, x = hidden_states_83_cast_fp16)[name = string("transpose_190")]; - tensor var_5459_cast_fp16 = expand_dims(axes = var_5459_axes_0, x = var_5457_cast_fp16)[name = string("op_5459_cast_fp16")]; - string query_states_41_pad_type_0 = const()[name = string("query_states_41_pad_type_0"), val = string("valid")]; - tensor query_states_41_strides_0 = const()[name = string("query_states_41_strides_0"), val = tensor([1, 1])]; - tensor query_states_41_pad_0 = const()[name = string("query_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_41_dilations_0 = const()[name = string("query_states_41_dilations_0"), val = tensor([1, 1])]; - int32 query_states_41_groups_0 = const()[name = string("query_states_41_groups_0"), val = int32(1)]; - tensor query_states_41 = conv(dilations = query_states_41_dilations_0, groups = query_states_41_groups_0, pad = query_states_41_pad_0, pad_type = query_states_41_pad_type_0, strides = query_states_41_strides_0, weight = model_model_layers_5_self_attn_q_proj_weight_palettized, x = var_5459_cast_fp16)[name = string("query_states_41")]; - string key_states_51_pad_type_0 = const()[name = string("key_states_51_pad_type_0"), val = string("valid")]; - tensor key_states_51_strides_0 = const()[name = string("key_states_51_strides_0"), val = tensor([1, 1])]; - tensor key_states_51_pad_0 = const()[name = string("key_states_51_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_51_dilations_0 = const()[name = string("key_states_51_dilations_0"), val = tensor([1, 1])]; - int32 key_states_51_groups_0 = const()[name = string("key_states_51_groups_0"), val = int32(1)]; - tensor key_states_51 = conv(dilations = key_states_51_dilations_0, groups = key_states_51_groups_0, pad = key_states_51_pad_0, pad_type = key_states_51_pad_type_0, strides = key_states_51_strides_0, weight = model_model_layers_5_self_attn_k_proj_weight_palettized, x = var_5459_cast_fp16)[name = string("key_states_51")]; - string value_states_41_pad_type_0 = const()[name = string("value_states_41_pad_type_0"), val = string("valid")]; - tensor value_states_41_strides_0 = const()[name = string("value_states_41_strides_0"), val = tensor([1, 1])]; - tensor value_states_41_pad_0 = const()[name = string("value_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_41_dilations_0 = const()[name = string("value_states_41_dilations_0"), val = tensor([1, 1])]; - int32 value_states_41_groups_0 = const()[name = string("value_states_41_groups_0"), val = int32(1)]; - tensor value_states_41 = conv(dilations = value_states_41_dilations_0, groups = value_states_41_groups_0, pad = value_states_41_pad_0, pad_type = value_states_41_pad_type_0, strides = value_states_41_strides_0, weight = model_model_layers_5_self_attn_v_proj_weight_palettized, x = var_5459_cast_fp16)[name = string("value_states_41")]; - tensor var_5501 = const()[name = string("op_5501"), val = tensor([1, 4, 256, 64])]; - tensor var_5502 = reshape(shape = var_5501, x = query_states_41)[name = string("op_5502")]; - tensor var_5507 = const()[name = string("op_5507"), val = tensor([0, 1, 3, 2])]; - tensor var_5512 = const()[name = string("op_5512"), val = tensor([1, 1, 256, 64])]; - tensor var_5513 = reshape(shape = var_5512, x = key_states_51)[name = string("op_5513")]; - tensor var_5518 = const()[name = string("op_5518"), val = tensor([0, 1, 3, 2])]; - tensor var_5523 = const()[name = string("op_5523"), val = tensor([1, 1, 256, 64])]; - tensor var_5524 = reshape(shape = var_5523, x = value_states_41)[name = string("op_5524")]; - tensor var_5529 = const()[name = string("op_5529"), val = tensor([0, 1, 3, 2])]; - int32 var_5540 = const()[name = string("op_5540"), val = int32(-1)]; - fp16 const_216_promoted = const()[name = string("const_216_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_85 = transpose(perm = var_5507, x = var_5502)[name = string("transpose_189")]; - tensor var_5542 = mul(x = hidden_states_85, y = const_216_promoted)[name = string("op_5542")]; - bool input_105_interleave_0 = const()[name = string("input_105_interleave_0"), val = bool(false)]; - tensor input_105 = concat(axis = var_5540, interleave = input_105_interleave_0, values = (hidden_states_85, var_5542))[name = string("input_105")]; - tensor normed_125_axes_0 = const()[name = string("normed_125_axes_0"), val = tensor([-1])]; - fp16 var_5537_to_fp16 = const()[name = string("op_5537_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_125_cast_fp16 = layer_norm(axes = normed_125_axes_0, epsilon = var_5537_to_fp16, x = input_105)[name = string("normed_125_cast_fp16")]; - tensor normed_127_begin_0 = const()[name = string("normed_127_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_127_end_0 = const()[name = string("normed_127_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_127_end_mask_0 = const()[name = string("normed_127_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_127 = slice_by_index(begin = normed_127_begin_0, end = normed_127_end_0, end_mask = normed_127_end_mask_0, x = normed_125_cast_fp16)[name = string("normed_127")]; - tensor var_5556_to_fp16 = const()[name = string("op_5556_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147282688)))]; - tensor q_11_cast_fp16 = mul(x = normed_127, y = var_5556_to_fp16)[name = string("q_11_cast_fp16")]; - int32 var_5567 = const()[name = string("op_5567"), val = int32(-1)]; - fp16 const_220_promoted = const()[name = string("const_220_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_87 = transpose(perm = var_5518, x = var_5513)[name = string("transpose_188")]; - tensor var_5569 = mul(x = hidden_states_87, y = const_220_promoted)[name = string("op_5569")]; - bool input_107_interleave_0 = const()[name = string("input_107_interleave_0"), val = bool(false)]; - tensor input_107 = concat(axis = var_5567, interleave = input_107_interleave_0, values = (hidden_states_87, var_5569))[name = string("input_107")]; - tensor normed_129_axes_0 = const()[name = string("normed_129_axes_0"), val = tensor([-1])]; - fp16 var_5564_to_fp16 = const()[name = string("op_5564_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_129_cast_fp16 = layer_norm(axes = normed_129_axes_0, epsilon = var_5564_to_fp16, x = input_107)[name = string("normed_129_cast_fp16")]; - tensor normed_131_begin_0 = const()[name = string("normed_131_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_131_end_0 = const()[name = string("normed_131_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_131_end_mask_0 = const()[name = string("normed_131_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_131 = slice_by_index(begin = normed_131_begin_0, end = normed_131_end_0, end_mask = normed_131_end_mask_0, x = normed_129_cast_fp16)[name = string("normed_131")]; - tensor var_5583_to_fp16 = const()[name = string("op_5583_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147283264)))]; - tensor k_11_cast_fp16 = mul(x = normed_131, y = var_5583_to_fp16)[name = string("k_11_cast_fp16")]; - tensor var_5589 = const()[name = string("op_5589"), val = tensor([0, 2, 1, 3])]; - tensor var_5595 = const()[name = string("op_5595"), val = tensor([0, 2, 1, 3])]; - tensor cos_35 = transpose(perm = var_5589, x = cos_31_cast_fp16)[name = string("transpose_187")]; - tensor var_5597_cast_fp16 = mul(x = q_11_cast_fp16, y = cos_35)[name = string("op_5597_cast_fp16")]; - tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_21_cast_fp16 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = q_11_cast_fp16)[name = string("x1_21_cast_fp16")]; - tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_21_cast_fp16 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = q_11_cast_fp16)[name = string("x2_21_cast_fp16")]; - fp16 const_226_promoted_to_fp16 = const()[name = string("const_226_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_5618_cast_fp16 = mul(x = x2_21_cast_fp16, y = const_226_promoted_to_fp16)[name = string("op_5618_cast_fp16")]; - int32 var_5620 = const()[name = string("op_5620"), val = int32(-1)]; - bool var_5621_interleave_0 = const()[name = string("op_5621_interleave_0"), val = bool(false)]; - tensor var_5621_cast_fp16 = concat(axis = var_5620, interleave = var_5621_interleave_0, values = (var_5618_cast_fp16, x1_21_cast_fp16))[name = string("op_5621_cast_fp16")]; - tensor sin_35 = transpose(perm = var_5595, x = sin_31_cast_fp16)[name = string("transpose_186")]; - tensor var_5622_cast_fp16 = mul(x = var_5621_cast_fp16, y = sin_35)[name = string("op_5622_cast_fp16")]; - tensor query_states_43_cast_fp16 = add(x = var_5597_cast_fp16, y = var_5622_cast_fp16)[name = string("query_states_43_cast_fp16")]; - tensor var_5625_cast_fp16 = mul(x = k_11_cast_fp16, y = cos_35)[name = string("op_5625_cast_fp16")]; - tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_23_cast_fp16 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = k_11_cast_fp16)[name = string("x1_23_cast_fp16")]; - tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_23_cast_fp16 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = k_11_cast_fp16)[name = string("x2_23_cast_fp16")]; - fp16 const_229_promoted_to_fp16 = const()[name = string("const_229_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_5646_cast_fp16 = mul(x = x2_23_cast_fp16, y = const_229_promoted_to_fp16)[name = string("op_5646_cast_fp16")]; - int32 var_5648 = const()[name = string("op_5648"), val = int32(-1)]; - bool var_5649_interleave_0 = const()[name = string("op_5649_interleave_0"), val = bool(false)]; - tensor var_5649_cast_fp16 = concat(axis = var_5648, interleave = var_5649_interleave_0, values = (var_5646_cast_fp16, x1_23_cast_fp16))[name = string("op_5649_cast_fp16")]; - tensor var_5650_cast_fp16 = mul(x = var_5649_cast_fp16, y = sin_35)[name = string("op_5650_cast_fp16")]; - tensor key_states_53_cast_fp16 = add(x = var_5625_cast_fp16, y = var_5650_cast_fp16)[name = string("key_states_53_cast_fp16")]; - tensor seq_len_25 = const()[name = string("seq_len_25"), val = tensor([64])]; - tensor end_pos_1 = add(x = current_pos, y = seq_len_25)[name = string("end_pos_1")]; - tensor read_state_1 = read_state(input = model_model_kv_cache_global)[name = string("read_state_1")]; - tensor expand_dims_50 = const()[name = string("expand_dims_50"), val = tensor([0])]; - tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([0])]; - tensor expand_dims_53 = const()[name = string("expand_dims_53"), val = tensor([0])]; - tensor expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor([1])]; - int32 concat_72_axis_0 = const()[name = string("concat_72_axis_0"), val = int32(0)]; - bool concat_72_interleave_0 = const()[name = string("concat_72_interleave_0"), val = bool(false)]; - tensor concat_72 = concat(axis = concat_72_axis_0, interleave = concat_72_interleave_0, values = (expand_dims_50, expand_dims_51, current_pos, expand_dims_53))[name = string("concat_72")]; - tensor concat_73_values1_0 = const()[name = string("concat_73_values1_0"), val = tensor([0])]; - tensor concat_73_values3_0 = const()[name = string("concat_73_values3_0"), val = tensor([0])]; - int32 concat_73_axis_0 = const()[name = string("concat_73_axis_0"), val = int32(0)]; - bool concat_73_interleave_0 = const()[name = string("concat_73_interleave_0"), val = bool(false)]; - tensor concat_73 = concat(axis = concat_73_axis_0, interleave = concat_73_interleave_0, values = (expand_dims_54, concat_73_values1_0, end_pos_1, concat_73_values3_0))[name = string("concat_73")]; - tensor model_model_kv_cache_global_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_global_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_global_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_72, begin_mask = model_model_kv_cache_global_internal_tensor_assign_1_begin_mask_0, end = concat_73, end_mask = model_model_kv_cache_global_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_1_stride_0, update = key_states_53_cast_fp16, x = read_state_1)[name = string("model_model_kv_cache_global_internal_tensor_assign_1_cast_fp16")]; - write_state(data = model_model_kv_cache_global_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_166_write_state")]; - tensor coreml_update_state_62 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_166")]; - tensor expand_dims_56 = const()[name = string("expand_dims_56"), val = tensor([4])]; - tensor expand_dims_57 = const()[name = string("expand_dims_57"), val = tensor([0])]; - tensor expand_dims_59 = const()[name = string("expand_dims_59"), val = tensor([0])]; - tensor expand_dims_60 = const()[name = string("expand_dims_60"), val = tensor([5])]; - int32 concat_76_axis_0 = const()[name = string("concat_76_axis_0"), val = int32(0)]; - bool concat_76_interleave_0 = const()[name = string("concat_76_interleave_0"), val = bool(false)]; - tensor concat_76 = concat(axis = concat_76_axis_0, interleave = concat_76_interleave_0, values = (expand_dims_56, expand_dims_57, current_pos, expand_dims_59))[name = string("concat_76")]; - tensor concat_77_values1_0 = const()[name = string("concat_77_values1_0"), val = tensor([0])]; - tensor concat_77_values3_0 = const()[name = string("concat_77_values3_0"), val = tensor([0])]; - int32 concat_77_axis_0 = const()[name = string("concat_77_axis_0"), val = int32(0)]; - bool concat_77_interleave_0 = const()[name = string("concat_77_interleave_0"), val = bool(false)]; - tensor concat_77 = concat(axis = concat_77_axis_0, interleave = concat_77_interleave_0, values = (expand_dims_60, concat_77_values1_0, end_pos_1, concat_77_values3_0))[name = string("concat_77")]; - tensor model_model_kv_cache_global_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_global_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_global_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor value_states_43 = transpose(perm = var_5529, x = var_5524)[name = string("transpose_185")]; - tensor model_model_kv_cache_global_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_76, begin_mask = model_model_kv_cache_global_internal_tensor_assign_2_begin_mask_0, end = concat_77, end_mask = model_model_kv_cache_global_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_2_stride_0, update = value_states_43, x = coreml_update_state_62)[name = string("model_model_kv_cache_global_internal_tensor_assign_2_cast_fp16")]; - write_state(data = model_model_kv_cache_global_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_167_write_state")]; - tensor coreml_update_state_63 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_167")]; - tensor var_5749_begin_0 = const()[name = string("op_5749_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_5749_end_0 = const()[name = string("op_5749_end_0"), val = tensor([1, 1, 4096, 256])]; - tensor var_5749_end_mask_0 = const()[name = string("op_5749_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_5749_cast_fp16 = slice_by_index(begin = var_5749_begin_0, end = var_5749_end_0, end_mask = var_5749_end_mask_0, x = coreml_update_state_63)[name = string("op_5749_cast_fp16")]; - tensor var_5756_begin_0 = const()[name = string("op_5756_begin_0"), val = tensor([4, 0, 0, 0])]; - tensor var_5756_end_0 = const()[name = string("op_5756_end_0"), val = tensor([5, 1, 4096, 256])]; - tensor var_5756_end_mask_0 = const()[name = string("op_5756_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_5756_cast_fp16 = slice_by_index(begin = var_5756_begin_0, end = var_5756_end_0, end_mask = var_5756_end_mask_0, x = coreml_update_state_63)[name = string("op_5756_cast_fp16")]; - tensor var_5795 = const()[name = string("op_5795"), val = tensor([1, 4, 1, 1])]; - tensor x_85_cast_fp16 = tile(reps = var_5795, x = var_5749_cast_fp16)[name = string("x_85_cast_fp16")]; - tensor var_5815 = const()[name = string("op_5815"), val = tensor([1, 4, 1, 1])]; - tensor x_91_cast_fp16 = tile(reps = var_5815, x = var_5756_cast_fp16)[name = string("x_91_cast_fp16")]; - bool var_5842_transpose_x_0 = const()[name = string("op_5842_transpose_x_0"), val = bool(false)]; - bool var_5842_transpose_y_0 = const()[name = string("op_5842_transpose_y_0"), val = bool(true)]; - tensor var_5842 = matmul(transpose_x = var_5842_transpose_x_0, transpose_y = var_5842_transpose_y_0, x = query_states_43_cast_fp16, y = x_85_cast_fp16)[name = string("op_5842")]; - fp16 var_5843_to_fp16 = const()[name = string("op_5843_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_21_cast_fp16 = mul(x = var_5842, y = var_5843_to_fp16)[name = string("attn_weights_21_cast_fp16")]; - tensor attn_weights_23_cast_fp16 = add(x = attn_weights_21_cast_fp16, y = causal_mask)[name = string("attn_weights_23_cast_fp16")]; - int32 var_5878 = const()[name = string("op_5878"), val = int32(-1)]; - tensor var_5880_cast_fp16 = softmax(axis = var_5878, x = attn_weights_23_cast_fp16)[name = string("op_5880_cast_fp16")]; - tensor concat_82 = const()[name = string("concat_82"), val = tensor([4, 64, 4096])]; - tensor reshape_15_cast_fp16 = reshape(shape = concat_82, x = var_5880_cast_fp16)[name = string("reshape_15_cast_fp16")]; - tensor concat_83 = const()[name = string("concat_83"), val = tensor([4, 4096, 256])]; - tensor reshape_16_cast_fp16 = reshape(shape = concat_83, x = x_91_cast_fp16)[name = string("reshape_16_cast_fp16")]; - bool matmul_5_transpose_x_0 = const()[name = string("matmul_5_transpose_x_0"), val = bool(false)]; - bool matmul_5_transpose_y_0 = const()[name = string("matmul_5_transpose_y_0"), val = bool(false)]; - tensor matmul_5_cast_fp16 = matmul(transpose_x = matmul_5_transpose_x_0, transpose_y = matmul_5_transpose_y_0, x = reshape_15_cast_fp16, y = reshape_16_cast_fp16)[name = string("matmul_5_cast_fp16")]; - tensor concat_87 = const()[name = string("concat_87"), val = tensor([1, 4, 64, 256])]; - tensor reshape_17_cast_fp16 = reshape(shape = concat_87, x = matmul_5_cast_fp16)[name = string("reshape_17_cast_fp16")]; - tensor var_5892_perm_0 = const()[name = string("op_5892_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_5911 = const()[name = string("op_5911"), val = tensor([1, 64, 1024])]; - tensor var_5892_cast_fp16 = transpose(perm = var_5892_perm_0, x = reshape_17_cast_fp16)[name = string("transpose_184")]; - tensor attn_output_55_cast_fp16 = reshape(shape = var_5911, x = var_5892_cast_fp16)[name = string("attn_output_55_cast_fp16")]; - tensor var_5916 = const()[name = string("op_5916"), val = tensor([0, 2, 1])]; - string var_5932_pad_type_0 = const()[name = string("op_5932_pad_type_0"), val = string("valid")]; - int32 var_5932_groups_0 = const()[name = string("op_5932_groups_0"), val = int32(1)]; - tensor var_5932_strides_0 = const()[name = string("op_5932_strides_0"), val = tensor([1])]; - tensor var_5932_pad_0 = const()[name = string("op_5932_pad_0"), val = tensor([0, 0])]; - tensor var_5932_dilations_0 = const()[name = string("op_5932_dilations_0"), val = tensor([1])]; - tensor squeeze_5_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147283840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148168640))))[name = string("squeeze_5_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_5917_cast_fp16 = transpose(perm = var_5916, x = attn_output_55_cast_fp16)[name = string("transpose_183")]; - tensor var_5932_cast_fp16 = conv(dilations = var_5932_dilations_0, groups = var_5932_groups_0, pad = var_5932_pad_0, pad_type = var_5932_pad_type_0, strides = var_5932_strides_0, weight = squeeze_5_cast_fp16_to_fp32_to_fp16_palettized, x = var_5917_cast_fp16)[name = string("op_5932_cast_fp16")]; - tensor var_5936 = const()[name = string("op_5936"), val = tensor([0, 2, 1])]; - int32 var_5947 = const()[name = string("op_5947"), val = int32(-1)]; - fp16 const_241_promoted_to_fp16 = const()[name = string("const_241_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_89_cast_fp16 = transpose(perm = var_5936, x = var_5932_cast_fp16)[name = string("transpose_182")]; - tensor var_5949_cast_fp16 = mul(x = hidden_states_89_cast_fp16, y = const_241_promoted_to_fp16)[name = string("op_5949_cast_fp16")]; - bool input_111_interleave_0 = const()[name = string("input_111_interleave_0"), val = bool(false)]; - tensor input_111_cast_fp16 = concat(axis = var_5947, interleave = input_111_interleave_0, values = (hidden_states_89_cast_fp16, var_5949_cast_fp16))[name = string("input_111_cast_fp16")]; - tensor normed_133_axes_0 = const()[name = string("normed_133_axes_0"), val = tensor([-1])]; - fp16 var_5944_to_fp16 = const()[name = string("op_5944_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_133_cast_fp16 = layer_norm(axes = normed_133_axes_0, epsilon = var_5944_to_fp16, x = input_111_cast_fp16)[name = string("normed_133_cast_fp16")]; - tensor normed_135_begin_0 = const()[name = string("normed_135_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_135_end_0 = const()[name = string("normed_135_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_135_end_mask_0 = const()[name = string("normed_135_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_135_cast_fp16 = slice_by_index(begin = normed_135_begin_0, end = normed_135_end_0, end_mask = normed_135_end_mask_0, x = normed_133_cast_fp16)[name = string("normed_135_cast_fp16")]; - tensor var_5963_to_fp16 = const()[name = string("op_5963_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148187136)))]; - tensor attn_output_59_cast_fp16 = mul(x = normed_135_cast_fp16, y = var_5963_to_fp16)[name = string("attn_output_59_cast_fp16")]; - tensor hidden_states_91_cast_fp16 = add(x = hidden_states_81_cast_fp16, y = attn_output_59_cast_fp16)[name = string("hidden_states_91_cast_fp16")]; - int32 var_5976 = const()[name = string("op_5976"), val = int32(-1)]; - fp16 const_245_promoted_to_fp16 = const()[name = string("const_245_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_5978_cast_fp16 = mul(x = hidden_states_91_cast_fp16, y = const_245_promoted_to_fp16)[name = string("op_5978_cast_fp16")]; - bool input_113_interleave_0 = const()[name = string("input_113_interleave_0"), val = bool(false)]; - tensor input_113_cast_fp16 = concat(axis = var_5976, interleave = input_113_interleave_0, values = (hidden_states_91_cast_fp16, var_5978_cast_fp16))[name = string("input_113_cast_fp16")]; - tensor normed_137_axes_0 = const()[name = string("normed_137_axes_0"), val = tensor([-1])]; - fp16 var_5973_to_fp16 = const()[name = string("op_5973_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_137_cast_fp16 = layer_norm(axes = normed_137_axes_0, epsilon = var_5973_to_fp16, x = input_113_cast_fp16)[name = string("normed_137_cast_fp16")]; - tensor normed_139_begin_0 = const()[name = string("normed_139_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_139_end_0 = const()[name = string("normed_139_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_139_end_mask_0 = const()[name = string("normed_139_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_139_cast_fp16 = slice_by_index(begin = normed_139_begin_0, end = normed_139_end_0, end_mask = normed_139_end_mask_0, x = normed_137_cast_fp16)[name = string("normed_139_cast_fp16")]; - tensor var_5992_to_fp16 = const()[name = string("op_5992_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148189504)))]; - tensor x_93_cast_fp16 = mul(x = normed_139_cast_fp16, y = var_5992_to_fp16)[name = string("x_93_cast_fp16")]; - tensor var_6004 = const()[name = string("op_6004"), val = tensor([0, 2, 1])]; - tensor input_115_axes_0 = const()[name = string("input_115_axes_0"), val = tensor([2])]; - tensor var_6005_cast_fp16 = transpose(perm = var_6004, x = x_93_cast_fp16)[name = string("transpose_181")]; - tensor input_115_cast_fp16 = expand_dims(axes = input_115_axes_0, x = var_6005_cast_fp16)[name = string("input_115_cast_fp16")]; - string x_95_pad_type_0 = const()[name = string("x_95_pad_type_0"), val = string("valid")]; - tensor x_95_strides_0 = const()[name = string("x_95_strides_0"), val = tensor([1, 1])]; - tensor x_95_pad_0 = const()[name = string("x_95_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_95_dilations_0 = const()[name = string("x_95_dilations_0"), val = tensor([1, 1])]; - int32 x_95_groups_0 = const()[name = string("x_95_groups_0"), val = int32(1)]; - tensor model_model_layers_5_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1252262080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1258234112))))[name = string("model_model_layers_5_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_95_cast_fp16 = conv(dilations = x_95_dilations_0, groups = x_95_groups_0, pad = x_95_pad_0, pad_type = x_95_pad_type_0, strides = x_95_strides_0, weight = model_model_layers_5_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_115_cast_fp16)[name = string("x_95_cast_fp16")]; - string b_11_pad_type_0 = const()[name = string("b_11_pad_type_0"), val = string("valid")]; - tensor b_11_strides_0 = const()[name = string("b_11_strides_0"), val = tensor([1, 1])]; - tensor b_11_pad_0 = const()[name = string("b_11_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_11_dilations_0 = const()[name = string("b_11_dilations_0"), val = tensor([1, 1])]; - int32 b_11_groups_0 = const()[name = string("b_11_groups_0"), val = int32(1)]; - tensor model_model_layers_5_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1258344768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1264316800))))[name = string("model_model_layers_5_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_11_cast_fp16 = conv(dilations = b_11_dilations_0, groups = b_11_groups_0, pad = b_11_pad_0, pad_type = b_11_pad_type_0, strides = b_11_strides_0, weight = model_model_layers_5_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_115_cast_fp16)[name = string("b_11_cast_fp16")]; - string var_6030_mode_0 = const()[name = string("op_6030_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_6030_cast_fp16 = gelu(mode = var_6030_mode_0, x = x_95_cast_fp16)[name = string("op_6030_cast_fp16")]; - tensor input_117_cast_fp16 = mul(x = var_6030_cast_fp16, y = b_11_cast_fp16)[name = string("input_117_cast_fp16")]; - string e_11_pad_type_0 = const()[name = string("e_11_pad_type_0"), val = string("valid")]; - tensor e_11_strides_0 = const()[name = string("e_11_strides_0"), val = tensor([1, 1])]; - tensor e_11_pad_0 = const()[name = string("e_11_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_11_dilations_0 = const()[name = string("e_11_dilations_0"), val = tensor([1, 1])]; - int32 e_11_groups_0 = const()[name = string("e_11_groups_0"), val = int32(1)]; - tensor model_model_layers_5_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160357248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166329280))))[name = string("model_model_layers_5_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_11_cast_fp16 = conv(dilations = e_11_dilations_0, groups = e_11_groups_0, pad = e_11_pad_0, pad_type = e_11_pad_type_0, strides = e_11_strides_0, weight = model_model_layers_5_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_117_cast_fp16)[name = string("e_11_cast_fp16")]; - tensor var_6038_axes_0 = const()[name = string("op_6038_axes_0"), val = tensor([2])]; - tensor var_6038_cast_fp16 = squeeze(axes = var_6038_axes_0, x = e_11_cast_fp16)[name = string("op_6038_cast_fp16")]; - tensor var_6039 = const()[name = string("op_6039"), val = tensor([0, 2, 1])]; - int32 var_6050 = const()[name = string("op_6050"), val = int32(-1)]; - fp16 const_249_promoted_to_fp16 = const()[name = string("const_249_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_93_cast_fp16 = transpose(perm = var_6039, x = var_6038_cast_fp16)[name = string("transpose_180")]; - tensor var_6052_cast_fp16 = mul(x = hidden_states_93_cast_fp16, y = const_249_promoted_to_fp16)[name = string("op_6052_cast_fp16")]; - bool input_119_interleave_0 = const()[name = string("input_119_interleave_0"), val = bool(false)]; - tensor input_119_cast_fp16 = concat(axis = var_6050, interleave = input_119_interleave_0, values = (hidden_states_93_cast_fp16, var_6052_cast_fp16))[name = string("input_119_cast_fp16")]; - tensor normed_141_axes_0 = const()[name = string("normed_141_axes_0"), val = tensor([-1])]; - fp16 var_6047_to_fp16 = const()[name = string("op_6047_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_141_cast_fp16 = layer_norm(axes = normed_141_axes_0, epsilon = var_6047_to_fp16, x = input_119_cast_fp16)[name = string("normed_141_cast_fp16")]; - tensor normed_143_begin_0 = const()[name = string("normed_143_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_143_end_0 = const()[name = string("normed_143_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_143_end_mask_0 = const()[name = string("normed_143_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_143_cast_fp16 = slice_by_index(begin = normed_143_begin_0, end = normed_143_end_0, end_mask = normed_143_end_mask_0, x = normed_141_cast_fp16)[name = string("normed_143_cast_fp16")]; - tensor var_6066_to_fp16 = const()[name = string("op_6066_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166347776)))]; - tensor hidden_states_95_cast_fp16 = mul(x = normed_143_cast_fp16, y = var_6066_to_fp16)[name = string("hidden_states_95_cast_fp16")]; - tensor hidden_states_97_cast_fp16 = add(x = hidden_states_91_cast_fp16, y = hidden_states_95_cast_fp16)[name = string("hidden_states_97_cast_fp16")]; - int32 var_6120 = const()[name = string("op_6120"), val = int32(-1)]; - fp16 const_254_promoted_to_fp16 = const()[name = string("const_254_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_6122_cast_fp16 = mul(x = hidden_states_97_cast_fp16, y = const_254_promoted_to_fp16)[name = string("op_6122_cast_fp16")]; - bool input_121_interleave_0 = const()[name = string("input_121_interleave_0"), val = bool(false)]; - tensor input_121_cast_fp16 = concat(axis = var_6120, interleave = input_121_interleave_0, values = (hidden_states_97_cast_fp16, var_6122_cast_fp16))[name = string("input_121_cast_fp16")]; - tensor normed_145_axes_0 = const()[name = string("normed_145_axes_0"), val = tensor([-1])]; - fp16 var_6117_to_fp16 = const()[name = string("op_6117_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_145_cast_fp16 = layer_norm(axes = normed_145_axes_0, epsilon = var_6117_to_fp16, x = input_121_cast_fp16)[name = string("normed_145_cast_fp16")]; - tensor normed_147_begin_0 = const()[name = string("normed_147_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_147_end_0 = const()[name = string("normed_147_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_147_end_mask_0 = const()[name = string("normed_147_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_147_cast_fp16 = slice_by_index(begin = normed_147_begin_0, end = normed_147_end_0, end_mask = normed_147_end_mask_0, x = normed_145_cast_fp16)[name = string("normed_147_cast_fp16")]; - tensor var_6136_to_fp16 = const()[name = string("op_6136_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166350144)))]; - tensor hidden_states_99_cast_fp16 = mul(x = normed_147_cast_fp16, y = var_6136_to_fp16)[name = string("hidden_states_99_cast_fp16")]; - tensor var_6147 = const()[name = string("op_6147"), val = tensor([0, 2, 1])]; - tensor var_6150_axes_0 = const()[name = string("op_6150_axes_0"), val = tensor([2])]; - tensor var_6148_cast_fp16 = transpose(perm = var_6147, x = hidden_states_99_cast_fp16)[name = string("transpose_179")]; - tensor var_6150_cast_fp16 = expand_dims(axes = var_6150_axes_0, x = var_6148_cast_fp16)[name = string("op_6150_cast_fp16")]; - string query_states_49_pad_type_0 = const()[name = string("query_states_49_pad_type_0"), val = string("valid")]; - tensor query_states_49_strides_0 = const()[name = string("query_states_49_strides_0"), val = tensor([1, 1])]; - tensor query_states_49_pad_0 = const()[name = string("query_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_49_dilations_0 = const()[name = string("query_states_49_dilations_0"), val = tensor([1, 1])]; - int32 query_states_49_groups_0 = const()[name = string("query_states_49_groups_0"), val = int32(1)]; - tensor query_states_49 = conv(dilations = query_states_49_dilations_0, groups = query_states_49_groups_0, pad = query_states_49_pad_0, pad_type = query_states_49_pad_type_0, strides = query_states_49_strides_0, weight = model_model_layers_6_self_attn_q_proj_weight_palettized, x = var_6150_cast_fp16)[name = string("query_states_49")]; - string key_states_61_pad_type_0 = const()[name = string("key_states_61_pad_type_0"), val = string("valid")]; - tensor key_states_61_strides_0 = const()[name = string("key_states_61_strides_0"), val = tensor([1, 1])]; - tensor key_states_61_pad_0 = const()[name = string("key_states_61_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_61_dilations_0 = const()[name = string("key_states_61_dilations_0"), val = tensor([1, 1])]; - int32 key_states_61_groups_0 = const()[name = string("key_states_61_groups_0"), val = int32(1)]; - tensor key_states_61 = conv(dilations = key_states_61_dilations_0, groups = key_states_61_groups_0, pad = key_states_61_pad_0, pad_type = key_states_61_pad_type_0, strides = key_states_61_strides_0, weight = model_model_layers_6_self_attn_k_proj_weight_palettized, x = var_6150_cast_fp16)[name = string("key_states_61")]; - string value_states_49_pad_type_0 = const()[name = string("value_states_49_pad_type_0"), val = string("valid")]; - tensor value_states_49_strides_0 = const()[name = string("value_states_49_strides_0"), val = tensor([1, 1])]; - tensor value_states_49_pad_0 = const()[name = string("value_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_49_dilations_0 = const()[name = string("value_states_49_dilations_0"), val = tensor([1, 1])]; - int32 value_states_49_groups_0 = const()[name = string("value_states_49_groups_0"), val = int32(1)]; - tensor value_states_49 = conv(dilations = value_states_49_dilations_0, groups = value_states_49_groups_0, pad = value_states_49_pad_0, pad_type = value_states_49_pad_type_0, strides = value_states_49_strides_0, weight = model_model_layers_6_self_attn_v_proj_weight_palettized, x = var_6150_cast_fp16)[name = string("value_states_49")]; - tensor var_6192 = const()[name = string("op_6192"), val = tensor([1, 4, 256, 64])]; - tensor var_6193 = reshape(shape = var_6192, x = query_states_49)[name = string("op_6193")]; - tensor var_6198 = const()[name = string("op_6198"), val = tensor([0, 1, 3, 2])]; - tensor var_6203 = const()[name = string("op_6203"), val = tensor([1, 1, 256, 64])]; - tensor var_6204 = reshape(shape = var_6203, x = key_states_61)[name = string("op_6204")]; - tensor var_6209 = const()[name = string("op_6209"), val = tensor([0, 1, 3, 2])]; - tensor var_6214 = const()[name = string("op_6214"), val = tensor([1, 1, 256, 64])]; - tensor var_6215 = reshape(shape = var_6214, x = value_states_49)[name = string("op_6215")]; - tensor var_6220 = const()[name = string("op_6220"), val = tensor([0, 1, 3, 2])]; - int32 var_6231 = const()[name = string("op_6231"), val = int32(-1)]; - fp16 const_259_promoted = const()[name = string("const_259_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_101 = transpose(perm = var_6198, x = var_6193)[name = string("transpose_178")]; - tensor var_6233 = mul(x = hidden_states_101, y = const_259_promoted)[name = string("op_6233")]; - bool input_125_interleave_0 = const()[name = string("input_125_interleave_0"), val = bool(false)]; - tensor input_125 = concat(axis = var_6231, interleave = input_125_interleave_0, values = (hidden_states_101, var_6233))[name = string("input_125")]; - tensor normed_149_axes_0 = const()[name = string("normed_149_axes_0"), val = tensor([-1])]; - fp16 var_6228_to_fp16 = const()[name = string("op_6228_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_149_cast_fp16 = layer_norm(axes = normed_149_axes_0, epsilon = var_6228_to_fp16, x = input_125)[name = string("normed_149_cast_fp16")]; - tensor normed_151_begin_0 = const()[name = string("normed_151_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_151_end_0 = const()[name = string("normed_151_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_151_end_mask_0 = const()[name = string("normed_151_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_151 = slice_by_index(begin = normed_151_begin_0, end = normed_151_end_0, end_mask = normed_151_end_mask_0, x = normed_149_cast_fp16)[name = string("normed_151")]; - tensor var_6247_to_fp16 = const()[name = string("op_6247_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166352512)))]; - tensor q_13_cast_fp16 = mul(x = normed_151, y = var_6247_to_fp16)[name = string("q_13_cast_fp16")]; - int32 var_6258 = const()[name = string("op_6258"), val = int32(-1)]; - fp16 const_263_promoted = const()[name = string("const_263_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_103 = transpose(perm = var_6209, x = var_6204)[name = string("transpose_177")]; - tensor var_6260 = mul(x = hidden_states_103, y = const_263_promoted)[name = string("op_6260")]; - bool input_127_interleave_0 = const()[name = string("input_127_interleave_0"), val = bool(false)]; - tensor input_127 = concat(axis = var_6258, interleave = input_127_interleave_0, values = (hidden_states_103, var_6260))[name = string("input_127")]; - tensor normed_153_axes_0 = const()[name = string("normed_153_axes_0"), val = tensor([-1])]; - fp16 var_6255_to_fp16 = const()[name = string("op_6255_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_153_cast_fp16 = layer_norm(axes = normed_153_axes_0, epsilon = var_6255_to_fp16, x = input_127)[name = string("normed_153_cast_fp16")]; - tensor normed_155_begin_0 = const()[name = string("normed_155_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_155_end_0 = const()[name = string("normed_155_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_155_end_mask_0 = const()[name = string("normed_155_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_155 = slice_by_index(begin = normed_155_begin_0, end = normed_155_end_0, end_mask = normed_155_end_mask_0, x = normed_153_cast_fp16)[name = string("normed_155")]; - tensor var_6274_to_fp16 = const()[name = string("op_6274_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166353088)))]; - tensor k_13_cast_fp16 = mul(x = normed_155, y = var_6274_to_fp16)[name = string("k_13_cast_fp16")]; - tensor var_6288_cast_fp16 = mul(x = q_13_cast_fp16, y = cos_5)[name = string("op_6288_cast_fp16")]; - tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_25_cast_fp16 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = q_13_cast_fp16)[name = string("x1_25_cast_fp16")]; - tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_25_cast_fp16 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = q_13_cast_fp16)[name = string("x2_25_cast_fp16")]; - fp16 const_269_promoted_to_fp16 = const()[name = string("const_269_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_6309_cast_fp16 = mul(x = x2_25_cast_fp16, y = const_269_promoted_to_fp16)[name = string("op_6309_cast_fp16")]; - int32 var_6311 = const()[name = string("op_6311"), val = int32(-1)]; - bool var_6312_interleave_0 = const()[name = string("op_6312_interleave_0"), val = bool(false)]; - tensor var_6312_cast_fp16 = concat(axis = var_6311, interleave = var_6312_interleave_0, values = (var_6309_cast_fp16, x1_25_cast_fp16))[name = string("op_6312_cast_fp16")]; - tensor var_6313_cast_fp16 = mul(x = var_6312_cast_fp16, y = sin_5)[name = string("op_6313_cast_fp16")]; - tensor query_states_51_cast_fp16 = add(x = var_6288_cast_fp16, y = var_6313_cast_fp16)[name = string("query_states_51_cast_fp16")]; - tensor var_6316_cast_fp16 = mul(x = k_13_cast_fp16, y = cos_5)[name = string("op_6316_cast_fp16")]; - tensor x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_27_cast_fp16 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = k_13_cast_fp16)[name = string("x1_27_cast_fp16")]; - tensor x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_27_cast_fp16 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = k_13_cast_fp16)[name = string("x2_27_cast_fp16")]; - fp16 const_272_promoted_to_fp16 = const()[name = string("const_272_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_6337_cast_fp16 = mul(x = x2_27_cast_fp16, y = const_272_promoted_to_fp16)[name = string("op_6337_cast_fp16")]; - int32 var_6339 = const()[name = string("op_6339"), val = int32(-1)]; - bool var_6340_interleave_0 = const()[name = string("op_6340_interleave_0"), val = bool(false)]; - tensor var_6340_cast_fp16 = concat(axis = var_6339, interleave = var_6340_interleave_0, values = (var_6337_cast_fp16, x1_27_cast_fp16))[name = string("op_6340_cast_fp16")]; - tensor var_6341_cast_fp16 = mul(x = var_6340_cast_fp16, y = sin_5)[name = string("op_6341_cast_fp16")]; - tensor key_states_63_cast_fp16 = add(x = var_6316_cast_fp16, y = var_6341_cast_fp16)[name = string("key_states_63_cast_fp16")]; - tensor key_slice_11_begin_0 = const()[name = string("key_slice_11_begin_0"), val = tensor([5, 0, 0, 0])]; - tensor key_slice_11_end_0 = const()[name = string("key_slice_11_end_0"), val = tensor([6, 1, 512, 256])]; - tensor key_slice_11_end_mask_0 = const()[name = string("key_slice_11_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_11_cast_fp16 = slice_by_index(begin = key_slice_11_begin_0, end = key_slice_11_end_0, end_mask = key_slice_11_end_mask_0, x = coreml_update_state_61)[name = string("key_slice_11_cast_fp16")]; - tensor var_6378_begin_0 = const()[name = string("op_6378_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_6378_end_0 = const()[name = string("op_6378_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_6378_end_mask_0 = const()[name = string("op_6378_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_6378_cast_fp16 = slice_by_index(begin = var_6378_begin_0, end = var_6378_end_0, end_mask = var_6378_end_mask_0, x = key_slice_11_cast_fp16)[name = string("op_6378_cast_fp16")]; - int32 var_6405 = const()[name = string("op_6405"), val = int32(2)]; - bool shifted_key_11_interleave_0 = const()[name = string("shifted_key_11_interleave_0"), val = bool(false)]; - tensor shifted_key_11_cast_fp16 = concat(axis = var_6405, interleave = shifted_key_11_interleave_0, values = (var_6378_cast_fp16, key_states_63_cast_fp16))[name = string("shifted_key_11_cast_fp16")]; - tensor concat_88 = const()[name = string("concat_88"), val = tensor([5, 0, 0, 0])]; - tensor concat_89 = const()[name = string("concat_89"), val = tensor([6, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_11_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_11_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_11_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_88, begin_mask = model_model_kv_cache_local_internal_tensor_assign_11_begin_mask_0, end = concat_89, end_mask = model_model_kv_cache_local_internal_tensor_assign_11_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_11_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_11_stride_0, update = shifted_key_11_cast_fp16, x = coreml_update_state_61)[name = string("model_model_kv_cache_local_internal_tensor_assign_11_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_11_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_168_write_state")]; - tensor coreml_update_state_64 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_168")]; - tensor value_slice_11_begin_0 = const()[name = string("value_slice_11_begin_0"), val = tensor([27, 0, 0, 0])]; - tensor value_slice_11_end_0 = const()[name = string("value_slice_11_end_0"), val = tensor([28, 1, 512, 256])]; - tensor value_slice_11_end_mask_0 = const()[name = string("value_slice_11_end_mask_0"), val = tensor([false, true, true, true])]; - tensor value_slice_11_cast_fp16 = slice_by_index(begin = value_slice_11_begin_0, end = value_slice_11_end_0, end_mask = value_slice_11_end_mask_0, x = coreml_update_state_64)[name = string("value_slice_11_cast_fp16")]; - tensor var_6448_begin_0 = const()[name = string("op_6448_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_6448_end_0 = const()[name = string("op_6448_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_6448_end_mask_0 = const()[name = string("op_6448_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_6448_cast_fp16 = slice_by_index(begin = var_6448_begin_0, end = var_6448_end_0, end_mask = var_6448_end_mask_0, x = value_slice_11_cast_fp16)[name = string("op_6448_cast_fp16")]; - int32 var_6475 = const()[name = string("op_6475"), val = int32(2)]; - bool shifted_value_11_interleave_0 = const()[name = string("shifted_value_11_interleave_0"), val = bool(false)]; - tensor value_states_51 = transpose(perm = var_6220, x = var_6215)[name = string("transpose_176")]; - tensor shifted_value_11_cast_fp16 = concat(axis = var_6475, interleave = shifted_value_11_interleave_0, values = (var_6448_cast_fp16, value_states_51))[name = string("shifted_value_11_cast_fp16")]; - tensor concat_90 = const()[name = string("concat_90"), val = tensor([27, 0, 0, 0])]; - tensor concat_91 = const()[name = string("concat_91"), val = tensor([28, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_12_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_12_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_12_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_90, begin_mask = model_model_kv_cache_local_internal_tensor_assign_12_begin_mask_0, end = concat_91, end_mask = model_model_kv_cache_local_internal_tensor_assign_12_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_12_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_12_stride_0, update = shifted_value_11_cast_fp16, x = coreml_update_state_64)[name = string("model_model_kv_cache_local_internal_tensor_assign_12_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_12_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_169_write_state")]; - tensor coreml_update_state_65 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_169")]; - tensor var_6503_begin_0 = const()[name = string("op_6503_begin_0"), val = tensor([5, 0, 0, 0])]; - tensor var_6503_end_0 = const()[name = string("op_6503_end_0"), val = tensor([6, 1, 512, 256])]; - tensor var_6503_end_mask_0 = const()[name = string("op_6503_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_6503_cast_fp16 = slice_by_index(begin = var_6503_begin_0, end = var_6503_end_0, end_mask = var_6503_end_mask_0, x = coreml_update_state_65)[name = string("op_6503_cast_fp16")]; - tensor var_6510_begin_0 = const()[name = string("op_6510_begin_0"), val = tensor([27, 0, 0, 0])]; - tensor var_6510_end_0 = const()[name = string("op_6510_end_0"), val = tensor([28, 1, 512, 256])]; - tensor var_6510_end_mask_0 = const()[name = string("op_6510_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_6510_cast_fp16 = slice_by_index(begin = var_6510_begin_0, end = var_6510_end_0, end_mask = var_6510_end_mask_0, x = coreml_update_state_65)[name = string("op_6510_cast_fp16")]; - tensor var_6549 = const()[name = string("op_6549"), val = tensor([1, 4, 1, 1])]; - tensor x_101_cast_fp16 = tile(reps = var_6549, x = var_6503_cast_fp16)[name = string("x_101_cast_fp16")]; - tensor var_6569 = const()[name = string("op_6569"), val = tensor([1, 4, 1, 1])]; - tensor x_107_cast_fp16 = tile(reps = var_6569, x = var_6510_cast_fp16)[name = string("x_107_cast_fp16")]; - bool var_6596_transpose_x_0 = const()[name = string("op_6596_transpose_x_0"), val = bool(false)]; - bool var_6596_transpose_y_0 = const()[name = string("op_6596_transpose_y_0"), val = bool(true)]; - tensor var_6596 = matmul(transpose_x = var_6596_transpose_x_0, transpose_y = var_6596_transpose_y_0, x = query_states_51_cast_fp16, y = x_101_cast_fp16)[name = string("op_6596")]; - fp16 var_6597_to_fp16 = const()[name = string("op_6597_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_25_cast_fp16 = mul(x = var_6596, y = var_6597_to_fp16)[name = string("attn_weights_25_cast_fp16")]; - tensor attn_weights_27_cast_fp16 = add(x = attn_weights_25_cast_fp16, y = mask_slice_1)[name = string("attn_weights_27_cast_fp16")]; - int32 var_6632 = const()[name = string("op_6632"), val = int32(-1)]; - tensor var_6634_cast_fp16 = softmax(axis = var_6632, x = attn_weights_27_cast_fp16)[name = string("op_6634_cast_fp16")]; - tensor concat_96 = const()[name = string("concat_96"), val = tensor([4, 64, 512])]; - tensor reshape_18_cast_fp16 = reshape(shape = concat_96, x = var_6634_cast_fp16)[name = string("reshape_18_cast_fp16")]; - tensor concat_97 = const()[name = string("concat_97"), val = tensor([4, 512, 256])]; - tensor reshape_19_cast_fp16 = reshape(shape = concat_97, x = x_107_cast_fp16)[name = string("reshape_19_cast_fp16")]; - bool matmul_6_transpose_x_0 = const()[name = string("matmul_6_transpose_x_0"), val = bool(false)]; - bool matmul_6_transpose_y_0 = const()[name = string("matmul_6_transpose_y_0"), val = bool(false)]; - tensor matmul_6_cast_fp16 = matmul(transpose_x = matmul_6_transpose_x_0, transpose_y = matmul_6_transpose_y_0, x = reshape_18_cast_fp16, y = reshape_19_cast_fp16)[name = string("matmul_6_cast_fp16")]; - tensor concat_101 = const()[name = string("concat_101"), val = tensor([1, 4, 64, 256])]; - tensor reshape_20_cast_fp16 = reshape(shape = concat_101, x = matmul_6_cast_fp16)[name = string("reshape_20_cast_fp16")]; - tensor var_6646_perm_0 = const()[name = string("op_6646_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_6665 = const()[name = string("op_6665"), val = tensor([1, 64, 1024])]; - tensor var_6646_cast_fp16 = transpose(perm = var_6646_perm_0, x = reshape_20_cast_fp16)[name = string("transpose_175")]; - tensor attn_output_65_cast_fp16 = reshape(shape = var_6665, x = var_6646_cast_fp16)[name = string("attn_output_65_cast_fp16")]; - tensor var_6670 = const()[name = string("op_6670"), val = tensor([0, 2, 1])]; - string var_6686_pad_type_0 = const()[name = string("op_6686_pad_type_0"), val = string("valid")]; - int32 var_6686_groups_0 = const()[name = string("op_6686_groups_0"), val = int32(1)]; - tensor var_6686_strides_0 = const()[name = string("op_6686_strides_0"), val = tensor([1])]; - tensor var_6686_pad_0 = const()[name = string("op_6686_pad_0"), val = tensor([0, 0])]; - tensor var_6686_dilations_0 = const()[name = string("op_6686_dilations_0"), val = tensor([1])]; - tensor squeeze_6_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166353664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167238464))))[name = string("squeeze_6_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_6671_cast_fp16 = transpose(perm = var_6670, x = attn_output_65_cast_fp16)[name = string("transpose_174")]; - tensor var_6686_cast_fp16 = conv(dilations = var_6686_dilations_0, groups = var_6686_groups_0, pad = var_6686_pad_0, pad_type = var_6686_pad_type_0, strides = var_6686_strides_0, weight = squeeze_6_cast_fp16_to_fp32_to_fp16_palettized, x = var_6671_cast_fp16)[name = string("op_6686_cast_fp16")]; - tensor var_6690 = const()[name = string("op_6690"), val = tensor([0, 2, 1])]; - int32 var_6701 = const()[name = string("op_6701"), val = int32(-1)]; - fp16 const_283_promoted_to_fp16 = const()[name = string("const_283_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_105_cast_fp16 = transpose(perm = var_6690, x = var_6686_cast_fp16)[name = string("transpose_173")]; - tensor var_6703_cast_fp16 = mul(x = hidden_states_105_cast_fp16, y = const_283_promoted_to_fp16)[name = string("op_6703_cast_fp16")]; - bool input_131_interleave_0 = const()[name = string("input_131_interleave_0"), val = bool(false)]; - tensor input_131_cast_fp16 = concat(axis = var_6701, interleave = input_131_interleave_0, values = (hidden_states_105_cast_fp16, var_6703_cast_fp16))[name = string("input_131_cast_fp16")]; - tensor normed_157_axes_0 = const()[name = string("normed_157_axes_0"), val = tensor([-1])]; - fp16 var_6698_to_fp16 = const()[name = string("op_6698_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_157_cast_fp16 = layer_norm(axes = normed_157_axes_0, epsilon = var_6698_to_fp16, x = input_131_cast_fp16)[name = string("normed_157_cast_fp16")]; - tensor normed_159_begin_0 = const()[name = string("normed_159_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_159_end_0 = const()[name = string("normed_159_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_159_end_mask_0 = const()[name = string("normed_159_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_159_cast_fp16 = slice_by_index(begin = normed_159_begin_0, end = normed_159_end_0, end_mask = normed_159_end_mask_0, x = normed_157_cast_fp16)[name = string("normed_159_cast_fp16")]; - tensor var_6717_to_fp16 = const()[name = string("op_6717_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167256960)))]; - tensor attn_output_69_cast_fp16 = mul(x = normed_159_cast_fp16, y = var_6717_to_fp16)[name = string("attn_output_69_cast_fp16")]; - tensor hidden_states_107_cast_fp16 = add(x = hidden_states_97_cast_fp16, y = attn_output_69_cast_fp16)[name = string("hidden_states_107_cast_fp16")]; - int32 var_6730 = const()[name = string("op_6730"), val = int32(-1)]; - fp16 const_287_promoted_to_fp16 = const()[name = string("const_287_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_6732_cast_fp16 = mul(x = hidden_states_107_cast_fp16, y = const_287_promoted_to_fp16)[name = string("op_6732_cast_fp16")]; - bool input_133_interleave_0 = const()[name = string("input_133_interleave_0"), val = bool(false)]; - tensor input_133_cast_fp16 = concat(axis = var_6730, interleave = input_133_interleave_0, values = (hidden_states_107_cast_fp16, var_6732_cast_fp16))[name = string("input_133_cast_fp16")]; - tensor normed_161_axes_0 = const()[name = string("normed_161_axes_0"), val = tensor([-1])]; - fp16 var_6727_to_fp16 = const()[name = string("op_6727_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_161_cast_fp16 = layer_norm(axes = normed_161_axes_0, epsilon = var_6727_to_fp16, x = input_133_cast_fp16)[name = string("normed_161_cast_fp16")]; - tensor normed_163_begin_0 = const()[name = string("normed_163_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_163_end_0 = const()[name = string("normed_163_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_163_end_mask_0 = const()[name = string("normed_163_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_163_cast_fp16 = slice_by_index(begin = normed_163_begin_0, end = normed_163_end_0, end_mask = normed_163_end_mask_0, x = normed_161_cast_fp16)[name = string("normed_163_cast_fp16")]; - tensor var_6746_to_fp16 = const()[name = string("op_6746_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167259328)))]; - tensor x_109_cast_fp16 = mul(x = normed_163_cast_fp16, y = var_6746_to_fp16)[name = string("x_109_cast_fp16")]; - tensor var_6758 = const()[name = string("op_6758"), val = tensor([0, 2, 1])]; - tensor input_135_axes_0 = const()[name = string("input_135_axes_0"), val = tensor([2])]; - tensor var_6759_cast_fp16 = transpose(perm = var_6758, x = x_109_cast_fp16)[name = string("transpose_172")]; - tensor input_135_cast_fp16 = expand_dims(axes = input_135_axes_0, x = var_6759_cast_fp16)[name = string("input_135_cast_fp16")]; - string x_111_pad_type_0 = const()[name = string("x_111_pad_type_0"), val = string("valid")]; - tensor x_111_strides_0 = const()[name = string("x_111_strides_0"), val = tensor([1, 1])]; - tensor x_111_pad_0 = const()[name = string("x_111_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_111_dilations_0 = const()[name = string("x_111_dilations_0"), val = tensor([1, 1])]; - int32 x_111_groups_0 = const()[name = string("x_111_groups_0"), val = int32(1)]; - tensor model_model_layers_6_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1264427456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1270399488))))[name = string("model_model_layers_6_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_111_cast_fp16 = conv(dilations = x_111_dilations_0, groups = x_111_groups_0, pad = x_111_pad_0, pad_type = x_111_pad_type_0, strides = x_111_strides_0, weight = model_model_layers_6_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_135_cast_fp16)[name = string("x_111_cast_fp16")]; - string b_13_pad_type_0 = const()[name = string("b_13_pad_type_0"), val = string("valid")]; - tensor b_13_strides_0 = const()[name = string("b_13_strides_0"), val = tensor([1, 1])]; - tensor b_13_pad_0 = const()[name = string("b_13_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_13_dilations_0 = const()[name = string("b_13_dilations_0"), val = tensor([1, 1])]; - int32 b_13_groups_0 = const()[name = string("b_13_groups_0"), val = int32(1)]; - tensor model_model_layers_6_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1270510144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1276482176))))[name = string("model_model_layers_6_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_13_cast_fp16 = conv(dilations = b_13_dilations_0, groups = b_13_groups_0, pad = b_13_pad_0, pad_type = b_13_pad_type_0, strides = b_13_strides_0, weight = model_model_layers_6_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_135_cast_fp16)[name = string("b_13_cast_fp16")]; - string var_6784_mode_0 = const()[name = string("op_6784_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_6784_cast_fp16 = gelu(mode = var_6784_mode_0, x = x_111_cast_fp16)[name = string("op_6784_cast_fp16")]; - tensor input_137_cast_fp16 = mul(x = var_6784_cast_fp16, y = b_13_cast_fp16)[name = string("input_137_cast_fp16")]; - string e_13_pad_type_0 = const()[name = string("e_13_pad_type_0"), val = string("valid")]; - tensor e_13_strides_0 = const()[name = string("e_13_strides_0"), val = tensor([1, 1])]; - tensor e_13_pad_0 = const()[name = string("e_13_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_13_dilations_0 = const()[name = string("e_13_dilations_0"), val = tensor([1, 1])]; - int32 e_13_groups_0 = const()[name = string("e_13_groups_0"), val = int32(1)]; - tensor model_model_layers_6_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179427072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185399104))))[name = string("model_model_layers_6_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_13_cast_fp16 = conv(dilations = e_13_dilations_0, groups = e_13_groups_0, pad = e_13_pad_0, pad_type = e_13_pad_type_0, strides = e_13_strides_0, weight = model_model_layers_6_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_137_cast_fp16)[name = string("e_13_cast_fp16")]; - tensor var_6792_axes_0 = const()[name = string("op_6792_axes_0"), val = tensor([2])]; - tensor var_6792_cast_fp16 = squeeze(axes = var_6792_axes_0, x = e_13_cast_fp16)[name = string("op_6792_cast_fp16")]; - tensor var_6793 = const()[name = string("op_6793"), val = tensor([0, 2, 1])]; - int32 var_6804 = const()[name = string("op_6804"), val = int32(-1)]; - fp16 const_291_promoted_to_fp16 = const()[name = string("const_291_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_109_cast_fp16 = transpose(perm = var_6793, x = var_6792_cast_fp16)[name = string("transpose_171")]; - tensor var_6806_cast_fp16 = mul(x = hidden_states_109_cast_fp16, y = const_291_promoted_to_fp16)[name = string("op_6806_cast_fp16")]; - bool input_139_interleave_0 = const()[name = string("input_139_interleave_0"), val = bool(false)]; - tensor input_139_cast_fp16 = concat(axis = var_6804, interleave = input_139_interleave_0, values = (hidden_states_109_cast_fp16, var_6806_cast_fp16))[name = string("input_139_cast_fp16")]; - tensor normed_165_axes_0 = const()[name = string("normed_165_axes_0"), val = tensor([-1])]; - fp16 var_6801_to_fp16 = const()[name = string("op_6801_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_165_cast_fp16 = layer_norm(axes = normed_165_axes_0, epsilon = var_6801_to_fp16, x = input_139_cast_fp16)[name = string("normed_165_cast_fp16")]; - tensor normed_167_begin_0 = const()[name = string("normed_167_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_167_end_0 = const()[name = string("normed_167_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_167_end_mask_0 = const()[name = string("normed_167_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_167_cast_fp16 = slice_by_index(begin = normed_167_begin_0, end = normed_167_end_0, end_mask = normed_167_end_mask_0, x = normed_165_cast_fp16)[name = string("normed_167_cast_fp16")]; - tensor var_6820_to_fp16 = const()[name = string("op_6820_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185417600)))]; - tensor hidden_states_111_cast_fp16 = mul(x = normed_167_cast_fp16, y = var_6820_to_fp16)[name = string("hidden_states_111_cast_fp16")]; - tensor hidden_states_113_cast_fp16 = add(x = hidden_states_107_cast_fp16, y = hidden_states_111_cast_fp16)[name = string("hidden_states_113_cast_fp16")]; - int32 var_6874 = const()[name = string("op_6874"), val = int32(-1)]; - fp16 const_296_promoted_to_fp16 = const()[name = string("const_296_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_6876_cast_fp16 = mul(x = hidden_states_113_cast_fp16, y = const_296_promoted_to_fp16)[name = string("op_6876_cast_fp16")]; - bool input_141_interleave_0 = const()[name = string("input_141_interleave_0"), val = bool(false)]; - tensor input_141_cast_fp16 = concat(axis = var_6874, interleave = input_141_interleave_0, values = (hidden_states_113_cast_fp16, var_6876_cast_fp16))[name = string("input_141_cast_fp16")]; - tensor normed_169_axes_0 = const()[name = string("normed_169_axes_0"), val = tensor([-1])]; - fp16 var_6871_to_fp16 = const()[name = string("op_6871_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_169_cast_fp16 = layer_norm(axes = normed_169_axes_0, epsilon = var_6871_to_fp16, x = input_141_cast_fp16)[name = string("normed_169_cast_fp16")]; - tensor normed_171_begin_0 = const()[name = string("normed_171_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_171_end_0 = const()[name = string("normed_171_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_171_end_mask_0 = const()[name = string("normed_171_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_171_cast_fp16 = slice_by_index(begin = normed_171_begin_0, end = normed_171_end_0, end_mask = normed_171_end_mask_0, x = normed_169_cast_fp16)[name = string("normed_171_cast_fp16")]; - tensor var_6890_to_fp16 = const()[name = string("op_6890_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185419968)))]; - tensor hidden_states_115_cast_fp16 = mul(x = normed_171_cast_fp16, y = var_6890_to_fp16)[name = string("hidden_states_115_cast_fp16")]; - tensor var_6901 = const()[name = string("op_6901"), val = tensor([0, 2, 1])]; - tensor var_6904_axes_0 = const()[name = string("op_6904_axes_0"), val = tensor([2])]; - tensor var_6902_cast_fp16 = transpose(perm = var_6901, x = hidden_states_115_cast_fp16)[name = string("transpose_170")]; - tensor var_6904_cast_fp16 = expand_dims(axes = var_6904_axes_0, x = var_6902_cast_fp16)[name = string("op_6904_cast_fp16")]; - string query_states_57_pad_type_0 = const()[name = string("query_states_57_pad_type_0"), val = string("valid")]; - tensor query_states_57_strides_0 = const()[name = string("query_states_57_strides_0"), val = tensor([1, 1])]; - tensor query_states_57_pad_0 = const()[name = string("query_states_57_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_57_dilations_0 = const()[name = string("query_states_57_dilations_0"), val = tensor([1, 1])]; - int32 query_states_57_groups_0 = const()[name = string("query_states_57_groups_0"), val = int32(1)]; - tensor query_states_57 = conv(dilations = query_states_57_dilations_0, groups = query_states_57_groups_0, pad = query_states_57_pad_0, pad_type = query_states_57_pad_type_0, strides = query_states_57_strides_0, weight = model_model_layers_7_self_attn_q_proj_weight_palettized, x = var_6904_cast_fp16)[name = string("query_states_57")]; - string key_states_71_pad_type_0 = const()[name = string("key_states_71_pad_type_0"), val = string("valid")]; - tensor key_states_71_strides_0 = const()[name = string("key_states_71_strides_0"), val = tensor([1, 1])]; - tensor key_states_71_pad_0 = const()[name = string("key_states_71_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_71_dilations_0 = const()[name = string("key_states_71_dilations_0"), val = tensor([1, 1])]; - int32 key_states_71_groups_0 = const()[name = string("key_states_71_groups_0"), val = int32(1)]; - tensor key_states_71 = conv(dilations = key_states_71_dilations_0, groups = key_states_71_groups_0, pad = key_states_71_pad_0, pad_type = key_states_71_pad_type_0, strides = key_states_71_strides_0, weight = model_model_layers_7_self_attn_k_proj_weight_palettized, x = var_6904_cast_fp16)[name = string("key_states_71")]; - string value_states_57_pad_type_0 = const()[name = string("value_states_57_pad_type_0"), val = string("valid")]; - tensor value_states_57_strides_0 = const()[name = string("value_states_57_strides_0"), val = tensor([1, 1])]; - tensor value_states_57_pad_0 = const()[name = string("value_states_57_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_57_dilations_0 = const()[name = string("value_states_57_dilations_0"), val = tensor([1, 1])]; - int32 value_states_57_groups_0 = const()[name = string("value_states_57_groups_0"), val = int32(1)]; - tensor value_states_57 = conv(dilations = value_states_57_dilations_0, groups = value_states_57_groups_0, pad = value_states_57_pad_0, pad_type = value_states_57_pad_type_0, strides = value_states_57_strides_0, weight = model_model_layers_7_self_attn_v_proj_weight_palettized, x = var_6904_cast_fp16)[name = string("value_states_57")]; - tensor var_6946 = const()[name = string("op_6946"), val = tensor([1, 4, 256, 64])]; - tensor var_6947 = reshape(shape = var_6946, x = query_states_57)[name = string("op_6947")]; - tensor var_6952 = const()[name = string("op_6952"), val = tensor([0, 1, 3, 2])]; - tensor var_6957 = const()[name = string("op_6957"), val = tensor([1, 1, 256, 64])]; - tensor var_6958 = reshape(shape = var_6957, x = key_states_71)[name = string("op_6958")]; - tensor var_6963 = const()[name = string("op_6963"), val = tensor([0, 1, 3, 2])]; - tensor var_6968 = const()[name = string("op_6968"), val = tensor([1, 1, 256, 64])]; - tensor var_6969 = reshape(shape = var_6968, x = value_states_57)[name = string("op_6969")]; - tensor var_6974 = const()[name = string("op_6974"), val = tensor([0, 1, 3, 2])]; - int32 var_6985 = const()[name = string("op_6985"), val = int32(-1)]; - fp16 const_301_promoted = const()[name = string("const_301_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_117 = transpose(perm = var_6952, x = var_6947)[name = string("transpose_169")]; - tensor var_6987 = mul(x = hidden_states_117, y = const_301_promoted)[name = string("op_6987")]; - bool input_145_interleave_0 = const()[name = string("input_145_interleave_0"), val = bool(false)]; - tensor input_145 = concat(axis = var_6985, interleave = input_145_interleave_0, values = (hidden_states_117, var_6987))[name = string("input_145")]; - tensor normed_173_axes_0 = const()[name = string("normed_173_axes_0"), val = tensor([-1])]; - fp16 var_6982_to_fp16 = const()[name = string("op_6982_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_173_cast_fp16 = layer_norm(axes = normed_173_axes_0, epsilon = var_6982_to_fp16, x = input_145)[name = string("normed_173_cast_fp16")]; - tensor normed_175_begin_0 = const()[name = string("normed_175_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_175_end_0 = const()[name = string("normed_175_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_175_end_mask_0 = const()[name = string("normed_175_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_175 = slice_by_index(begin = normed_175_begin_0, end = normed_175_end_0, end_mask = normed_175_end_mask_0, x = normed_173_cast_fp16)[name = string("normed_175")]; - tensor var_7001_to_fp16 = const()[name = string("op_7001_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185422336)))]; - tensor q_15_cast_fp16 = mul(x = normed_175, y = var_7001_to_fp16)[name = string("q_15_cast_fp16")]; - int32 var_7012 = const()[name = string("op_7012"), val = int32(-1)]; - fp16 const_305_promoted = const()[name = string("const_305_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_119 = transpose(perm = var_6963, x = var_6958)[name = string("transpose_168")]; - tensor var_7014 = mul(x = hidden_states_119, y = const_305_promoted)[name = string("op_7014")]; - bool input_147_interleave_0 = const()[name = string("input_147_interleave_0"), val = bool(false)]; - tensor input_147 = concat(axis = var_7012, interleave = input_147_interleave_0, values = (hidden_states_119, var_7014))[name = string("input_147")]; - tensor normed_177_axes_0 = const()[name = string("normed_177_axes_0"), val = tensor([-1])]; - fp16 var_7009_to_fp16 = const()[name = string("op_7009_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_177_cast_fp16 = layer_norm(axes = normed_177_axes_0, epsilon = var_7009_to_fp16, x = input_147)[name = string("normed_177_cast_fp16")]; - tensor normed_179_begin_0 = const()[name = string("normed_179_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_179_end_0 = const()[name = string("normed_179_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_179_end_mask_0 = const()[name = string("normed_179_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_179 = slice_by_index(begin = normed_179_begin_0, end = normed_179_end_0, end_mask = normed_179_end_mask_0, x = normed_177_cast_fp16)[name = string("normed_179")]; - tensor var_7028_to_fp16 = const()[name = string("op_7028_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185422912)))]; - tensor k_15_cast_fp16 = mul(x = normed_179, y = var_7028_to_fp16)[name = string("k_15_cast_fp16")]; - tensor var_7042_cast_fp16 = mul(x = q_15_cast_fp16, y = cos_5)[name = string("op_7042_cast_fp16")]; - tensor x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_29_cast_fp16 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = q_15_cast_fp16)[name = string("x1_29_cast_fp16")]; - tensor x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_29_cast_fp16 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = q_15_cast_fp16)[name = string("x2_29_cast_fp16")]; - fp16 const_311_promoted_to_fp16 = const()[name = string("const_311_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_7063_cast_fp16 = mul(x = x2_29_cast_fp16, y = const_311_promoted_to_fp16)[name = string("op_7063_cast_fp16")]; - int32 var_7065 = const()[name = string("op_7065"), val = int32(-1)]; - bool var_7066_interleave_0 = const()[name = string("op_7066_interleave_0"), val = bool(false)]; - tensor var_7066_cast_fp16 = concat(axis = var_7065, interleave = var_7066_interleave_0, values = (var_7063_cast_fp16, x1_29_cast_fp16))[name = string("op_7066_cast_fp16")]; - tensor var_7067_cast_fp16 = mul(x = var_7066_cast_fp16, y = sin_5)[name = string("op_7067_cast_fp16")]; - tensor query_states_59_cast_fp16 = add(x = var_7042_cast_fp16, y = var_7067_cast_fp16)[name = string("query_states_59_cast_fp16")]; - tensor var_7070_cast_fp16 = mul(x = k_15_cast_fp16, y = cos_5)[name = string("op_7070_cast_fp16")]; - tensor x1_31_begin_0 = const()[name = string("x1_31_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_31_end_0 = const()[name = string("x1_31_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_31_end_mask_0 = const()[name = string("x1_31_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_31_cast_fp16 = slice_by_index(begin = x1_31_begin_0, end = x1_31_end_0, end_mask = x1_31_end_mask_0, x = k_15_cast_fp16)[name = string("x1_31_cast_fp16")]; - tensor x2_31_begin_0 = const()[name = string("x2_31_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_31_end_0 = const()[name = string("x2_31_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_31_end_mask_0 = const()[name = string("x2_31_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_31_cast_fp16 = slice_by_index(begin = x2_31_begin_0, end = x2_31_end_0, end_mask = x2_31_end_mask_0, x = k_15_cast_fp16)[name = string("x2_31_cast_fp16")]; - fp16 const_314_promoted_to_fp16 = const()[name = string("const_314_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_7091_cast_fp16 = mul(x = x2_31_cast_fp16, y = const_314_promoted_to_fp16)[name = string("op_7091_cast_fp16")]; - int32 var_7093 = const()[name = string("op_7093"), val = int32(-1)]; - bool var_7094_interleave_0 = const()[name = string("op_7094_interleave_0"), val = bool(false)]; - tensor var_7094_cast_fp16 = concat(axis = var_7093, interleave = var_7094_interleave_0, values = (var_7091_cast_fp16, x1_31_cast_fp16))[name = string("op_7094_cast_fp16")]; - tensor var_7095_cast_fp16 = mul(x = var_7094_cast_fp16, y = sin_5)[name = string("op_7095_cast_fp16")]; - tensor key_states_73_cast_fp16 = add(x = var_7070_cast_fp16, y = var_7095_cast_fp16)[name = string("key_states_73_cast_fp16")]; - tensor key_slice_13_begin_0 = const()[name = string("key_slice_13_begin_0"), val = tensor([6, 0, 0, 0])]; - tensor key_slice_13_end_0 = const()[name = string("key_slice_13_end_0"), val = tensor([7, 1, 512, 256])]; - tensor key_slice_13_end_mask_0 = const()[name = string("key_slice_13_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_13_cast_fp16 = slice_by_index(begin = key_slice_13_begin_0, end = key_slice_13_end_0, end_mask = key_slice_13_end_mask_0, x = coreml_update_state_65)[name = string("key_slice_13_cast_fp16")]; - tensor var_7132_begin_0 = const()[name = string("op_7132_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_7132_end_0 = const()[name = string("op_7132_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_7132_end_mask_0 = const()[name = string("op_7132_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_7132_cast_fp16 = slice_by_index(begin = var_7132_begin_0, end = var_7132_end_0, end_mask = var_7132_end_mask_0, x = key_slice_13_cast_fp16)[name = string("op_7132_cast_fp16")]; - int32 var_7159 = const()[name = string("op_7159"), val = int32(2)]; - bool shifted_key_13_interleave_0 = const()[name = string("shifted_key_13_interleave_0"), val = bool(false)]; - tensor shifted_key_13_cast_fp16 = concat(axis = var_7159, interleave = shifted_key_13_interleave_0, values = (var_7132_cast_fp16, key_states_73_cast_fp16))[name = string("shifted_key_13_cast_fp16")]; - tensor concat_102 = const()[name = string("concat_102"), val = tensor([6, 0, 0, 0])]; - tensor concat_103 = const()[name = string("concat_103"), val = tensor([7, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_13_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_13_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_13_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_102, begin_mask = model_model_kv_cache_local_internal_tensor_assign_13_begin_mask_0, end = concat_103, end_mask = model_model_kv_cache_local_internal_tensor_assign_13_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_13_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_13_stride_0, update = shifted_key_13_cast_fp16, x = coreml_update_state_65)[name = string("model_model_kv_cache_local_internal_tensor_assign_13_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_13_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_170_write_state")]; - tensor coreml_update_state_66 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_170")]; - tensor value_slice_13_begin_0 = const()[name = string("value_slice_13_begin_0"), val = tensor([28, 0, 0, 0])]; - tensor value_slice_13_end_0 = const()[name = string("value_slice_13_end_0"), val = tensor([29, 1, 512, 256])]; - tensor value_slice_13_end_mask_0 = const()[name = string("value_slice_13_end_mask_0"), val = tensor([false, true, true, true])]; - tensor value_slice_13_cast_fp16 = slice_by_index(begin = value_slice_13_begin_0, end = value_slice_13_end_0, end_mask = value_slice_13_end_mask_0, x = coreml_update_state_66)[name = string("value_slice_13_cast_fp16")]; - tensor var_7202_begin_0 = const()[name = string("op_7202_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_7202_end_0 = const()[name = string("op_7202_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_7202_end_mask_0 = const()[name = string("op_7202_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_7202_cast_fp16 = slice_by_index(begin = var_7202_begin_0, end = var_7202_end_0, end_mask = var_7202_end_mask_0, x = value_slice_13_cast_fp16)[name = string("op_7202_cast_fp16")]; - int32 var_7229 = const()[name = string("op_7229"), val = int32(2)]; - bool shifted_value_13_interleave_0 = const()[name = string("shifted_value_13_interleave_0"), val = bool(false)]; - tensor value_states_59 = transpose(perm = var_6974, x = var_6969)[name = string("transpose_167")]; - tensor shifted_value_13_cast_fp16 = concat(axis = var_7229, interleave = shifted_value_13_interleave_0, values = (var_7202_cast_fp16, value_states_59))[name = string("shifted_value_13_cast_fp16")]; - tensor concat_104 = const()[name = string("concat_104"), val = tensor([28, 0, 0, 0])]; - tensor concat_105 = const()[name = string("concat_105"), val = tensor([29, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_14_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_14_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_14_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_104, begin_mask = model_model_kv_cache_local_internal_tensor_assign_14_begin_mask_0, end = concat_105, end_mask = model_model_kv_cache_local_internal_tensor_assign_14_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_14_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_14_stride_0, update = shifted_value_13_cast_fp16, x = coreml_update_state_66)[name = string("model_model_kv_cache_local_internal_tensor_assign_14_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_14_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_171_write_state")]; - tensor coreml_update_state_67 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_171")]; - tensor var_7257_begin_0 = const()[name = string("op_7257_begin_0"), val = tensor([6, 0, 0, 0])]; - tensor var_7257_end_0 = const()[name = string("op_7257_end_0"), val = tensor([7, 1, 512, 256])]; - tensor var_7257_end_mask_0 = const()[name = string("op_7257_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_7257_cast_fp16 = slice_by_index(begin = var_7257_begin_0, end = var_7257_end_0, end_mask = var_7257_end_mask_0, x = coreml_update_state_67)[name = string("op_7257_cast_fp16")]; - tensor var_7264_begin_0 = const()[name = string("op_7264_begin_0"), val = tensor([28, 0, 0, 0])]; - tensor var_7264_end_0 = const()[name = string("op_7264_end_0"), val = tensor([29, 1, 512, 256])]; - tensor var_7264_end_mask_0 = const()[name = string("op_7264_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_7264_cast_fp16 = slice_by_index(begin = var_7264_begin_0, end = var_7264_end_0, end_mask = var_7264_end_mask_0, x = coreml_update_state_67)[name = string("op_7264_cast_fp16")]; - tensor var_7303 = const()[name = string("op_7303"), val = tensor([1, 4, 1, 1])]; - tensor x_117_cast_fp16 = tile(reps = var_7303, x = var_7257_cast_fp16)[name = string("x_117_cast_fp16")]; - tensor var_7323 = const()[name = string("op_7323"), val = tensor([1, 4, 1, 1])]; - tensor x_123_cast_fp16 = tile(reps = var_7323, x = var_7264_cast_fp16)[name = string("x_123_cast_fp16")]; - bool var_7350_transpose_x_0 = const()[name = string("op_7350_transpose_x_0"), val = bool(false)]; - bool var_7350_transpose_y_0 = const()[name = string("op_7350_transpose_y_0"), val = bool(true)]; - tensor var_7350 = matmul(transpose_x = var_7350_transpose_x_0, transpose_y = var_7350_transpose_y_0, x = query_states_59_cast_fp16, y = x_117_cast_fp16)[name = string("op_7350")]; - fp16 var_7351_to_fp16 = const()[name = string("op_7351_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_29_cast_fp16 = mul(x = var_7350, y = var_7351_to_fp16)[name = string("attn_weights_29_cast_fp16")]; - tensor attn_weights_31_cast_fp16 = add(x = attn_weights_29_cast_fp16, y = mask_slice_1)[name = string("attn_weights_31_cast_fp16")]; - int32 var_7386 = const()[name = string("op_7386"), val = int32(-1)]; - tensor var_7388_cast_fp16 = softmax(axis = var_7386, x = attn_weights_31_cast_fp16)[name = string("op_7388_cast_fp16")]; - tensor concat_110 = const()[name = string("concat_110"), val = tensor([4, 64, 512])]; - tensor reshape_21_cast_fp16 = reshape(shape = concat_110, x = var_7388_cast_fp16)[name = string("reshape_21_cast_fp16")]; - tensor concat_111 = const()[name = string("concat_111"), val = tensor([4, 512, 256])]; - tensor reshape_22_cast_fp16 = reshape(shape = concat_111, x = x_123_cast_fp16)[name = string("reshape_22_cast_fp16")]; - bool matmul_7_transpose_x_0 = const()[name = string("matmul_7_transpose_x_0"), val = bool(false)]; - bool matmul_7_transpose_y_0 = const()[name = string("matmul_7_transpose_y_0"), val = bool(false)]; - tensor matmul_7_cast_fp16 = matmul(transpose_x = matmul_7_transpose_x_0, transpose_y = matmul_7_transpose_y_0, x = reshape_21_cast_fp16, y = reshape_22_cast_fp16)[name = string("matmul_7_cast_fp16")]; - tensor concat_115 = const()[name = string("concat_115"), val = tensor([1, 4, 64, 256])]; - tensor reshape_23_cast_fp16 = reshape(shape = concat_115, x = matmul_7_cast_fp16)[name = string("reshape_23_cast_fp16")]; - tensor var_7400_perm_0 = const()[name = string("op_7400_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_7419 = const()[name = string("op_7419"), val = tensor([1, 64, 1024])]; - tensor var_7400_cast_fp16 = transpose(perm = var_7400_perm_0, x = reshape_23_cast_fp16)[name = string("transpose_166")]; - tensor attn_output_75_cast_fp16 = reshape(shape = var_7419, x = var_7400_cast_fp16)[name = string("attn_output_75_cast_fp16")]; - tensor var_7424 = const()[name = string("op_7424"), val = tensor([0, 2, 1])]; - string var_7440_pad_type_0 = const()[name = string("op_7440_pad_type_0"), val = string("valid")]; - int32 var_7440_groups_0 = const()[name = string("op_7440_groups_0"), val = int32(1)]; - tensor var_7440_strides_0 = const()[name = string("op_7440_strides_0"), val = tensor([1])]; - tensor var_7440_pad_0 = const()[name = string("op_7440_pad_0"), val = tensor([0, 0])]; - tensor var_7440_dilations_0 = const()[name = string("op_7440_dilations_0"), val = tensor([1])]; - tensor squeeze_7_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185423488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186308288))))[name = string("squeeze_7_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_7425_cast_fp16 = transpose(perm = var_7424, x = attn_output_75_cast_fp16)[name = string("transpose_165")]; - tensor var_7440_cast_fp16 = conv(dilations = var_7440_dilations_0, groups = var_7440_groups_0, pad = var_7440_pad_0, pad_type = var_7440_pad_type_0, strides = var_7440_strides_0, weight = squeeze_7_cast_fp16_to_fp32_to_fp16_palettized, x = var_7425_cast_fp16)[name = string("op_7440_cast_fp16")]; - tensor var_7444 = const()[name = string("op_7444"), val = tensor([0, 2, 1])]; - int32 var_7455 = const()[name = string("op_7455"), val = int32(-1)]; - fp16 const_325_promoted_to_fp16 = const()[name = string("const_325_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_121_cast_fp16 = transpose(perm = var_7444, x = var_7440_cast_fp16)[name = string("transpose_164")]; - tensor var_7457_cast_fp16 = mul(x = hidden_states_121_cast_fp16, y = const_325_promoted_to_fp16)[name = string("op_7457_cast_fp16")]; - bool input_151_interleave_0 = const()[name = string("input_151_interleave_0"), val = bool(false)]; - tensor input_151_cast_fp16 = concat(axis = var_7455, interleave = input_151_interleave_0, values = (hidden_states_121_cast_fp16, var_7457_cast_fp16))[name = string("input_151_cast_fp16")]; - tensor normed_181_axes_0 = const()[name = string("normed_181_axes_0"), val = tensor([-1])]; - fp16 var_7452_to_fp16 = const()[name = string("op_7452_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_181_cast_fp16 = layer_norm(axes = normed_181_axes_0, epsilon = var_7452_to_fp16, x = input_151_cast_fp16)[name = string("normed_181_cast_fp16")]; - tensor normed_183_begin_0 = const()[name = string("normed_183_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_183_end_0 = const()[name = string("normed_183_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_183_end_mask_0 = const()[name = string("normed_183_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_183_cast_fp16 = slice_by_index(begin = normed_183_begin_0, end = normed_183_end_0, end_mask = normed_183_end_mask_0, x = normed_181_cast_fp16)[name = string("normed_183_cast_fp16")]; - tensor var_7471_to_fp16 = const()[name = string("op_7471_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186326784)))]; - tensor attn_output_79_cast_fp16 = mul(x = normed_183_cast_fp16, y = var_7471_to_fp16)[name = string("attn_output_79_cast_fp16")]; - tensor hidden_states_123_cast_fp16 = add(x = hidden_states_113_cast_fp16, y = attn_output_79_cast_fp16)[name = string("hidden_states_123_cast_fp16")]; - int32 var_7484 = const()[name = string("op_7484"), val = int32(-1)]; - fp16 const_329_promoted_to_fp16 = const()[name = string("const_329_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_7486_cast_fp16 = mul(x = hidden_states_123_cast_fp16, y = const_329_promoted_to_fp16)[name = string("op_7486_cast_fp16")]; - bool input_153_interleave_0 = const()[name = string("input_153_interleave_0"), val = bool(false)]; - tensor input_153_cast_fp16 = concat(axis = var_7484, interleave = input_153_interleave_0, values = (hidden_states_123_cast_fp16, var_7486_cast_fp16))[name = string("input_153_cast_fp16")]; - tensor normed_185_axes_0 = const()[name = string("normed_185_axes_0"), val = tensor([-1])]; - fp16 var_7481_to_fp16 = const()[name = string("op_7481_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_185_cast_fp16 = layer_norm(axes = normed_185_axes_0, epsilon = var_7481_to_fp16, x = input_153_cast_fp16)[name = string("normed_185_cast_fp16")]; - tensor normed_187_begin_0 = const()[name = string("normed_187_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_187_end_0 = const()[name = string("normed_187_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_187_end_mask_0 = const()[name = string("normed_187_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_187_cast_fp16 = slice_by_index(begin = normed_187_begin_0, end = normed_187_end_0, end_mask = normed_187_end_mask_0, x = normed_185_cast_fp16)[name = string("normed_187_cast_fp16")]; - tensor var_7500_to_fp16 = const()[name = string("op_7500_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186329152)))]; - tensor x_125_cast_fp16 = mul(x = normed_187_cast_fp16, y = var_7500_to_fp16)[name = string("x_125_cast_fp16")]; - tensor var_7512 = const()[name = string("op_7512"), val = tensor([0, 2, 1])]; - tensor input_155_axes_0 = const()[name = string("input_155_axes_0"), val = tensor([2])]; - tensor var_7513_cast_fp16 = transpose(perm = var_7512, x = x_125_cast_fp16)[name = string("transpose_163")]; - tensor input_155_cast_fp16 = expand_dims(axes = input_155_axes_0, x = var_7513_cast_fp16)[name = string("input_155_cast_fp16")]; - string x_127_pad_type_0 = const()[name = string("x_127_pad_type_0"), val = string("valid")]; - tensor x_127_strides_0 = const()[name = string("x_127_strides_0"), val = tensor([1, 1])]; - tensor x_127_pad_0 = const()[name = string("x_127_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_127_dilations_0 = const()[name = string("x_127_dilations_0"), val = tensor([1, 1])]; - int32 x_127_groups_0 = const()[name = string("x_127_groups_0"), val = int32(1)]; - tensor model_model_layers_7_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1276592832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1282564864))))[name = string("model_model_layers_7_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_127_cast_fp16 = conv(dilations = x_127_dilations_0, groups = x_127_groups_0, pad = x_127_pad_0, pad_type = x_127_pad_type_0, strides = x_127_strides_0, weight = model_model_layers_7_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_155_cast_fp16)[name = string("x_127_cast_fp16")]; - string b_15_pad_type_0 = const()[name = string("b_15_pad_type_0"), val = string("valid")]; - tensor b_15_strides_0 = const()[name = string("b_15_strides_0"), val = tensor([1, 1])]; - tensor b_15_pad_0 = const()[name = string("b_15_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_15_dilations_0 = const()[name = string("b_15_dilations_0"), val = tensor([1, 1])]; - int32 b_15_groups_0 = const()[name = string("b_15_groups_0"), val = int32(1)]; - tensor model_model_layers_7_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1282675520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1288647552))))[name = string("model_model_layers_7_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_15_cast_fp16 = conv(dilations = b_15_dilations_0, groups = b_15_groups_0, pad = b_15_pad_0, pad_type = b_15_pad_type_0, strides = b_15_strides_0, weight = model_model_layers_7_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_155_cast_fp16)[name = string("b_15_cast_fp16")]; - string var_7538_mode_0 = const()[name = string("op_7538_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_7538_cast_fp16 = gelu(mode = var_7538_mode_0, x = x_127_cast_fp16)[name = string("op_7538_cast_fp16")]; - tensor input_157_cast_fp16 = mul(x = var_7538_cast_fp16, y = b_15_cast_fp16)[name = string("input_157_cast_fp16")]; - string e_15_pad_type_0 = const()[name = string("e_15_pad_type_0"), val = string("valid")]; - tensor e_15_strides_0 = const()[name = string("e_15_strides_0"), val = tensor([1, 1])]; - tensor e_15_pad_0 = const()[name = string("e_15_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_15_dilations_0 = const()[name = string("e_15_dilations_0"), val = tensor([1, 1])]; - int32 e_15_groups_0 = const()[name = string("e_15_groups_0"), val = int32(1)]; - tensor model_model_layers_7_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198496896))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204468928))))[name = string("model_model_layers_7_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_15_cast_fp16 = conv(dilations = e_15_dilations_0, groups = e_15_groups_0, pad = e_15_pad_0, pad_type = e_15_pad_type_0, strides = e_15_strides_0, weight = model_model_layers_7_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_157_cast_fp16)[name = string("e_15_cast_fp16")]; - tensor var_7546_axes_0 = const()[name = string("op_7546_axes_0"), val = tensor([2])]; - tensor var_7546_cast_fp16 = squeeze(axes = var_7546_axes_0, x = e_15_cast_fp16)[name = string("op_7546_cast_fp16")]; - tensor var_7547 = const()[name = string("op_7547"), val = tensor([0, 2, 1])]; - int32 var_7558 = const()[name = string("op_7558"), val = int32(-1)]; - fp16 const_333_promoted_to_fp16 = const()[name = string("const_333_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_125_cast_fp16 = transpose(perm = var_7547, x = var_7546_cast_fp16)[name = string("transpose_162")]; - tensor var_7560_cast_fp16 = mul(x = hidden_states_125_cast_fp16, y = const_333_promoted_to_fp16)[name = string("op_7560_cast_fp16")]; - bool input_159_interleave_0 = const()[name = string("input_159_interleave_0"), val = bool(false)]; - tensor input_159_cast_fp16 = concat(axis = var_7558, interleave = input_159_interleave_0, values = (hidden_states_125_cast_fp16, var_7560_cast_fp16))[name = string("input_159_cast_fp16")]; - tensor normed_189_axes_0 = const()[name = string("normed_189_axes_0"), val = tensor([-1])]; - fp16 var_7555_to_fp16 = const()[name = string("op_7555_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_189_cast_fp16 = layer_norm(axes = normed_189_axes_0, epsilon = var_7555_to_fp16, x = input_159_cast_fp16)[name = string("normed_189_cast_fp16")]; - tensor normed_191_begin_0 = const()[name = string("normed_191_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_191_end_0 = const()[name = string("normed_191_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_191_end_mask_0 = const()[name = string("normed_191_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_191_cast_fp16 = slice_by_index(begin = normed_191_begin_0, end = normed_191_end_0, end_mask = normed_191_end_mask_0, x = normed_189_cast_fp16)[name = string("normed_191_cast_fp16")]; - tensor var_7574_to_fp16 = const()[name = string("op_7574_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204487424)))]; - tensor hidden_states_127_cast_fp16 = mul(x = normed_191_cast_fp16, y = var_7574_to_fp16)[name = string("hidden_states_127_cast_fp16")]; - tensor hidden_states_129_cast_fp16 = add(x = hidden_states_123_cast_fp16, y = hidden_states_127_cast_fp16)[name = string("hidden_states_129_cast_fp16")]; - int32 var_7628 = const()[name = string("op_7628"), val = int32(-1)]; - fp16 const_338_promoted_to_fp16 = const()[name = string("const_338_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_7630_cast_fp16 = mul(x = hidden_states_129_cast_fp16, y = const_338_promoted_to_fp16)[name = string("op_7630_cast_fp16")]; - bool input_161_interleave_0 = const()[name = string("input_161_interleave_0"), val = bool(false)]; - tensor input_161_cast_fp16 = concat(axis = var_7628, interleave = input_161_interleave_0, values = (hidden_states_129_cast_fp16, var_7630_cast_fp16))[name = string("input_161_cast_fp16")]; - tensor normed_193_axes_0 = const()[name = string("normed_193_axes_0"), val = tensor([-1])]; - fp16 var_7625_to_fp16 = const()[name = string("op_7625_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_193_cast_fp16 = layer_norm(axes = normed_193_axes_0, epsilon = var_7625_to_fp16, x = input_161_cast_fp16)[name = string("normed_193_cast_fp16")]; - tensor normed_195_begin_0 = const()[name = string("normed_195_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_195_end_0 = const()[name = string("normed_195_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_195_end_mask_0 = const()[name = string("normed_195_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_195_cast_fp16 = slice_by_index(begin = normed_195_begin_0, end = normed_195_end_0, end_mask = normed_195_end_mask_0, x = normed_193_cast_fp16)[name = string("normed_195_cast_fp16")]; - tensor var_7644_to_fp16 = const()[name = string("op_7644_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204489792)))]; - tensor hidden_states_131_cast_fp16 = mul(x = normed_195_cast_fp16, y = var_7644_to_fp16)[name = string("hidden_states_131_cast_fp16")]; - tensor var_7655 = const()[name = string("op_7655"), val = tensor([0, 2, 1])]; - tensor var_7658_axes_0 = const()[name = string("op_7658_axes_0"), val = tensor([2])]; - tensor var_7656_cast_fp16 = transpose(perm = var_7655, x = hidden_states_131_cast_fp16)[name = string("transpose_161")]; - tensor var_7658_cast_fp16 = expand_dims(axes = var_7658_axes_0, x = var_7656_cast_fp16)[name = string("op_7658_cast_fp16")]; - string query_states_65_pad_type_0 = const()[name = string("query_states_65_pad_type_0"), val = string("valid")]; - tensor query_states_65_strides_0 = const()[name = string("query_states_65_strides_0"), val = tensor([1, 1])]; - tensor query_states_65_pad_0 = const()[name = string("query_states_65_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_65_dilations_0 = const()[name = string("query_states_65_dilations_0"), val = tensor([1, 1])]; - int32 query_states_65_groups_0 = const()[name = string("query_states_65_groups_0"), val = int32(1)]; - tensor query_states_65 = conv(dilations = query_states_65_dilations_0, groups = query_states_65_groups_0, pad = query_states_65_pad_0, pad_type = query_states_65_pad_type_0, strides = query_states_65_strides_0, weight = model_model_layers_8_self_attn_q_proj_weight_palettized, x = var_7658_cast_fp16)[name = string("query_states_65")]; - string key_states_81_pad_type_0 = const()[name = string("key_states_81_pad_type_0"), val = string("valid")]; - tensor key_states_81_strides_0 = const()[name = string("key_states_81_strides_0"), val = tensor([1, 1])]; - tensor key_states_81_pad_0 = const()[name = string("key_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_81_dilations_0 = const()[name = string("key_states_81_dilations_0"), val = tensor([1, 1])]; - int32 key_states_81_groups_0 = const()[name = string("key_states_81_groups_0"), val = int32(1)]; - tensor key_states_81 = conv(dilations = key_states_81_dilations_0, groups = key_states_81_groups_0, pad = key_states_81_pad_0, pad_type = key_states_81_pad_type_0, strides = key_states_81_strides_0, weight = model_model_layers_8_self_attn_k_proj_weight_palettized, x = var_7658_cast_fp16)[name = string("key_states_81")]; - string value_states_65_pad_type_0 = const()[name = string("value_states_65_pad_type_0"), val = string("valid")]; - tensor value_states_65_strides_0 = const()[name = string("value_states_65_strides_0"), val = tensor([1, 1])]; - tensor value_states_65_pad_0 = const()[name = string("value_states_65_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_65_dilations_0 = const()[name = string("value_states_65_dilations_0"), val = tensor([1, 1])]; - int32 value_states_65_groups_0 = const()[name = string("value_states_65_groups_0"), val = int32(1)]; - tensor value_states_65 = conv(dilations = value_states_65_dilations_0, groups = value_states_65_groups_0, pad = value_states_65_pad_0, pad_type = value_states_65_pad_type_0, strides = value_states_65_strides_0, weight = model_model_layers_8_self_attn_v_proj_weight_palettized, x = var_7658_cast_fp16)[name = string("value_states_65")]; - tensor var_7700 = const()[name = string("op_7700"), val = tensor([1, 4, 256, 64])]; - tensor var_7701 = reshape(shape = var_7700, x = query_states_65)[name = string("op_7701")]; - tensor var_7706 = const()[name = string("op_7706"), val = tensor([0, 1, 3, 2])]; - tensor var_7711 = const()[name = string("op_7711"), val = tensor([1, 1, 256, 64])]; - tensor var_7712 = reshape(shape = var_7711, x = key_states_81)[name = string("op_7712")]; - tensor var_7717 = const()[name = string("op_7717"), val = tensor([0, 1, 3, 2])]; - tensor var_7722 = const()[name = string("op_7722"), val = tensor([1, 1, 256, 64])]; - tensor var_7723 = reshape(shape = var_7722, x = value_states_65)[name = string("op_7723")]; - tensor var_7728 = const()[name = string("op_7728"), val = tensor([0, 1, 3, 2])]; - int32 var_7739 = const()[name = string("op_7739"), val = int32(-1)]; - fp16 const_343_promoted = const()[name = string("const_343_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_133 = transpose(perm = var_7706, x = var_7701)[name = string("transpose_160")]; - tensor var_7741 = mul(x = hidden_states_133, y = const_343_promoted)[name = string("op_7741")]; - bool input_165_interleave_0 = const()[name = string("input_165_interleave_0"), val = bool(false)]; - tensor input_165 = concat(axis = var_7739, interleave = input_165_interleave_0, values = (hidden_states_133, var_7741))[name = string("input_165")]; - tensor normed_197_axes_0 = const()[name = string("normed_197_axes_0"), val = tensor([-1])]; - fp16 var_7736_to_fp16 = const()[name = string("op_7736_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_197_cast_fp16 = layer_norm(axes = normed_197_axes_0, epsilon = var_7736_to_fp16, x = input_165)[name = string("normed_197_cast_fp16")]; - tensor normed_199_begin_0 = const()[name = string("normed_199_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_199_end_0 = const()[name = string("normed_199_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_199_end_mask_0 = const()[name = string("normed_199_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_199 = slice_by_index(begin = normed_199_begin_0, end = normed_199_end_0, end_mask = normed_199_end_mask_0, x = normed_197_cast_fp16)[name = string("normed_199")]; - tensor var_7755_to_fp16 = const()[name = string("op_7755_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204492160)))]; - tensor q_17_cast_fp16 = mul(x = normed_199, y = var_7755_to_fp16)[name = string("q_17_cast_fp16")]; - int32 var_7766 = const()[name = string("op_7766"), val = int32(-1)]; - fp16 const_347_promoted = const()[name = string("const_347_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_135 = transpose(perm = var_7717, x = var_7712)[name = string("transpose_159")]; - tensor var_7768 = mul(x = hidden_states_135, y = const_347_promoted)[name = string("op_7768")]; - bool input_167_interleave_0 = const()[name = string("input_167_interleave_0"), val = bool(false)]; - tensor input_167 = concat(axis = var_7766, interleave = input_167_interleave_0, values = (hidden_states_135, var_7768))[name = string("input_167")]; - tensor normed_201_axes_0 = const()[name = string("normed_201_axes_0"), val = tensor([-1])]; - fp16 var_7763_to_fp16 = const()[name = string("op_7763_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_201_cast_fp16 = layer_norm(axes = normed_201_axes_0, epsilon = var_7763_to_fp16, x = input_167)[name = string("normed_201_cast_fp16")]; - tensor normed_203_begin_0 = const()[name = string("normed_203_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_203_end_0 = const()[name = string("normed_203_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_203_end_mask_0 = const()[name = string("normed_203_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_203 = slice_by_index(begin = normed_203_begin_0, end = normed_203_end_0, end_mask = normed_203_end_mask_0, x = normed_201_cast_fp16)[name = string("normed_203")]; - tensor var_7782_to_fp16 = const()[name = string("op_7782_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204492736)))]; - tensor k_17_cast_fp16 = mul(x = normed_203, y = var_7782_to_fp16)[name = string("k_17_cast_fp16")]; - tensor var_7796_cast_fp16 = mul(x = q_17_cast_fp16, y = cos_5)[name = string("op_7796_cast_fp16")]; - tensor x1_33_begin_0 = const()[name = string("x1_33_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_33_end_0 = const()[name = string("x1_33_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_33_end_mask_0 = const()[name = string("x1_33_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_33_cast_fp16 = slice_by_index(begin = x1_33_begin_0, end = x1_33_end_0, end_mask = x1_33_end_mask_0, x = q_17_cast_fp16)[name = string("x1_33_cast_fp16")]; - tensor x2_33_begin_0 = const()[name = string("x2_33_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_33_end_0 = const()[name = string("x2_33_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_33_end_mask_0 = const()[name = string("x2_33_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_33_cast_fp16 = slice_by_index(begin = x2_33_begin_0, end = x2_33_end_0, end_mask = x2_33_end_mask_0, x = q_17_cast_fp16)[name = string("x2_33_cast_fp16")]; - fp16 const_353_promoted_to_fp16 = const()[name = string("const_353_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_7817_cast_fp16 = mul(x = x2_33_cast_fp16, y = const_353_promoted_to_fp16)[name = string("op_7817_cast_fp16")]; - int32 var_7819 = const()[name = string("op_7819"), val = int32(-1)]; - bool var_7820_interleave_0 = const()[name = string("op_7820_interleave_0"), val = bool(false)]; - tensor var_7820_cast_fp16 = concat(axis = var_7819, interleave = var_7820_interleave_0, values = (var_7817_cast_fp16, x1_33_cast_fp16))[name = string("op_7820_cast_fp16")]; - tensor var_7821_cast_fp16 = mul(x = var_7820_cast_fp16, y = sin_5)[name = string("op_7821_cast_fp16")]; - tensor query_states_67_cast_fp16 = add(x = var_7796_cast_fp16, y = var_7821_cast_fp16)[name = string("query_states_67_cast_fp16")]; - tensor var_7824_cast_fp16 = mul(x = k_17_cast_fp16, y = cos_5)[name = string("op_7824_cast_fp16")]; - tensor x1_35_begin_0 = const()[name = string("x1_35_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_35_end_0 = const()[name = string("x1_35_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_35_end_mask_0 = const()[name = string("x1_35_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_35_cast_fp16 = slice_by_index(begin = x1_35_begin_0, end = x1_35_end_0, end_mask = x1_35_end_mask_0, x = k_17_cast_fp16)[name = string("x1_35_cast_fp16")]; - tensor x2_35_begin_0 = const()[name = string("x2_35_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_35_end_0 = const()[name = string("x2_35_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_35_end_mask_0 = const()[name = string("x2_35_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_35_cast_fp16 = slice_by_index(begin = x2_35_begin_0, end = x2_35_end_0, end_mask = x2_35_end_mask_0, x = k_17_cast_fp16)[name = string("x2_35_cast_fp16")]; - fp16 const_356_promoted_to_fp16 = const()[name = string("const_356_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_7845_cast_fp16 = mul(x = x2_35_cast_fp16, y = const_356_promoted_to_fp16)[name = string("op_7845_cast_fp16")]; - int32 var_7847 = const()[name = string("op_7847"), val = int32(-1)]; - bool var_7848_interleave_0 = const()[name = string("op_7848_interleave_0"), val = bool(false)]; - tensor var_7848_cast_fp16 = concat(axis = var_7847, interleave = var_7848_interleave_0, values = (var_7845_cast_fp16, x1_35_cast_fp16))[name = string("op_7848_cast_fp16")]; - tensor var_7849_cast_fp16 = mul(x = var_7848_cast_fp16, y = sin_5)[name = string("op_7849_cast_fp16")]; - tensor key_states_83_cast_fp16 = add(x = var_7824_cast_fp16, y = var_7849_cast_fp16)[name = string("key_states_83_cast_fp16")]; - tensor key_slice_15_begin_0 = const()[name = string("key_slice_15_begin_0"), val = tensor([7, 0, 0, 0])]; - tensor key_slice_15_end_0 = const()[name = string("key_slice_15_end_0"), val = tensor([8, 1, 512, 256])]; - tensor key_slice_15_end_mask_0 = const()[name = string("key_slice_15_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_15_cast_fp16 = slice_by_index(begin = key_slice_15_begin_0, end = key_slice_15_end_0, end_mask = key_slice_15_end_mask_0, x = coreml_update_state_67)[name = string("key_slice_15_cast_fp16")]; - tensor var_7886_begin_0 = const()[name = string("op_7886_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_7886_end_0 = const()[name = string("op_7886_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_7886_end_mask_0 = const()[name = string("op_7886_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_7886_cast_fp16 = slice_by_index(begin = var_7886_begin_0, end = var_7886_end_0, end_mask = var_7886_end_mask_0, x = key_slice_15_cast_fp16)[name = string("op_7886_cast_fp16")]; - int32 var_7913 = const()[name = string("op_7913"), val = int32(2)]; - bool shifted_key_15_interleave_0 = const()[name = string("shifted_key_15_interleave_0"), val = bool(false)]; - tensor shifted_key_15_cast_fp16 = concat(axis = var_7913, interleave = shifted_key_15_interleave_0, values = (var_7886_cast_fp16, key_states_83_cast_fp16))[name = string("shifted_key_15_cast_fp16")]; - tensor concat_116 = const()[name = string("concat_116"), val = tensor([7, 0, 0, 0])]; - tensor concat_117 = const()[name = string("concat_117"), val = tensor([8, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_15_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_15_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_15_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_116, begin_mask = model_model_kv_cache_local_internal_tensor_assign_15_begin_mask_0, end = concat_117, end_mask = model_model_kv_cache_local_internal_tensor_assign_15_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_15_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_15_stride_0, update = shifted_key_15_cast_fp16, x = coreml_update_state_67)[name = string("model_model_kv_cache_local_internal_tensor_assign_15_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_15_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_172_write_state")]; - tensor coreml_update_state_68 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_172")]; - tensor value_slice_15_begin_0 = const()[name = string("value_slice_15_begin_0"), val = tensor([29, 0, 0, 0])]; - tensor value_slice_15_end_0 = const()[name = string("value_slice_15_end_0"), val = tensor([30, 1, 512, 256])]; - tensor value_slice_15_end_mask_0 = const()[name = string("value_slice_15_end_mask_0"), val = tensor([false, true, true, true])]; - tensor value_slice_15_cast_fp16 = slice_by_index(begin = value_slice_15_begin_0, end = value_slice_15_end_0, end_mask = value_slice_15_end_mask_0, x = coreml_update_state_68)[name = string("value_slice_15_cast_fp16")]; - tensor var_7956_begin_0 = const()[name = string("op_7956_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_7956_end_0 = const()[name = string("op_7956_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_7956_end_mask_0 = const()[name = string("op_7956_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_7956_cast_fp16 = slice_by_index(begin = var_7956_begin_0, end = var_7956_end_0, end_mask = var_7956_end_mask_0, x = value_slice_15_cast_fp16)[name = string("op_7956_cast_fp16")]; - int32 var_7983 = const()[name = string("op_7983"), val = int32(2)]; - bool shifted_value_15_interleave_0 = const()[name = string("shifted_value_15_interleave_0"), val = bool(false)]; - tensor value_states_67 = transpose(perm = var_7728, x = var_7723)[name = string("transpose_158")]; - tensor shifted_value_15_cast_fp16 = concat(axis = var_7983, interleave = shifted_value_15_interleave_0, values = (var_7956_cast_fp16, value_states_67))[name = string("shifted_value_15_cast_fp16")]; - tensor concat_118 = const()[name = string("concat_118"), val = tensor([29, 0, 0, 0])]; - tensor concat_119 = const()[name = string("concat_119"), val = tensor([30, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_16_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_16_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_16_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_118, begin_mask = model_model_kv_cache_local_internal_tensor_assign_16_begin_mask_0, end = concat_119, end_mask = model_model_kv_cache_local_internal_tensor_assign_16_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_16_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_16_stride_0, update = shifted_value_15_cast_fp16, x = coreml_update_state_68)[name = string("model_model_kv_cache_local_internal_tensor_assign_16_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_16_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_173_write_state")]; - tensor coreml_update_state_69 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_173")]; - tensor var_8011_begin_0 = const()[name = string("op_8011_begin_0"), val = tensor([7, 0, 0, 0])]; - tensor var_8011_end_0 = const()[name = string("op_8011_end_0"), val = tensor([8, 1, 512, 256])]; - tensor var_8011_end_mask_0 = const()[name = string("op_8011_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_8011_cast_fp16 = slice_by_index(begin = var_8011_begin_0, end = var_8011_end_0, end_mask = var_8011_end_mask_0, x = coreml_update_state_69)[name = string("op_8011_cast_fp16")]; - tensor var_8018_begin_0 = const()[name = string("op_8018_begin_0"), val = tensor([29, 0, 0, 0])]; - tensor var_8018_end_0 = const()[name = string("op_8018_end_0"), val = tensor([30, 1, 512, 256])]; - tensor var_8018_end_mask_0 = const()[name = string("op_8018_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_8018_cast_fp16 = slice_by_index(begin = var_8018_begin_0, end = var_8018_end_0, end_mask = var_8018_end_mask_0, x = coreml_update_state_69)[name = string("op_8018_cast_fp16")]; - tensor var_8057 = const()[name = string("op_8057"), val = tensor([1, 4, 1, 1])]; - tensor x_133_cast_fp16 = tile(reps = var_8057, x = var_8011_cast_fp16)[name = string("x_133_cast_fp16")]; - tensor var_8077 = const()[name = string("op_8077"), val = tensor([1, 4, 1, 1])]; - tensor x_139_cast_fp16 = tile(reps = var_8077, x = var_8018_cast_fp16)[name = string("x_139_cast_fp16")]; - bool var_8104_transpose_x_0 = const()[name = string("op_8104_transpose_x_0"), val = bool(false)]; - bool var_8104_transpose_y_0 = const()[name = string("op_8104_transpose_y_0"), val = bool(true)]; - tensor var_8104 = matmul(transpose_x = var_8104_transpose_x_0, transpose_y = var_8104_transpose_y_0, x = query_states_67_cast_fp16, y = x_133_cast_fp16)[name = string("op_8104")]; - fp16 var_8105_to_fp16 = const()[name = string("op_8105_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_33_cast_fp16 = mul(x = var_8104, y = var_8105_to_fp16)[name = string("attn_weights_33_cast_fp16")]; - tensor attn_weights_35_cast_fp16 = add(x = attn_weights_33_cast_fp16, y = mask_slice_1)[name = string("attn_weights_35_cast_fp16")]; - int32 var_8140 = const()[name = string("op_8140"), val = int32(-1)]; - tensor var_8142_cast_fp16 = softmax(axis = var_8140, x = attn_weights_35_cast_fp16)[name = string("op_8142_cast_fp16")]; - tensor concat_124 = const()[name = string("concat_124"), val = tensor([4, 64, 512])]; - tensor reshape_24_cast_fp16 = reshape(shape = concat_124, x = var_8142_cast_fp16)[name = string("reshape_24_cast_fp16")]; - tensor concat_125 = const()[name = string("concat_125"), val = tensor([4, 512, 256])]; - tensor reshape_25_cast_fp16 = reshape(shape = concat_125, x = x_139_cast_fp16)[name = string("reshape_25_cast_fp16")]; - bool matmul_8_transpose_x_0 = const()[name = string("matmul_8_transpose_x_0"), val = bool(false)]; - bool matmul_8_transpose_y_0 = const()[name = string("matmul_8_transpose_y_0"), val = bool(false)]; - tensor matmul_8_cast_fp16 = matmul(transpose_x = matmul_8_transpose_x_0, transpose_y = matmul_8_transpose_y_0, x = reshape_24_cast_fp16, y = reshape_25_cast_fp16)[name = string("matmul_8_cast_fp16")]; - tensor concat_129 = const()[name = string("concat_129"), val = tensor([1, 4, 64, 256])]; - tensor reshape_26_cast_fp16 = reshape(shape = concat_129, x = matmul_8_cast_fp16)[name = string("reshape_26_cast_fp16")]; - tensor var_8154_perm_0 = const()[name = string("op_8154_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_8173 = const()[name = string("op_8173"), val = tensor([1, 64, 1024])]; - tensor var_8154_cast_fp16 = transpose(perm = var_8154_perm_0, x = reshape_26_cast_fp16)[name = string("transpose_157")]; - tensor attn_output_85_cast_fp16 = reshape(shape = var_8173, x = var_8154_cast_fp16)[name = string("attn_output_85_cast_fp16")]; - tensor var_8178 = const()[name = string("op_8178"), val = tensor([0, 2, 1])]; - string var_8194_pad_type_0 = const()[name = string("op_8194_pad_type_0"), val = string("valid")]; - int32 var_8194_groups_0 = const()[name = string("op_8194_groups_0"), val = int32(1)]; - tensor var_8194_strides_0 = const()[name = string("op_8194_strides_0"), val = tensor([1])]; - tensor var_8194_pad_0 = const()[name = string("op_8194_pad_0"), val = tensor([0, 0])]; - tensor var_8194_dilations_0 = const()[name = string("op_8194_dilations_0"), val = tensor([1])]; - tensor squeeze_8_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204493312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205378112))))[name = string("squeeze_8_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_8179_cast_fp16 = transpose(perm = var_8178, x = attn_output_85_cast_fp16)[name = string("transpose_156")]; - tensor var_8194_cast_fp16 = conv(dilations = var_8194_dilations_0, groups = var_8194_groups_0, pad = var_8194_pad_0, pad_type = var_8194_pad_type_0, strides = var_8194_strides_0, weight = squeeze_8_cast_fp16_to_fp32_to_fp16_palettized, x = var_8179_cast_fp16)[name = string("op_8194_cast_fp16")]; - tensor var_8198 = const()[name = string("op_8198"), val = tensor([0, 2, 1])]; - int32 var_8209 = const()[name = string("op_8209"), val = int32(-1)]; - fp16 const_367_promoted_to_fp16 = const()[name = string("const_367_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_137_cast_fp16 = transpose(perm = var_8198, x = var_8194_cast_fp16)[name = string("transpose_155")]; - tensor var_8211_cast_fp16 = mul(x = hidden_states_137_cast_fp16, y = const_367_promoted_to_fp16)[name = string("op_8211_cast_fp16")]; - bool input_171_interleave_0 = const()[name = string("input_171_interleave_0"), val = bool(false)]; - tensor input_171_cast_fp16 = concat(axis = var_8209, interleave = input_171_interleave_0, values = (hidden_states_137_cast_fp16, var_8211_cast_fp16))[name = string("input_171_cast_fp16")]; - tensor normed_205_axes_0 = const()[name = string("normed_205_axes_0"), val = tensor([-1])]; - fp16 var_8206_to_fp16 = const()[name = string("op_8206_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_205_cast_fp16 = layer_norm(axes = normed_205_axes_0, epsilon = var_8206_to_fp16, x = input_171_cast_fp16)[name = string("normed_205_cast_fp16")]; - tensor normed_207_begin_0 = const()[name = string("normed_207_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_207_end_0 = const()[name = string("normed_207_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_207_end_mask_0 = const()[name = string("normed_207_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_207_cast_fp16 = slice_by_index(begin = normed_207_begin_0, end = normed_207_end_0, end_mask = normed_207_end_mask_0, x = normed_205_cast_fp16)[name = string("normed_207_cast_fp16")]; - tensor var_8225_to_fp16 = const()[name = string("op_8225_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205396608)))]; - tensor attn_output_89_cast_fp16 = mul(x = normed_207_cast_fp16, y = var_8225_to_fp16)[name = string("attn_output_89_cast_fp16")]; - tensor hidden_states_139_cast_fp16 = add(x = hidden_states_129_cast_fp16, y = attn_output_89_cast_fp16)[name = string("hidden_states_139_cast_fp16")]; - int32 var_8238 = const()[name = string("op_8238"), val = int32(-1)]; - fp16 const_371_promoted_to_fp16 = const()[name = string("const_371_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_8240_cast_fp16 = mul(x = hidden_states_139_cast_fp16, y = const_371_promoted_to_fp16)[name = string("op_8240_cast_fp16")]; - bool input_173_interleave_0 = const()[name = string("input_173_interleave_0"), val = bool(false)]; - tensor input_173_cast_fp16 = concat(axis = var_8238, interleave = input_173_interleave_0, values = (hidden_states_139_cast_fp16, var_8240_cast_fp16))[name = string("input_173_cast_fp16")]; - tensor normed_209_axes_0 = const()[name = string("normed_209_axes_0"), val = tensor([-1])]; - fp16 var_8235_to_fp16 = const()[name = string("op_8235_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_209_cast_fp16 = layer_norm(axes = normed_209_axes_0, epsilon = var_8235_to_fp16, x = input_173_cast_fp16)[name = string("normed_209_cast_fp16")]; - tensor normed_211_begin_0 = const()[name = string("normed_211_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_211_end_0 = const()[name = string("normed_211_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_211_end_mask_0 = const()[name = string("normed_211_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_211_cast_fp16 = slice_by_index(begin = normed_211_begin_0, end = normed_211_end_0, end_mask = normed_211_end_mask_0, x = normed_209_cast_fp16)[name = string("normed_211_cast_fp16")]; - tensor var_8254_to_fp16 = const()[name = string("op_8254_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205398976)))]; - tensor x_141_cast_fp16 = mul(x = normed_211_cast_fp16, y = var_8254_to_fp16)[name = string("x_141_cast_fp16")]; - tensor var_8266 = const()[name = string("op_8266"), val = tensor([0, 2, 1])]; - tensor input_175_axes_0 = const()[name = string("input_175_axes_0"), val = tensor([2])]; - tensor var_8267_cast_fp16 = transpose(perm = var_8266, x = x_141_cast_fp16)[name = string("transpose_154")]; - tensor input_175_cast_fp16 = expand_dims(axes = input_175_axes_0, x = var_8267_cast_fp16)[name = string("input_175_cast_fp16")]; - string x_143_pad_type_0 = const()[name = string("x_143_pad_type_0"), val = string("valid")]; - tensor x_143_strides_0 = const()[name = string("x_143_strides_0"), val = tensor([1, 1])]; - tensor x_143_pad_0 = const()[name = string("x_143_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_143_dilations_0 = const()[name = string("x_143_dilations_0"), val = tensor([1, 1])]; - int32 x_143_groups_0 = const()[name = string("x_143_groups_0"), val = int32(1)]; - tensor model_model_layers_8_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1288758208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1294730240))))[name = string("model_model_layers_8_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_143_cast_fp16 = conv(dilations = x_143_dilations_0, groups = x_143_groups_0, pad = x_143_pad_0, pad_type = x_143_pad_type_0, strides = x_143_strides_0, weight = model_model_layers_8_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_175_cast_fp16)[name = string("x_143_cast_fp16")]; - string b_17_pad_type_0 = const()[name = string("b_17_pad_type_0"), val = string("valid")]; - tensor b_17_strides_0 = const()[name = string("b_17_strides_0"), val = tensor([1, 1])]; - tensor b_17_pad_0 = const()[name = string("b_17_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_17_dilations_0 = const()[name = string("b_17_dilations_0"), val = tensor([1, 1])]; - int32 b_17_groups_0 = const()[name = string("b_17_groups_0"), val = int32(1)]; - tensor model_model_layers_8_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1294840896))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1300812928))))[name = string("model_model_layers_8_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_17_cast_fp16 = conv(dilations = b_17_dilations_0, groups = b_17_groups_0, pad = b_17_pad_0, pad_type = b_17_pad_type_0, strides = b_17_strides_0, weight = model_model_layers_8_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_175_cast_fp16)[name = string("b_17_cast_fp16")]; - string var_8292_mode_0 = const()[name = string("op_8292_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_8292_cast_fp16 = gelu(mode = var_8292_mode_0, x = x_143_cast_fp16)[name = string("op_8292_cast_fp16")]; - tensor input_177_cast_fp16 = mul(x = var_8292_cast_fp16, y = b_17_cast_fp16)[name = string("input_177_cast_fp16")]; - string e_17_pad_type_0 = const()[name = string("e_17_pad_type_0"), val = string("valid")]; - tensor e_17_strides_0 = const()[name = string("e_17_strides_0"), val = tensor([1, 1])]; - tensor e_17_pad_0 = const()[name = string("e_17_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_17_dilations_0 = const()[name = string("e_17_dilations_0"), val = tensor([1, 1])]; - int32 e_17_groups_0 = const()[name = string("e_17_groups_0"), val = int32(1)]; - tensor model_model_layers_8_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217566720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223538752))))[name = string("model_model_layers_8_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_17_cast_fp16 = conv(dilations = e_17_dilations_0, groups = e_17_groups_0, pad = e_17_pad_0, pad_type = e_17_pad_type_0, strides = e_17_strides_0, weight = model_model_layers_8_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_177_cast_fp16)[name = string("e_17_cast_fp16")]; - tensor var_8300_axes_0 = const()[name = string("op_8300_axes_0"), val = tensor([2])]; - tensor var_8300_cast_fp16 = squeeze(axes = var_8300_axes_0, x = e_17_cast_fp16)[name = string("op_8300_cast_fp16")]; - tensor var_8301 = const()[name = string("op_8301"), val = tensor([0, 2, 1])]; - int32 var_8312 = const()[name = string("op_8312"), val = int32(-1)]; - fp16 const_375_promoted_to_fp16 = const()[name = string("const_375_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_141_cast_fp16 = transpose(perm = var_8301, x = var_8300_cast_fp16)[name = string("transpose_153")]; - tensor var_8314_cast_fp16 = mul(x = hidden_states_141_cast_fp16, y = const_375_promoted_to_fp16)[name = string("op_8314_cast_fp16")]; - bool input_179_interleave_0 = const()[name = string("input_179_interleave_0"), val = bool(false)]; - tensor input_179_cast_fp16 = concat(axis = var_8312, interleave = input_179_interleave_0, values = (hidden_states_141_cast_fp16, var_8314_cast_fp16))[name = string("input_179_cast_fp16")]; - tensor normed_213_axes_0 = const()[name = string("normed_213_axes_0"), val = tensor([-1])]; - fp16 var_8309_to_fp16 = const()[name = string("op_8309_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_213_cast_fp16 = layer_norm(axes = normed_213_axes_0, epsilon = var_8309_to_fp16, x = input_179_cast_fp16)[name = string("normed_213_cast_fp16")]; - tensor normed_215_begin_0 = const()[name = string("normed_215_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_215_end_0 = const()[name = string("normed_215_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_215_end_mask_0 = const()[name = string("normed_215_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_215_cast_fp16 = slice_by_index(begin = normed_215_begin_0, end = normed_215_end_0, end_mask = normed_215_end_mask_0, x = normed_213_cast_fp16)[name = string("normed_215_cast_fp16")]; - tensor var_8328_to_fp16 = const()[name = string("op_8328_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223557248)))]; - tensor hidden_states_143_cast_fp16 = mul(x = normed_215_cast_fp16, y = var_8328_to_fp16)[name = string("hidden_states_143_cast_fp16")]; - tensor hidden_states_145_cast_fp16 = add(x = hidden_states_139_cast_fp16, y = hidden_states_143_cast_fp16)[name = string("hidden_states_145_cast_fp16")]; - int32 var_8382 = const()[name = string("op_8382"), val = int32(-1)]; - fp16 const_380_promoted_to_fp16 = const()[name = string("const_380_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_8384_cast_fp16 = mul(x = hidden_states_145_cast_fp16, y = const_380_promoted_to_fp16)[name = string("op_8384_cast_fp16")]; - bool input_181_interleave_0 = const()[name = string("input_181_interleave_0"), val = bool(false)]; - tensor input_181_cast_fp16 = concat(axis = var_8382, interleave = input_181_interleave_0, values = (hidden_states_145_cast_fp16, var_8384_cast_fp16))[name = string("input_181_cast_fp16")]; - tensor normed_217_axes_0 = const()[name = string("normed_217_axes_0"), val = tensor([-1])]; - fp16 var_8379_to_fp16 = const()[name = string("op_8379_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_217_cast_fp16 = layer_norm(axes = normed_217_axes_0, epsilon = var_8379_to_fp16, x = input_181_cast_fp16)[name = string("normed_217_cast_fp16")]; - tensor normed_219_begin_0 = const()[name = string("normed_219_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_219_end_0 = const()[name = string("normed_219_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_219_end_mask_0 = const()[name = string("normed_219_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_219_cast_fp16 = slice_by_index(begin = normed_219_begin_0, end = normed_219_end_0, end_mask = normed_219_end_mask_0, x = normed_217_cast_fp16)[name = string("normed_219_cast_fp16")]; - tensor var_8398_to_fp16 = const()[name = string("op_8398_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223559616)))]; - tensor hidden_states_147_cast_fp16 = mul(x = normed_219_cast_fp16, y = var_8398_to_fp16)[name = string("hidden_states_147_cast_fp16")]; - tensor var_8409 = const()[name = string("op_8409"), val = tensor([0, 2, 1])]; - tensor var_8412_axes_0 = const()[name = string("op_8412_axes_0"), val = tensor([2])]; - tensor var_8410_cast_fp16 = transpose(perm = var_8409, x = hidden_states_147_cast_fp16)[name = string("transpose_152")]; - tensor var_8412_cast_fp16 = expand_dims(axes = var_8412_axes_0, x = var_8410_cast_fp16)[name = string("op_8412_cast_fp16")]; - string query_states_73_pad_type_0 = const()[name = string("query_states_73_pad_type_0"), val = string("valid")]; - tensor query_states_73_strides_0 = const()[name = string("query_states_73_strides_0"), val = tensor([1, 1])]; - tensor query_states_73_pad_0 = const()[name = string("query_states_73_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_73_dilations_0 = const()[name = string("query_states_73_dilations_0"), val = tensor([1, 1])]; - int32 query_states_73_groups_0 = const()[name = string("query_states_73_groups_0"), val = int32(1)]; - tensor query_states_73 = conv(dilations = query_states_73_dilations_0, groups = query_states_73_groups_0, pad = query_states_73_pad_0, pad_type = query_states_73_pad_type_0, strides = query_states_73_strides_0, weight = model_model_layers_9_self_attn_q_proj_weight_palettized, x = var_8412_cast_fp16)[name = string("query_states_73")]; - string key_states_91_pad_type_0 = const()[name = string("key_states_91_pad_type_0"), val = string("valid")]; - tensor key_states_91_strides_0 = const()[name = string("key_states_91_strides_0"), val = tensor([1, 1])]; - tensor key_states_91_pad_0 = const()[name = string("key_states_91_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_91_dilations_0 = const()[name = string("key_states_91_dilations_0"), val = tensor([1, 1])]; - int32 key_states_91_groups_0 = const()[name = string("key_states_91_groups_0"), val = int32(1)]; - tensor key_states_91 = conv(dilations = key_states_91_dilations_0, groups = key_states_91_groups_0, pad = key_states_91_pad_0, pad_type = key_states_91_pad_type_0, strides = key_states_91_strides_0, weight = model_model_layers_9_self_attn_k_proj_weight_palettized, x = var_8412_cast_fp16)[name = string("key_states_91")]; - string value_states_73_pad_type_0 = const()[name = string("value_states_73_pad_type_0"), val = string("valid")]; - tensor value_states_73_strides_0 = const()[name = string("value_states_73_strides_0"), val = tensor([1, 1])]; - tensor value_states_73_pad_0 = const()[name = string("value_states_73_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_73_dilations_0 = const()[name = string("value_states_73_dilations_0"), val = tensor([1, 1])]; - int32 value_states_73_groups_0 = const()[name = string("value_states_73_groups_0"), val = int32(1)]; - tensor value_states_73 = conv(dilations = value_states_73_dilations_0, groups = value_states_73_groups_0, pad = value_states_73_pad_0, pad_type = value_states_73_pad_type_0, strides = value_states_73_strides_0, weight = model_model_layers_9_self_attn_v_proj_weight_palettized, x = var_8412_cast_fp16)[name = string("value_states_73")]; - tensor var_8454 = const()[name = string("op_8454"), val = tensor([1, 4, 256, 64])]; - tensor var_8455 = reshape(shape = var_8454, x = query_states_73)[name = string("op_8455")]; - tensor var_8460 = const()[name = string("op_8460"), val = tensor([0, 1, 3, 2])]; - tensor var_8465 = const()[name = string("op_8465"), val = tensor([1, 1, 256, 64])]; - tensor var_8466 = reshape(shape = var_8465, x = key_states_91)[name = string("op_8466")]; - tensor var_8471 = const()[name = string("op_8471"), val = tensor([0, 1, 3, 2])]; - tensor var_8476 = const()[name = string("op_8476"), val = tensor([1, 1, 256, 64])]; - tensor var_8477 = reshape(shape = var_8476, x = value_states_73)[name = string("op_8477")]; - tensor var_8482 = const()[name = string("op_8482"), val = tensor([0, 1, 3, 2])]; - int32 var_8493 = const()[name = string("op_8493"), val = int32(-1)]; - fp16 const_385_promoted = const()[name = string("const_385_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_149 = transpose(perm = var_8460, x = var_8455)[name = string("transpose_151")]; - tensor var_8495 = mul(x = hidden_states_149, y = const_385_promoted)[name = string("op_8495")]; - bool input_185_interleave_0 = const()[name = string("input_185_interleave_0"), val = bool(false)]; - tensor input_185 = concat(axis = var_8493, interleave = input_185_interleave_0, values = (hidden_states_149, var_8495))[name = string("input_185")]; - tensor normed_221_axes_0 = const()[name = string("normed_221_axes_0"), val = tensor([-1])]; - fp16 var_8490_to_fp16 = const()[name = string("op_8490_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_221_cast_fp16 = layer_norm(axes = normed_221_axes_0, epsilon = var_8490_to_fp16, x = input_185)[name = string("normed_221_cast_fp16")]; - tensor normed_223_begin_0 = const()[name = string("normed_223_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_223_end_0 = const()[name = string("normed_223_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_223_end_mask_0 = const()[name = string("normed_223_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_223 = slice_by_index(begin = normed_223_begin_0, end = normed_223_end_0, end_mask = normed_223_end_mask_0, x = normed_221_cast_fp16)[name = string("normed_223")]; - tensor var_8509_to_fp16 = const()[name = string("op_8509_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223561984)))]; - tensor q_19_cast_fp16 = mul(x = normed_223, y = var_8509_to_fp16)[name = string("q_19_cast_fp16")]; - int32 var_8520 = const()[name = string("op_8520"), val = int32(-1)]; - fp16 const_389_promoted = const()[name = string("const_389_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_151 = transpose(perm = var_8471, x = var_8466)[name = string("transpose_150")]; - tensor var_8522 = mul(x = hidden_states_151, y = const_389_promoted)[name = string("op_8522")]; - bool input_187_interleave_0 = const()[name = string("input_187_interleave_0"), val = bool(false)]; - tensor input_187 = concat(axis = var_8520, interleave = input_187_interleave_0, values = (hidden_states_151, var_8522))[name = string("input_187")]; - tensor normed_225_axes_0 = const()[name = string("normed_225_axes_0"), val = tensor([-1])]; - fp16 var_8517_to_fp16 = const()[name = string("op_8517_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_225_cast_fp16 = layer_norm(axes = normed_225_axes_0, epsilon = var_8517_to_fp16, x = input_187)[name = string("normed_225_cast_fp16")]; - tensor normed_227_begin_0 = const()[name = string("normed_227_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_227_end_0 = const()[name = string("normed_227_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_227_end_mask_0 = const()[name = string("normed_227_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_227 = slice_by_index(begin = normed_227_begin_0, end = normed_227_end_0, end_mask = normed_227_end_mask_0, x = normed_225_cast_fp16)[name = string("normed_227")]; - tensor var_8536_to_fp16 = const()[name = string("op_8536_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223562560)))]; - tensor k_19_cast_fp16 = mul(x = normed_227, y = var_8536_to_fp16)[name = string("k_19_cast_fp16")]; - tensor var_8550_cast_fp16 = mul(x = q_19_cast_fp16, y = cos_5)[name = string("op_8550_cast_fp16")]; - tensor x1_37_begin_0 = const()[name = string("x1_37_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_37_end_0 = const()[name = string("x1_37_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_37_end_mask_0 = const()[name = string("x1_37_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_37_cast_fp16 = slice_by_index(begin = x1_37_begin_0, end = x1_37_end_0, end_mask = x1_37_end_mask_0, x = q_19_cast_fp16)[name = string("x1_37_cast_fp16")]; - tensor x2_37_begin_0 = const()[name = string("x2_37_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_37_end_0 = const()[name = string("x2_37_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_37_end_mask_0 = const()[name = string("x2_37_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_37_cast_fp16 = slice_by_index(begin = x2_37_begin_0, end = x2_37_end_0, end_mask = x2_37_end_mask_0, x = q_19_cast_fp16)[name = string("x2_37_cast_fp16")]; - fp16 const_395_promoted_to_fp16 = const()[name = string("const_395_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_8571_cast_fp16 = mul(x = x2_37_cast_fp16, y = const_395_promoted_to_fp16)[name = string("op_8571_cast_fp16")]; - int32 var_8573 = const()[name = string("op_8573"), val = int32(-1)]; - bool var_8574_interleave_0 = const()[name = string("op_8574_interleave_0"), val = bool(false)]; - tensor var_8574_cast_fp16 = concat(axis = var_8573, interleave = var_8574_interleave_0, values = (var_8571_cast_fp16, x1_37_cast_fp16))[name = string("op_8574_cast_fp16")]; - tensor var_8575_cast_fp16 = mul(x = var_8574_cast_fp16, y = sin_5)[name = string("op_8575_cast_fp16")]; - tensor query_states_75_cast_fp16 = add(x = var_8550_cast_fp16, y = var_8575_cast_fp16)[name = string("query_states_75_cast_fp16")]; - tensor var_8578_cast_fp16 = mul(x = k_19_cast_fp16, y = cos_5)[name = string("op_8578_cast_fp16")]; - tensor x1_39_begin_0 = const()[name = string("x1_39_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_39_end_0 = const()[name = string("x1_39_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_39_end_mask_0 = const()[name = string("x1_39_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_39_cast_fp16 = slice_by_index(begin = x1_39_begin_0, end = x1_39_end_0, end_mask = x1_39_end_mask_0, x = k_19_cast_fp16)[name = string("x1_39_cast_fp16")]; - tensor x2_39_begin_0 = const()[name = string("x2_39_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_39_end_0 = const()[name = string("x2_39_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_39_end_mask_0 = const()[name = string("x2_39_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_39_cast_fp16 = slice_by_index(begin = x2_39_begin_0, end = x2_39_end_0, end_mask = x2_39_end_mask_0, x = k_19_cast_fp16)[name = string("x2_39_cast_fp16")]; - fp16 const_398_promoted_to_fp16 = const()[name = string("const_398_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_8599_cast_fp16 = mul(x = x2_39_cast_fp16, y = const_398_promoted_to_fp16)[name = string("op_8599_cast_fp16")]; - int32 var_8601 = const()[name = string("op_8601"), val = int32(-1)]; - bool var_8602_interleave_0 = const()[name = string("op_8602_interleave_0"), val = bool(false)]; - tensor var_8602_cast_fp16 = concat(axis = var_8601, interleave = var_8602_interleave_0, values = (var_8599_cast_fp16, x1_39_cast_fp16))[name = string("op_8602_cast_fp16")]; - tensor var_8603_cast_fp16 = mul(x = var_8602_cast_fp16, y = sin_5)[name = string("op_8603_cast_fp16")]; - tensor key_states_93_cast_fp16 = add(x = var_8578_cast_fp16, y = var_8603_cast_fp16)[name = string("key_states_93_cast_fp16")]; - tensor key_slice_17_begin_0 = const()[name = string("key_slice_17_begin_0"), val = tensor([8, 0, 0, 0])]; - tensor key_slice_17_end_0 = const()[name = string("key_slice_17_end_0"), val = tensor([9, 1, 512, 256])]; - tensor key_slice_17_end_mask_0 = const()[name = string("key_slice_17_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_17_cast_fp16 = slice_by_index(begin = key_slice_17_begin_0, end = key_slice_17_end_0, end_mask = key_slice_17_end_mask_0, x = coreml_update_state_69)[name = string("key_slice_17_cast_fp16")]; - tensor var_8640_begin_0 = const()[name = string("op_8640_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_8640_end_0 = const()[name = string("op_8640_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_8640_end_mask_0 = const()[name = string("op_8640_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_8640_cast_fp16 = slice_by_index(begin = var_8640_begin_0, end = var_8640_end_0, end_mask = var_8640_end_mask_0, x = key_slice_17_cast_fp16)[name = string("op_8640_cast_fp16")]; - int32 var_8667 = const()[name = string("op_8667"), val = int32(2)]; - bool shifted_key_17_interleave_0 = const()[name = string("shifted_key_17_interleave_0"), val = bool(false)]; - tensor shifted_key_17_cast_fp16 = concat(axis = var_8667, interleave = shifted_key_17_interleave_0, values = (var_8640_cast_fp16, key_states_93_cast_fp16))[name = string("shifted_key_17_cast_fp16")]; - tensor concat_130 = const()[name = string("concat_130"), val = tensor([8, 0, 0, 0])]; - tensor concat_131 = const()[name = string("concat_131"), val = tensor([9, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_17_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_17_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_17_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_130, begin_mask = model_model_kv_cache_local_internal_tensor_assign_17_begin_mask_0, end = concat_131, end_mask = model_model_kv_cache_local_internal_tensor_assign_17_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_17_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_17_stride_0, update = shifted_key_17_cast_fp16, x = coreml_update_state_69)[name = string("model_model_kv_cache_local_internal_tensor_assign_17_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_17_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_174_write_state")]; - tensor coreml_update_state_70 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_174")]; - tensor value_slice_17_begin_0 = const()[name = string("value_slice_17_begin_0"), val = tensor([30, 0, 0, 0])]; - tensor value_slice_17_end_0 = const()[name = string("value_slice_17_end_0"), val = tensor([31, 1, 512, 256])]; - tensor value_slice_17_end_mask_0 = const()[name = string("value_slice_17_end_mask_0"), val = tensor([false, true, true, true])]; - tensor value_slice_17_cast_fp16 = slice_by_index(begin = value_slice_17_begin_0, end = value_slice_17_end_0, end_mask = value_slice_17_end_mask_0, x = coreml_update_state_70)[name = string("value_slice_17_cast_fp16")]; - tensor var_8710_begin_0 = const()[name = string("op_8710_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_8710_end_0 = const()[name = string("op_8710_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_8710_end_mask_0 = const()[name = string("op_8710_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_8710_cast_fp16 = slice_by_index(begin = var_8710_begin_0, end = var_8710_end_0, end_mask = var_8710_end_mask_0, x = value_slice_17_cast_fp16)[name = string("op_8710_cast_fp16")]; - int32 var_8737 = const()[name = string("op_8737"), val = int32(2)]; - bool shifted_value_17_interleave_0 = const()[name = string("shifted_value_17_interleave_0"), val = bool(false)]; - tensor value_states_75 = transpose(perm = var_8482, x = var_8477)[name = string("transpose_149")]; - tensor shifted_value_17_cast_fp16 = concat(axis = var_8737, interleave = shifted_value_17_interleave_0, values = (var_8710_cast_fp16, value_states_75))[name = string("shifted_value_17_cast_fp16")]; - tensor concat_132 = const()[name = string("concat_132"), val = tensor([30, 0, 0, 0])]; - tensor concat_133 = const()[name = string("concat_133"), val = tensor([31, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_18_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_18_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_18_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_132, begin_mask = model_model_kv_cache_local_internal_tensor_assign_18_begin_mask_0, end = concat_133, end_mask = model_model_kv_cache_local_internal_tensor_assign_18_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_18_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_18_stride_0, update = shifted_value_17_cast_fp16, x = coreml_update_state_70)[name = string("model_model_kv_cache_local_internal_tensor_assign_18_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_18_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_175_write_state")]; - tensor coreml_update_state_71 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_175")]; - tensor var_8765_begin_0 = const()[name = string("op_8765_begin_0"), val = tensor([8, 0, 0, 0])]; - tensor var_8765_end_0 = const()[name = string("op_8765_end_0"), val = tensor([9, 1, 512, 256])]; - tensor var_8765_end_mask_0 = const()[name = string("op_8765_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_8765_cast_fp16 = slice_by_index(begin = var_8765_begin_0, end = var_8765_end_0, end_mask = var_8765_end_mask_0, x = coreml_update_state_71)[name = string("op_8765_cast_fp16")]; - tensor var_8772_begin_0 = const()[name = string("op_8772_begin_0"), val = tensor([30, 0, 0, 0])]; - tensor var_8772_end_0 = const()[name = string("op_8772_end_0"), val = tensor([31, 1, 512, 256])]; - tensor var_8772_end_mask_0 = const()[name = string("op_8772_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_8772_cast_fp16 = slice_by_index(begin = var_8772_begin_0, end = var_8772_end_0, end_mask = var_8772_end_mask_0, x = coreml_update_state_71)[name = string("op_8772_cast_fp16")]; - tensor var_8811 = const()[name = string("op_8811"), val = tensor([1, 4, 1, 1])]; - tensor x_149_cast_fp16 = tile(reps = var_8811, x = var_8765_cast_fp16)[name = string("x_149_cast_fp16")]; - tensor var_8831 = const()[name = string("op_8831"), val = tensor([1, 4, 1, 1])]; - tensor x_155_cast_fp16 = tile(reps = var_8831, x = var_8772_cast_fp16)[name = string("x_155_cast_fp16")]; - bool var_8858_transpose_x_0 = const()[name = string("op_8858_transpose_x_0"), val = bool(false)]; - bool var_8858_transpose_y_0 = const()[name = string("op_8858_transpose_y_0"), val = bool(true)]; - tensor var_8858 = matmul(transpose_x = var_8858_transpose_x_0, transpose_y = var_8858_transpose_y_0, x = query_states_75_cast_fp16, y = x_149_cast_fp16)[name = string("op_8858")]; - fp16 var_8859_to_fp16 = const()[name = string("op_8859_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_37_cast_fp16 = mul(x = var_8858, y = var_8859_to_fp16)[name = string("attn_weights_37_cast_fp16")]; - tensor attn_weights_39_cast_fp16 = add(x = attn_weights_37_cast_fp16, y = mask_slice_1)[name = string("attn_weights_39_cast_fp16")]; - int32 var_8894 = const()[name = string("op_8894"), val = int32(-1)]; - tensor var_8896_cast_fp16 = softmax(axis = var_8894, x = attn_weights_39_cast_fp16)[name = string("op_8896_cast_fp16")]; - tensor concat_138 = const()[name = string("concat_138"), val = tensor([4, 64, 512])]; - tensor reshape_27_cast_fp16 = reshape(shape = concat_138, x = var_8896_cast_fp16)[name = string("reshape_27_cast_fp16")]; - tensor concat_139 = const()[name = string("concat_139"), val = tensor([4, 512, 256])]; - tensor reshape_28_cast_fp16 = reshape(shape = concat_139, x = x_155_cast_fp16)[name = string("reshape_28_cast_fp16")]; - bool matmul_9_transpose_x_0 = const()[name = string("matmul_9_transpose_x_0"), val = bool(false)]; - bool matmul_9_transpose_y_0 = const()[name = string("matmul_9_transpose_y_0"), val = bool(false)]; - tensor matmul_9_cast_fp16 = matmul(transpose_x = matmul_9_transpose_x_0, transpose_y = matmul_9_transpose_y_0, x = reshape_27_cast_fp16, y = reshape_28_cast_fp16)[name = string("matmul_9_cast_fp16")]; - tensor concat_143 = const()[name = string("concat_143"), val = tensor([1, 4, 64, 256])]; - tensor reshape_29_cast_fp16 = reshape(shape = concat_143, x = matmul_9_cast_fp16)[name = string("reshape_29_cast_fp16")]; - tensor var_8908_perm_0 = const()[name = string("op_8908_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_8927 = const()[name = string("op_8927"), val = tensor([1, 64, 1024])]; - tensor var_8908_cast_fp16 = transpose(perm = var_8908_perm_0, x = reshape_29_cast_fp16)[name = string("transpose_148")]; - tensor attn_output_95_cast_fp16 = reshape(shape = var_8927, x = var_8908_cast_fp16)[name = string("attn_output_95_cast_fp16")]; - tensor var_8932 = const()[name = string("op_8932"), val = tensor([0, 2, 1])]; - string var_8948_pad_type_0 = const()[name = string("op_8948_pad_type_0"), val = string("valid")]; - int32 var_8948_groups_0 = const()[name = string("op_8948_groups_0"), val = int32(1)]; - tensor var_8948_strides_0 = const()[name = string("op_8948_strides_0"), val = tensor([1])]; - tensor var_8948_pad_0 = const()[name = string("op_8948_pad_0"), val = tensor([0, 0])]; - tensor var_8948_dilations_0 = const()[name = string("op_8948_dilations_0"), val = tensor([1])]; - tensor squeeze_9_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223563136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224447936))))[name = string("squeeze_9_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_8933_cast_fp16 = transpose(perm = var_8932, x = attn_output_95_cast_fp16)[name = string("transpose_147")]; - tensor var_8948_cast_fp16 = conv(dilations = var_8948_dilations_0, groups = var_8948_groups_0, pad = var_8948_pad_0, pad_type = var_8948_pad_type_0, strides = var_8948_strides_0, weight = squeeze_9_cast_fp16_to_fp32_to_fp16_palettized, x = var_8933_cast_fp16)[name = string("op_8948_cast_fp16")]; - tensor var_8952 = const()[name = string("op_8952"), val = tensor([0, 2, 1])]; - int32 var_8963 = const()[name = string("op_8963"), val = int32(-1)]; - fp16 const_409_promoted_to_fp16 = const()[name = string("const_409_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_153_cast_fp16 = transpose(perm = var_8952, x = var_8948_cast_fp16)[name = string("transpose_146")]; - tensor var_8965_cast_fp16 = mul(x = hidden_states_153_cast_fp16, y = const_409_promoted_to_fp16)[name = string("op_8965_cast_fp16")]; - bool input_191_interleave_0 = const()[name = string("input_191_interleave_0"), val = bool(false)]; - tensor input_191_cast_fp16 = concat(axis = var_8963, interleave = input_191_interleave_0, values = (hidden_states_153_cast_fp16, var_8965_cast_fp16))[name = string("input_191_cast_fp16")]; - tensor normed_229_axes_0 = const()[name = string("normed_229_axes_0"), val = tensor([-1])]; - fp16 var_8960_to_fp16 = const()[name = string("op_8960_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_229_cast_fp16 = layer_norm(axes = normed_229_axes_0, epsilon = var_8960_to_fp16, x = input_191_cast_fp16)[name = string("normed_229_cast_fp16")]; - tensor normed_231_begin_0 = const()[name = string("normed_231_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_231_end_0 = const()[name = string("normed_231_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_231_end_mask_0 = const()[name = string("normed_231_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_231_cast_fp16 = slice_by_index(begin = normed_231_begin_0, end = normed_231_end_0, end_mask = normed_231_end_mask_0, x = normed_229_cast_fp16)[name = string("normed_231_cast_fp16")]; - tensor var_8979_to_fp16 = const()[name = string("op_8979_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224466432)))]; - tensor attn_output_99_cast_fp16 = mul(x = normed_231_cast_fp16, y = var_8979_to_fp16)[name = string("attn_output_99_cast_fp16")]; - tensor hidden_states_155_cast_fp16 = add(x = hidden_states_145_cast_fp16, y = attn_output_99_cast_fp16)[name = string("hidden_states_155_cast_fp16")]; - int32 var_8992 = const()[name = string("op_8992"), val = int32(-1)]; - fp16 const_413_promoted_to_fp16 = const()[name = string("const_413_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_8994_cast_fp16 = mul(x = hidden_states_155_cast_fp16, y = const_413_promoted_to_fp16)[name = string("op_8994_cast_fp16")]; - bool input_193_interleave_0 = const()[name = string("input_193_interleave_0"), val = bool(false)]; - tensor input_193_cast_fp16 = concat(axis = var_8992, interleave = input_193_interleave_0, values = (hidden_states_155_cast_fp16, var_8994_cast_fp16))[name = string("input_193_cast_fp16")]; - tensor normed_233_axes_0 = const()[name = string("normed_233_axes_0"), val = tensor([-1])]; - fp16 var_8989_to_fp16 = const()[name = string("op_8989_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_233_cast_fp16 = layer_norm(axes = normed_233_axes_0, epsilon = var_8989_to_fp16, x = input_193_cast_fp16)[name = string("normed_233_cast_fp16")]; - tensor normed_235_begin_0 = const()[name = string("normed_235_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_235_end_0 = const()[name = string("normed_235_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_235_end_mask_0 = const()[name = string("normed_235_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_235_cast_fp16 = slice_by_index(begin = normed_235_begin_0, end = normed_235_end_0, end_mask = normed_235_end_mask_0, x = normed_233_cast_fp16)[name = string("normed_235_cast_fp16")]; - tensor var_9008_to_fp16 = const()[name = string("op_9008_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224468800)))]; - tensor x_157_cast_fp16 = mul(x = normed_235_cast_fp16, y = var_9008_to_fp16)[name = string("x_157_cast_fp16")]; - tensor var_9020 = const()[name = string("op_9020"), val = tensor([0, 2, 1])]; - tensor input_195_axes_0 = const()[name = string("input_195_axes_0"), val = tensor([2])]; - tensor var_9021_cast_fp16 = transpose(perm = var_9020, x = x_157_cast_fp16)[name = string("transpose_145")]; - tensor input_195_cast_fp16 = expand_dims(axes = input_195_axes_0, x = var_9021_cast_fp16)[name = string("input_195_cast_fp16")]; - string x_159_pad_type_0 = const()[name = string("x_159_pad_type_0"), val = string("valid")]; - tensor x_159_strides_0 = const()[name = string("x_159_strides_0"), val = tensor([1, 1])]; - tensor x_159_pad_0 = const()[name = string("x_159_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_159_dilations_0 = const()[name = string("x_159_dilations_0"), val = tensor([1, 1])]; - int32 x_159_groups_0 = const()[name = string("x_159_groups_0"), val = int32(1)]; - tensor model_model_layers_9_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1300923584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1306895616))))[name = string("model_model_layers_9_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_159_cast_fp16 = conv(dilations = x_159_dilations_0, groups = x_159_groups_0, pad = x_159_pad_0, pad_type = x_159_pad_type_0, strides = x_159_strides_0, weight = model_model_layers_9_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_195_cast_fp16)[name = string("x_159_cast_fp16")]; - string b_19_pad_type_0 = const()[name = string("b_19_pad_type_0"), val = string("valid")]; - tensor b_19_strides_0 = const()[name = string("b_19_strides_0"), val = tensor([1, 1])]; - tensor b_19_pad_0 = const()[name = string("b_19_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_19_dilations_0 = const()[name = string("b_19_dilations_0"), val = tensor([1, 1])]; - int32 b_19_groups_0 = const()[name = string("b_19_groups_0"), val = int32(1)]; - tensor model_model_layers_9_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1307006272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1312978304))))[name = string("model_model_layers_9_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_19_cast_fp16 = conv(dilations = b_19_dilations_0, groups = b_19_groups_0, pad = b_19_pad_0, pad_type = b_19_pad_type_0, strides = b_19_strides_0, weight = model_model_layers_9_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_195_cast_fp16)[name = string("b_19_cast_fp16")]; - string var_9046_mode_0 = const()[name = string("op_9046_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_9046_cast_fp16 = gelu(mode = var_9046_mode_0, x = x_159_cast_fp16)[name = string("op_9046_cast_fp16")]; - tensor input_197_cast_fp16 = mul(x = var_9046_cast_fp16, y = b_19_cast_fp16)[name = string("input_197_cast_fp16")]; - string e_19_pad_type_0 = const()[name = string("e_19_pad_type_0"), val = string("valid")]; - tensor e_19_strides_0 = const()[name = string("e_19_strides_0"), val = tensor([1, 1])]; - tensor e_19_pad_0 = const()[name = string("e_19_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_19_dilations_0 = const()[name = string("e_19_dilations_0"), val = tensor([1, 1])]; - int32 e_19_groups_0 = const()[name = string("e_19_groups_0"), val = int32(1)]; - tensor model_model_layers_9_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236636544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242608576))))[name = string("model_model_layers_9_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_19_cast_fp16 = conv(dilations = e_19_dilations_0, groups = e_19_groups_0, pad = e_19_pad_0, pad_type = e_19_pad_type_0, strides = e_19_strides_0, weight = model_model_layers_9_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_197_cast_fp16)[name = string("e_19_cast_fp16")]; - tensor var_9054_axes_0 = const()[name = string("op_9054_axes_0"), val = tensor([2])]; - tensor var_9054_cast_fp16 = squeeze(axes = var_9054_axes_0, x = e_19_cast_fp16)[name = string("op_9054_cast_fp16")]; - tensor var_9055 = const()[name = string("op_9055"), val = tensor([0, 2, 1])]; - int32 var_9066 = const()[name = string("op_9066"), val = int32(-1)]; - fp16 const_417_promoted_to_fp16 = const()[name = string("const_417_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_157_cast_fp16 = transpose(perm = var_9055, x = var_9054_cast_fp16)[name = string("transpose_144")]; - tensor var_9068_cast_fp16 = mul(x = hidden_states_157_cast_fp16, y = const_417_promoted_to_fp16)[name = string("op_9068_cast_fp16")]; - bool input_199_interleave_0 = const()[name = string("input_199_interleave_0"), val = bool(false)]; - tensor input_199_cast_fp16 = concat(axis = var_9066, interleave = input_199_interleave_0, values = (hidden_states_157_cast_fp16, var_9068_cast_fp16))[name = string("input_199_cast_fp16")]; - tensor normed_237_axes_0 = const()[name = string("normed_237_axes_0"), val = tensor([-1])]; - fp16 var_9063_to_fp16 = const()[name = string("op_9063_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_237_cast_fp16 = layer_norm(axes = normed_237_axes_0, epsilon = var_9063_to_fp16, x = input_199_cast_fp16)[name = string("normed_237_cast_fp16")]; - tensor normed_239_begin_0 = const()[name = string("normed_239_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_239_end_0 = const()[name = string("normed_239_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_239_end_mask_0 = const()[name = string("normed_239_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_239_cast_fp16 = slice_by_index(begin = normed_239_begin_0, end = normed_239_end_0, end_mask = normed_239_end_mask_0, x = normed_237_cast_fp16)[name = string("normed_239_cast_fp16")]; - tensor var_9082_to_fp16 = const()[name = string("op_9082_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242627072)))]; - tensor hidden_states_159_cast_fp16 = mul(x = normed_239_cast_fp16, y = var_9082_to_fp16)[name = string("hidden_states_159_cast_fp16")]; - tensor hidden_states_161_cast_fp16 = add(x = hidden_states_155_cast_fp16, y = hidden_states_159_cast_fp16)[name = string("hidden_states_161_cast_fp16")]; - int32 var_9136 = const()[name = string("op_9136"), val = int32(-1)]; - fp16 const_422_promoted_to_fp16 = const()[name = string("const_422_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_9138_cast_fp16 = mul(x = hidden_states_161_cast_fp16, y = const_422_promoted_to_fp16)[name = string("op_9138_cast_fp16")]; - bool input_201_interleave_0 = const()[name = string("input_201_interleave_0"), val = bool(false)]; - tensor input_201_cast_fp16 = concat(axis = var_9136, interleave = input_201_interleave_0, values = (hidden_states_161_cast_fp16, var_9138_cast_fp16))[name = string("input_201_cast_fp16")]; - tensor normed_241_axes_0 = const()[name = string("normed_241_axes_0"), val = tensor([-1])]; - fp16 var_9133_to_fp16 = const()[name = string("op_9133_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_241_cast_fp16 = layer_norm(axes = normed_241_axes_0, epsilon = var_9133_to_fp16, x = input_201_cast_fp16)[name = string("normed_241_cast_fp16")]; - tensor normed_243_begin_0 = const()[name = string("normed_243_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_243_end_0 = const()[name = string("normed_243_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_243_end_mask_0 = const()[name = string("normed_243_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_243_cast_fp16 = slice_by_index(begin = normed_243_begin_0, end = normed_243_end_0, end_mask = normed_243_end_mask_0, x = normed_241_cast_fp16)[name = string("normed_243_cast_fp16")]; - tensor var_9152_to_fp16 = const()[name = string("op_9152_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242629440)))]; - tensor hidden_states_163_cast_fp16 = mul(x = normed_243_cast_fp16, y = var_9152_to_fp16)[name = string("hidden_states_163_cast_fp16")]; - tensor var_9163 = const()[name = string("op_9163"), val = tensor([0, 2, 1])]; - tensor var_9166_axes_0 = const()[name = string("op_9166_axes_0"), val = tensor([2])]; - tensor var_9164_cast_fp16 = transpose(perm = var_9163, x = hidden_states_163_cast_fp16)[name = string("transpose_143")]; - tensor var_9166_cast_fp16 = expand_dims(axes = var_9166_axes_0, x = var_9164_cast_fp16)[name = string("op_9166_cast_fp16")]; - string query_states_81_pad_type_0 = const()[name = string("query_states_81_pad_type_0"), val = string("valid")]; - tensor query_states_81_strides_0 = const()[name = string("query_states_81_strides_0"), val = tensor([1, 1])]; - tensor query_states_81_pad_0 = const()[name = string("query_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_81_dilations_0 = const()[name = string("query_states_81_dilations_0"), val = tensor([1, 1])]; - int32 query_states_81_groups_0 = const()[name = string("query_states_81_groups_0"), val = int32(1)]; - tensor query_states_81 = conv(dilations = query_states_81_dilations_0, groups = query_states_81_groups_0, pad = query_states_81_pad_0, pad_type = query_states_81_pad_type_0, strides = query_states_81_strides_0, weight = model_model_layers_10_self_attn_q_proj_weight_palettized, x = var_9166_cast_fp16)[name = string("query_states_81")]; - string key_states_101_pad_type_0 = const()[name = string("key_states_101_pad_type_0"), val = string("valid")]; - tensor key_states_101_strides_0 = const()[name = string("key_states_101_strides_0"), val = tensor([1, 1])]; - tensor key_states_101_pad_0 = const()[name = string("key_states_101_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_101_dilations_0 = const()[name = string("key_states_101_dilations_0"), val = tensor([1, 1])]; - int32 key_states_101_groups_0 = const()[name = string("key_states_101_groups_0"), val = int32(1)]; - tensor key_states_101 = conv(dilations = key_states_101_dilations_0, groups = key_states_101_groups_0, pad = key_states_101_pad_0, pad_type = key_states_101_pad_type_0, strides = key_states_101_strides_0, weight = model_model_layers_10_self_attn_k_proj_weight_palettized, x = var_9166_cast_fp16)[name = string("key_states_101")]; - string value_states_81_pad_type_0 = const()[name = string("value_states_81_pad_type_0"), val = string("valid")]; - tensor value_states_81_strides_0 = const()[name = string("value_states_81_strides_0"), val = tensor([1, 1])]; - tensor value_states_81_pad_0 = const()[name = string("value_states_81_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_81_dilations_0 = const()[name = string("value_states_81_dilations_0"), val = tensor([1, 1])]; - int32 value_states_81_groups_0 = const()[name = string("value_states_81_groups_0"), val = int32(1)]; - tensor value_states_81 = conv(dilations = value_states_81_dilations_0, groups = value_states_81_groups_0, pad = value_states_81_pad_0, pad_type = value_states_81_pad_type_0, strides = value_states_81_strides_0, weight = model_model_layers_10_self_attn_v_proj_weight_palettized, x = var_9166_cast_fp16)[name = string("value_states_81")]; - tensor var_9208 = const()[name = string("op_9208"), val = tensor([1, 4, 256, 64])]; - tensor var_9209 = reshape(shape = var_9208, x = query_states_81)[name = string("op_9209")]; - tensor var_9214 = const()[name = string("op_9214"), val = tensor([0, 1, 3, 2])]; - tensor var_9219 = const()[name = string("op_9219"), val = tensor([1, 1, 256, 64])]; - tensor var_9220 = reshape(shape = var_9219, x = key_states_101)[name = string("op_9220")]; - tensor var_9225 = const()[name = string("op_9225"), val = tensor([0, 1, 3, 2])]; - tensor var_9230 = const()[name = string("op_9230"), val = tensor([1, 1, 256, 64])]; - tensor var_9231 = reshape(shape = var_9230, x = value_states_81)[name = string("op_9231")]; - tensor var_9236 = const()[name = string("op_9236"), val = tensor([0, 1, 3, 2])]; - int32 var_9247 = const()[name = string("op_9247"), val = int32(-1)]; - fp16 const_427_promoted = const()[name = string("const_427_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_165 = transpose(perm = var_9214, x = var_9209)[name = string("transpose_142")]; - tensor var_9249 = mul(x = hidden_states_165, y = const_427_promoted)[name = string("op_9249")]; - bool input_205_interleave_0 = const()[name = string("input_205_interleave_0"), val = bool(false)]; - tensor input_205 = concat(axis = var_9247, interleave = input_205_interleave_0, values = (hidden_states_165, var_9249))[name = string("input_205")]; - tensor normed_245_axes_0 = const()[name = string("normed_245_axes_0"), val = tensor([-1])]; - fp16 var_9244_to_fp16 = const()[name = string("op_9244_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_245_cast_fp16 = layer_norm(axes = normed_245_axes_0, epsilon = var_9244_to_fp16, x = input_205)[name = string("normed_245_cast_fp16")]; - tensor normed_247_begin_0 = const()[name = string("normed_247_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_247_end_0 = const()[name = string("normed_247_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_247_end_mask_0 = const()[name = string("normed_247_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_247 = slice_by_index(begin = normed_247_begin_0, end = normed_247_end_0, end_mask = normed_247_end_mask_0, x = normed_245_cast_fp16)[name = string("normed_247")]; - tensor var_9263_to_fp16 = const()[name = string("op_9263_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242631808)))]; - tensor q_21_cast_fp16 = mul(x = normed_247, y = var_9263_to_fp16)[name = string("q_21_cast_fp16")]; - int32 var_9274 = const()[name = string("op_9274"), val = int32(-1)]; - fp16 const_431_promoted = const()[name = string("const_431_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_167 = transpose(perm = var_9225, x = var_9220)[name = string("transpose_141")]; - tensor var_9276 = mul(x = hidden_states_167, y = const_431_promoted)[name = string("op_9276")]; - bool input_207_interleave_0 = const()[name = string("input_207_interleave_0"), val = bool(false)]; - tensor input_207 = concat(axis = var_9274, interleave = input_207_interleave_0, values = (hidden_states_167, var_9276))[name = string("input_207")]; - tensor normed_249_axes_0 = const()[name = string("normed_249_axes_0"), val = tensor([-1])]; - fp16 var_9271_to_fp16 = const()[name = string("op_9271_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_249_cast_fp16 = layer_norm(axes = normed_249_axes_0, epsilon = var_9271_to_fp16, x = input_207)[name = string("normed_249_cast_fp16")]; - tensor normed_251_begin_0 = const()[name = string("normed_251_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_251_end_0 = const()[name = string("normed_251_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_251_end_mask_0 = const()[name = string("normed_251_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_251 = slice_by_index(begin = normed_251_begin_0, end = normed_251_end_0, end_mask = normed_251_end_mask_0, x = normed_249_cast_fp16)[name = string("normed_251")]; - tensor var_9290_to_fp16 = const()[name = string("op_9290_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242632384)))]; - tensor k_21_cast_fp16 = mul(x = normed_251, y = var_9290_to_fp16)[name = string("k_21_cast_fp16")]; - tensor var_9304_cast_fp16 = mul(x = q_21_cast_fp16, y = cos_5)[name = string("op_9304_cast_fp16")]; - tensor x1_41_begin_0 = const()[name = string("x1_41_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_41_end_0 = const()[name = string("x1_41_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_41_end_mask_0 = const()[name = string("x1_41_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_41_cast_fp16 = slice_by_index(begin = x1_41_begin_0, end = x1_41_end_0, end_mask = x1_41_end_mask_0, x = q_21_cast_fp16)[name = string("x1_41_cast_fp16")]; - tensor x2_41_begin_0 = const()[name = string("x2_41_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_41_end_0 = const()[name = string("x2_41_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_41_end_mask_0 = const()[name = string("x2_41_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_41_cast_fp16 = slice_by_index(begin = x2_41_begin_0, end = x2_41_end_0, end_mask = x2_41_end_mask_0, x = q_21_cast_fp16)[name = string("x2_41_cast_fp16")]; - fp16 const_437_promoted_to_fp16 = const()[name = string("const_437_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_9325_cast_fp16 = mul(x = x2_41_cast_fp16, y = const_437_promoted_to_fp16)[name = string("op_9325_cast_fp16")]; - int32 var_9327 = const()[name = string("op_9327"), val = int32(-1)]; - bool var_9328_interleave_0 = const()[name = string("op_9328_interleave_0"), val = bool(false)]; - tensor var_9328_cast_fp16 = concat(axis = var_9327, interleave = var_9328_interleave_0, values = (var_9325_cast_fp16, x1_41_cast_fp16))[name = string("op_9328_cast_fp16")]; - tensor var_9329_cast_fp16 = mul(x = var_9328_cast_fp16, y = sin_5)[name = string("op_9329_cast_fp16")]; - tensor query_states_83_cast_fp16 = add(x = var_9304_cast_fp16, y = var_9329_cast_fp16)[name = string("query_states_83_cast_fp16")]; - tensor var_9332_cast_fp16 = mul(x = k_21_cast_fp16, y = cos_5)[name = string("op_9332_cast_fp16")]; - tensor x1_43_begin_0 = const()[name = string("x1_43_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_43_end_0 = const()[name = string("x1_43_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_43_end_mask_0 = const()[name = string("x1_43_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_43_cast_fp16 = slice_by_index(begin = x1_43_begin_0, end = x1_43_end_0, end_mask = x1_43_end_mask_0, x = k_21_cast_fp16)[name = string("x1_43_cast_fp16")]; - tensor x2_43_begin_0 = const()[name = string("x2_43_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_43_end_0 = const()[name = string("x2_43_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_43_end_mask_0 = const()[name = string("x2_43_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_43_cast_fp16 = slice_by_index(begin = x2_43_begin_0, end = x2_43_end_0, end_mask = x2_43_end_mask_0, x = k_21_cast_fp16)[name = string("x2_43_cast_fp16")]; - fp16 const_440_promoted_to_fp16 = const()[name = string("const_440_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_9353_cast_fp16 = mul(x = x2_43_cast_fp16, y = const_440_promoted_to_fp16)[name = string("op_9353_cast_fp16")]; - int32 var_9355 = const()[name = string("op_9355"), val = int32(-1)]; - bool var_9356_interleave_0 = const()[name = string("op_9356_interleave_0"), val = bool(false)]; - tensor var_9356_cast_fp16 = concat(axis = var_9355, interleave = var_9356_interleave_0, values = (var_9353_cast_fp16, x1_43_cast_fp16))[name = string("op_9356_cast_fp16")]; - tensor var_9357_cast_fp16 = mul(x = var_9356_cast_fp16, y = sin_5)[name = string("op_9357_cast_fp16")]; - tensor key_states_103_cast_fp16 = add(x = var_9332_cast_fp16, y = var_9357_cast_fp16)[name = string("key_states_103_cast_fp16")]; - tensor key_slice_19_begin_0 = const()[name = string("key_slice_19_begin_0"), val = tensor([9, 0, 0, 0])]; - tensor key_slice_19_end_0 = const()[name = string("key_slice_19_end_0"), val = tensor([10, 1, 512, 256])]; - tensor key_slice_19_end_mask_0 = const()[name = string("key_slice_19_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_19_cast_fp16 = slice_by_index(begin = key_slice_19_begin_0, end = key_slice_19_end_0, end_mask = key_slice_19_end_mask_0, x = coreml_update_state_71)[name = string("key_slice_19_cast_fp16")]; - tensor var_9394_begin_0 = const()[name = string("op_9394_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_9394_end_0 = const()[name = string("op_9394_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_9394_end_mask_0 = const()[name = string("op_9394_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_9394_cast_fp16 = slice_by_index(begin = var_9394_begin_0, end = var_9394_end_0, end_mask = var_9394_end_mask_0, x = key_slice_19_cast_fp16)[name = string("op_9394_cast_fp16")]; - int32 var_9421 = const()[name = string("op_9421"), val = int32(2)]; - bool shifted_key_19_interleave_0 = const()[name = string("shifted_key_19_interleave_0"), val = bool(false)]; - tensor shifted_key_19_cast_fp16 = concat(axis = var_9421, interleave = shifted_key_19_interleave_0, values = (var_9394_cast_fp16, key_states_103_cast_fp16))[name = string("shifted_key_19_cast_fp16")]; - tensor concat_144 = const()[name = string("concat_144"), val = tensor([9, 0, 0, 0])]; - tensor concat_145 = const()[name = string("concat_145"), val = tensor([10, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_19_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_19_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_19_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_144, begin_mask = model_model_kv_cache_local_internal_tensor_assign_19_begin_mask_0, end = concat_145, end_mask = model_model_kv_cache_local_internal_tensor_assign_19_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_19_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_19_stride_0, update = shifted_key_19_cast_fp16, x = coreml_update_state_71)[name = string("model_model_kv_cache_local_internal_tensor_assign_19_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_19_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_176_write_state")]; - tensor coreml_update_state_72 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_176")]; - tensor value_slice_19_begin_0 = const()[name = string("value_slice_19_begin_0"), val = tensor([31, 0, 0, 0])]; - tensor value_slice_19_end_0 = const()[name = string("value_slice_19_end_0"), val = tensor([32, 1, 512, 256])]; - tensor value_slice_19_end_mask_0 = const()[name = string("value_slice_19_end_mask_0"), val = tensor([false, true, true, true])]; - tensor value_slice_19_cast_fp16 = slice_by_index(begin = value_slice_19_begin_0, end = value_slice_19_end_0, end_mask = value_slice_19_end_mask_0, x = coreml_update_state_72)[name = string("value_slice_19_cast_fp16")]; - tensor var_9464_begin_0 = const()[name = string("op_9464_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_9464_end_0 = const()[name = string("op_9464_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_9464_end_mask_0 = const()[name = string("op_9464_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_9464_cast_fp16 = slice_by_index(begin = var_9464_begin_0, end = var_9464_end_0, end_mask = var_9464_end_mask_0, x = value_slice_19_cast_fp16)[name = string("op_9464_cast_fp16")]; - int32 var_9491 = const()[name = string("op_9491"), val = int32(2)]; - bool shifted_value_19_interleave_0 = const()[name = string("shifted_value_19_interleave_0"), val = bool(false)]; - tensor value_states_83 = transpose(perm = var_9236, x = var_9231)[name = string("transpose_140")]; - tensor shifted_value_19_cast_fp16 = concat(axis = var_9491, interleave = shifted_value_19_interleave_0, values = (var_9464_cast_fp16, value_states_83))[name = string("shifted_value_19_cast_fp16")]; - tensor concat_146 = const()[name = string("concat_146"), val = tensor([31, 0, 0, 0])]; - tensor concat_147 = const()[name = string("concat_147"), val = tensor([32, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_20_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_20_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_20_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_146, begin_mask = model_model_kv_cache_local_internal_tensor_assign_20_begin_mask_0, end = concat_147, end_mask = model_model_kv_cache_local_internal_tensor_assign_20_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_20_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_20_stride_0, update = shifted_value_19_cast_fp16, x = coreml_update_state_72)[name = string("model_model_kv_cache_local_internal_tensor_assign_20_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_20_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_177_write_state")]; - tensor coreml_update_state_73 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_177")]; - tensor var_9519_begin_0 = const()[name = string("op_9519_begin_0"), val = tensor([9, 0, 0, 0])]; - tensor var_9519_end_0 = const()[name = string("op_9519_end_0"), val = tensor([10, 1, 512, 256])]; - tensor var_9519_end_mask_0 = const()[name = string("op_9519_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_9519_cast_fp16 = slice_by_index(begin = var_9519_begin_0, end = var_9519_end_0, end_mask = var_9519_end_mask_0, x = coreml_update_state_73)[name = string("op_9519_cast_fp16")]; - tensor var_9526_begin_0 = const()[name = string("op_9526_begin_0"), val = tensor([31, 0, 0, 0])]; - tensor var_9526_end_0 = const()[name = string("op_9526_end_0"), val = tensor([32, 1, 512, 256])]; - tensor var_9526_end_mask_0 = const()[name = string("op_9526_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_9526_cast_fp16 = slice_by_index(begin = var_9526_begin_0, end = var_9526_end_0, end_mask = var_9526_end_mask_0, x = coreml_update_state_73)[name = string("op_9526_cast_fp16")]; - tensor var_9565 = const()[name = string("op_9565"), val = tensor([1, 4, 1, 1])]; - tensor x_165_cast_fp16 = tile(reps = var_9565, x = var_9519_cast_fp16)[name = string("x_165_cast_fp16")]; - tensor var_9585 = const()[name = string("op_9585"), val = tensor([1, 4, 1, 1])]; - tensor x_171_cast_fp16 = tile(reps = var_9585, x = var_9526_cast_fp16)[name = string("x_171_cast_fp16")]; - bool var_9612_transpose_x_0 = const()[name = string("op_9612_transpose_x_0"), val = bool(false)]; - bool var_9612_transpose_y_0 = const()[name = string("op_9612_transpose_y_0"), val = bool(true)]; - tensor var_9612 = matmul(transpose_x = var_9612_transpose_x_0, transpose_y = var_9612_transpose_y_0, x = query_states_83_cast_fp16, y = x_165_cast_fp16)[name = string("op_9612")]; - fp16 var_9613_to_fp16 = const()[name = string("op_9613_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_41_cast_fp16 = mul(x = var_9612, y = var_9613_to_fp16)[name = string("attn_weights_41_cast_fp16")]; - tensor attn_weights_43_cast_fp16 = add(x = attn_weights_41_cast_fp16, y = mask_slice_1)[name = string("attn_weights_43_cast_fp16")]; - int32 var_9648 = const()[name = string("op_9648"), val = int32(-1)]; - tensor var_9650_cast_fp16 = softmax(axis = var_9648, x = attn_weights_43_cast_fp16)[name = string("op_9650_cast_fp16")]; - tensor concat_152 = const()[name = string("concat_152"), val = tensor([4, 64, 512])]; - tensor reshape_30_cast_fp16 = reshape(shape = concat_152, x = var_9650_cast_fp16)[name = string("reshape_30_cast_fp16")]; - tensor concat_153 = const()[name = string("concat_153"), val = tensor([4, 512, 256])]; - tensor reshape_31_cast_fp16 = reshape(shape = concat_153, x = x_171_cast_fp16)[name = string("reshape_31_cast_fp16")]; - bool matmul_10_transpose_x_0 = const()[name = string("matmul_10_transpose_x_0"), val = bool(false)]; - bool matmul_10_transpose_y_0 = const()[name = string("matmul_10_transpose_y_0"), val = bool(false)]; - tensor matmul_10_cast_fp16 = matmul(transpose_x = matmul_10_transpose_x_0, transpose_y = matmul_10_transpose_y_0, x = reshape_30_cast_fp16, y = reshape_31_cast_fp16)[name = string("matmul_10_cast_fp16")]; - tensor concat_157 = const()[name = string("concat_157"), val = tensor([1, 4, 64, 256])]; - tensor reshape_32_cast_fp16 = reshape(shape = concat_157, x = matmul_10_cast_fp16)[name = string("reshape_32_cast_fp16")]; - tensor var_9662_perm_0 = const()[name = string("op_9662_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_9681 = const()[name = string("op_9681"), val = tensor([1, 64, 1024])]; - tensor var_9662_cast_fp16 = transpose(perm = var_9662_perm_0, x = reshape_32_cast_fp16)[name = string("transpose_139")]; - tensor attn_output_105_cast_fp16 = reshape(shape = var_9681, x = var_9662_cast_fp16)[name = string("attn_output_105_cast_fp16")]; - tensor var_9686 = const()[name = string("op_9686"), val = tensor([0, 2, 1])]; - string var_9702_pad_type_0 = const()[name = string("op_9702_pad_type_0"), val = string("valid")]; - int32 var_9702_groups_0 = const()[name = string("op_9702_groups_0"), val = int32(1)]; - tensor var_9702_strides_0 = const()[name = string("op_9702_strides_0"), val = tensor([1])]; - tensor var_9702_pad_0 = const()[name = string("op_9702_pad_0"), val = tensor([0, 0])]; - tensor var_9702_dilations_0 = const()[name = string("op_9702_dilations_0"), val = tensor([1])]; - tensor squeeze_10_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242632960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243517760))))[name = string("squeeze_10_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_9687_cast_fp16 = transpose(perm = var_9686, x = attn_output_105_cast_fp16)[name = string("transpose_138")]; - tensor var_9702_cast_fp16 = conv(dilations = var_9702_dilations_0, groups = var_9702_groups_0, pad = var_9702_pad_0, pad_type = var_9702_pad_type_0, strides = var_9702_strides_0, weight = squeeze_10_cast_fp16_to_fp32_to_fp16_palettized, x = var_9687_cast_fp16)[name = string("op_9702_cast_fp16")]; - tensor var_9706 = const()[name = string("op_9706"), val = tensor([0, 2, 1])]; - int32 var_9717 = const()[name = string("op_9717"), val = int32(-1)]; - fp16 const_451_promoted_to_fp16 = const()[name = string("const_451_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_169_cast_fp16 = transpose(perm = var_9706, x = var_9702_cast_fp16)[name = string("transpose_137")]; - tensor var_9719_cast_fp16 = mul(x = hidden_states_169_cast_fp16, y = const_451_promoted_to_fp16)[name = string("op_9719_cast_fp16")]; - bool input_211_interleave_0 = const()[name = string("input_211_interleave_0"), val = bool(false)]; - tensor input_211_cast_fp16 = concat(axis = var_9717, interleave = input_211_interleave_0, values = (hidden_states_169_cast_fp16, var_9719_cast_fp16))[name = string("input_211_cast_fp16")]; - tensor normed_253_axes_0 = const()[name = string("normed_253_axes_0"), val = tensor([-1])]; - fp16 var_9714_to_fp16 = const()[name = string("op_9714_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_253_cast_fp16 = layer_norm(axes = normed_253_axes_0, epsilon = var_9714_to_fp16, x = input_211_cast_fp16)[name = string("normed_253_cast_fp16")]; - tensor normed_255_begin_0 = const()[name = string("normed_255_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_255_end_0 = const()[name = string("normed_255_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_255_end_mask_0 = const()[name = string("normed_255_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_255_cast_fp16 = slice_by_index(begin = normed_255_begin_0, end = normed_255_end_0, end_mask = normed_255_end_mask_0, x = normed_253_cast_fp16)[name = string("normed_255_cast_fp16")]; - tensor var_9733_to_fp16 = const()[name = string("op_9733_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243536256)))]; - tensor attn_output_109_cast_fp16 = mul(x = normed_255_cast_fp16, y = var_9733_to_fp16)[name = string("attn_output_109_cast_fp16")]; - tensor hidden_states_171_cast_fp16 = add(x = hidden_states_161_cast_fp16, y = attn_output_109_cast_fp16)[name = string("hidden_states_171_cast_fp16")]; - int32 var_9746 = const()[name = string("op_9746"), val = int32(-1)]; - fp16 const_455_promoted_to_fp16 = const()[name = string("const_455_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_9748_cast_fp16 = mul(x = hidden_states_171_cast_fp16, y = const_455_promoted_to_fp16)[name = string("op_9748_cast_fp16")]; - bool input_213_interleave_0 = const()[name = string("input_213_interleave_0"), val = bool(false)]; - tensor input_213_cast_fp16 = concat(axis = var_9746, interleave = input_213_interleave_0, values = (hidden_states_171_cast_fp16, var_9748_cast_fp16))[name = string("input_213_cast_fp16")]; - tensor normed_257_axes_0 = const()[name = string("normed_257_axes_0"), val = tensor([-1])]; - fp16 var_9743_to_fp16 = const()[name = string("op_9743_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_257_cast_fp16 = layer_norm(axes = normed_257_axes_0, epsilon = var_9743_to_fp16, x = input_213_cast_fp16)[name = string("normed_257_cast_fp16")]; - tensor normed_259_begin_0 = const()[name = string("normed_259_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_259_end_0 = const()[name = string("normed_259_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_259_end_mask_0 = const()[name = string("normed_259_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_259_cast_fp16 = slice_by_index(begin = normed_259_begin_0, end = normed_259_end_0, end_mask = normed_259_end_mask_0, x = normed_257_cast_fp16)[name = string("normed_259_cast_fp16")]; - tensor var_9762_to_fp16 = const()[name = string("op_9762_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243538624)))]; - tensor x_173_cast_fp16 = mul(x = normed_259_cast_fp16, y = var_9762_to_fp16)[name = string("x_173_cast_fp16")]; - tensor var_9774 = const()[name = string("op_9774"), val = tensor([0, 2, 1])]; - tensor input_215_axes_0 = const()[name = string("input_215_axes_0"), val = tensor([2])]; - tensor var_9775_cast_fp16 = transpose(perm = var_9774, x = x_173_cast_fp16)[name = string("transpose_136")]; - tensor input_215_cast_fp16 = expand_dims(axes = input_215_axes_0, x = var_9775_cast_fp16)[name = string("input_215_cast_fp16")]; - string x_175_pad_type_0 = const()[name = string("x_175_pad_type_0"), val = string("valid")]; - tensor x_175_strides_0 = const()[name = string("x_175_strides_0"), val = tensor([1, 1])]; - tensor x_175_pad_0 = const()[name = string("x_175_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_175_dilations_0 = const()[name = string("x_175_dilations_0"), val = tensor([1, 1])]; - int32 x_175_groups_0 = const()[name = string("x_175_groups_0"), val = int32(1)]; - tensor model_model_layers_10_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1313088960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1319060992))))[name = string("model_model_layers_10_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_175_cast_fp16 = conv(dilations = x_175_dilations_0, groups = x_175_groups_0, pad = x_175_pad_0, pad_type = x_175_pad_type_0, strides = x_175_strides_0, weight = model_model_layers_10_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_215_cast_fp16)[name = string("x_175_cast_fp16")]; - string b_21_pad_type_0 = const()[name = string("b_21_pad_type_0"), val = string("valid")]; - tensor b_21_strides_0 = const()[name = string("b_21_strides_0"), val = tensor([1, 1])]; - tensor b_21_pad_0 = const()[name = string("b_21_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_21_dilations_0 = const()[name = string("b_21_dilations_0"), val = tensor([1, 1])]; - int32 b_21_groups_0 = const()[name = string("b_21_groups_0"), val = int32(1)]; - tensor model_model_layers_10_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1319171648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1325143680))))[name = string("model_model_layers_10_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_21_cast_fp16 = conv(dilations = b_21_dilations_0, groups = b_21_groups_0, pad = b_21_pad_0, pad_type = b_21_pad_type_0, strides = b_21_strides_0, weight = model_model_layers_10_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_215_cast_fp16)[name = string("b_21_cast_fp16")]; - string var_9800_mode_0 = const()[name = string("op_9800_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_9800_cast_fp16 = gelu(mode = var_9800_mode_0, x = x_175_cast_fp16)[name = string("op_9800_cast_fp16")]; - tensor input_217_cast_fp16 = mul(x = var_9800_cast_fp16, y = b_21_cast_fp16)[name = string("input_217_cast_fp16")]; - string e_21_pad_type_0 = const()[name = string("e_21_pad_type_0"), val = string("valid")]; - tensor e_21_strides_0 = const()[name = string("e_21_strides_0"), val = tensor([1, 1])]; - tensor e_21_pad_0 = const()[name = string("e_21_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_21_dilations_0 = const()[name = string("e_21_dilations_0"), val = tensor([1, 1])]; - int32 e_21_groups_0 = const()[name = string("e_21_groups_0"), val = int32(1)]; - tensor model_model_layers_10_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(255706368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261678400))))[name = string("model_model_layers_10_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_21_cast_fp16 = conv(dilations = e_21_dilations_0, groups = e_21_groups_0, pad = e_21_pad_0, pad_type = e_21_pad_type_0, strides = e_21_strides_0, weight = model_model_layers_10_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_217_cast_fp16)[name = string("e_21_cast_fp16")]; - tensor var_9808_axes_0 = const()[name = string("op_9808_axes_0"), val = tensor([2])]; - tensor var_9808_cast_fp16 = squeeze(axes = var_9808_axes_0, x = e_21_cast_fp16)[name = string("op_9808_cast_fp16")]; - tensor var_9809 = const()[name = string("op_9809"), val = tensor([0, 2, 1])]; - int32 var_9820 = const()[name = string("op_9820"), val = int32(-1)]; - fp16 const_459_promoted_to_fp16 = const()[name = string("const_459_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_173_cast_fp16 = transpose(perm = var_9809, x = var_9808_cast_fp16)[name = string("transpose_135")]; - tensor var_9822_cast_fp16 = mul(x = hidden_states_173_cast_fp16, y = const_459_promoted_to_fp16)[name = string("op_9822_cast_fp16")]; - bool input_219_interleave_0 = const()[name = string("input_219_interleave_0"), val = bool(false)]; - tensor input_219_cast_fp16 = concat(axis = var_9820, interleave = input_219_interleave_0, values = (hidden_states_173_cast_fp16, var_9822_cast_fp16))[name = string("input_219_cast_fp16")]; - tensor normed_261_axes_0 = const()[name = string("normed_261_axes_0"), val = tensor([-1])]; - fp16 var_9817_to_fp16 = const()[name = string("op_9817_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_261_cast_fp16 = layer_norm(axes = normed_261_axes_0, epsilon = var_9817_to_fp16, x = input_219_cast_fp16)[name = string("normed_261_cast_fp16")]; - tensor normed_263_begin_0 = const()[name = string("normed_263_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_263_end_0 = const()[name = string("normed_263_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_263_end_mask_0 = const()[name = string("normed_263_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_263_cast_fp16 = slice_by_index(begin = normed_263_begin_0, end = normed_263_end_0, end_mask = normed_263_end_mask_0, x = normed_261_cast_fp16)[name = string("normed_263_cast_fp16")]; - tensor var_9836_to_fp16 = const()[name = string("op_9836_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261696896)))]; - tensor hidden_states_175_cast_fp16 = mul(x = normed_263_cast_fp16, y = var_9836_to_fp16)[name = string("hidden_states_175_cast_fp16")]; - tensor hidden_states_177_cast_fp16 = add(x = hidden_states_171_cast_fp16, y = hidden_states_175_cast_fp16)[name = string("hidden_states_177_cast_fp16")]; - int32 var_9890 = const()[name = string("op_9890"), val = int32(-1)]; - fp16 const_464_promoted_to_fp16 = const()[name = string("const_464_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_9892_cast_fp16 = mul(x = hidden_states_177_cast_fp16, y = const_464_promoted_to_fp16)[name = string("op_9892_cast_fp16")]; - bool input_221_interleave_0 = const()[name = string("input_221_interleave_0"), val = bool(false)]; - tensor input_221_cast_fp16 = concat(axis = var_9890, interleave = input_221_interleave_0, values = (hidden_states_177_cast_fp16, var_9892_cast_fp16))[name = string("input_221_cast_fp16")]; - tensor normed_265_axes_0 = const()[name = string("normed_265_axes_0"), val = tensor([-1])]; - fp16 var_9887_to_fp16 = const()[name = string("op_9887_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_265_cast_fp16 = layer_norm(axes = normed_265_axes_0, epsilon = var_9887_to_fp16, x = input_221_cast_fp16)[name = string("normed_265_cast_fp16")]; - tensor normed_267_begin_0 = const()[name = string("normed_267_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_267_end_0 = const()[name = string("normed_267_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_267_end_mask_0 = const()[name = string("normed_267_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_267_cast_fp16 = slice_by_index(begin = normed_267_begin_0, end = normed_267_end_0, end_mask = normed_267_end_mask_0, x = normed_265_cast_fp16)[name = string("normed_267_cast_fp16")]; - tensor var_9906_to_fp16 = const()[name = string("op_9906_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261699264)))]; - tensor hidden_states_179_cast_fp16 = mul(x = normed_267_cast_fp16, y = var_9906_to_fp16)[name = string("hidden_states_179_cast_fp16")]; - tensor var_9917 = const()[name = string("op_9917"), val = tensor([0, 2, 1])]; - tensor var_9920_axes_0 = const()[name = string("op_9920_axes_0"), val = tensor([2])]; - tensor var_9918_cast_fp16 = transpose(perm = var_9917, x = hidden_states_179_cast_fp16)[name = string("transpose_134")]; - tensor var_9920_cast_fp16 = expand_dims(axes = var_9920_axes_0, x = var_9918_cast_fp16)[name = string("op_9920_cast_fp16")]; - string query_states_89_pad_type_0 = const()[name = string("query_states_89_pad_type_0"), val = string("valid")]; - tensor query_states_89_strides_0 = const()[name = string("query_states_89_strides_0"), val = tensor([1, 1])]; - tensor query_states_89_pad_0 = const()[name = string("query_states_89_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_89_dilations_0 = const()[name = string("query_states_89_dilations_0"), val = tensor([1, 1])]; - int32 query_states_89_groups_0 = const()[name = string("query_states_89_groups_0"), val = int32(1)]; - tensor query_states_89 = conv(dilations = query_states_89_dilations_0, groups = query_states_89_groups_0, pad = query_states_89_pad_0, pad_type = query_states_89_pad_type_0, strides = query_states_89_strides_0, weight = model_model_layers_11_self_attn_q_proj_weight_palettized, x = var_9920_cast_fp16)[name = string("query_states_89")]; - string key_states_111_pad_type_0 = const()[name = string("key_states_111_pad_type_0"), val = string("valid")]; - tensor key_states_111_strides_0 = const()[name = string("key_states_111_strides_0"), val = tensor([1, 1])]; - tensor key_states_111_pad_0 = const()[name = string("key_states_111_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_111_dilations_0 = const()[name = string("key_states_111_dilations_0"), val = tensor([1, 1])]; - int32 key_states_111_groups_0 = const()[name = string("key_states_111_groups_0"), val = int32(1)]; - tensor key_states_111 = conv(dilations = key_states_111_dilations_0, groups = key_states_111_groups_0, pad = key_states_111_pad_0, pad_type = key_states_111_pad_type_0, strides = key_states_111_strides_0, weight = model_model_layers_11_self_attn_k_proj_weight_palettized, x = var_9920_cast_fp16)[name = string("key_states_111")]; - string value_states_89_pad_type_0 = const()[name = string("value_states_89_pad_type_0"), val = string("valid")]; - tensor value_states_89_strides_0 = const()[name = string("value_states_89_strides_0"), val = tensor([1, 1])]; - tensor value_states_89_pad_0 = const()[name = string("value_states_89_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_89_dilations_0 = const()[name = string("value_states_89_dilations_0"), val = tensor([1, 1])]; - int32 value_states_89_groups_0 = const()[name = string("value_states_89_groups_0"), val = int32(1)]; - tensor value_states_89 = conv(dilations = value_states_89_dilations_0, groups = value_states_89_groups_0, pad = value_states_89_pad_0, pad_type = value_states_89_pad_type_0, strides = value_states_89_strides_0, weight = model_model_layers_11_self_attn_v_proj_weight_palettized, x = var_9920_cast_fp16)[name = string("value_states_89")]; - tensor var_9962 = const()[name = string("op_9962"), val = tensor([1, 4, 256, 64])]; - tensor var_9963 = reshape(shape = var_9962, x = query_states_89)[name = string("op_9963")]; - tensor var_9968 = const()[name = string("op_9968"), val = tensor([0, 1, 3, 2])]; - tensor var_9973 = const()[name = string("op_9973"), val = tensor([1, 1, 256, 64])]; - tensor var_9974 = reshape(shape = var_9973, x = key_states_111)[name = string("op_9974")]; - tensor var_9979 = const()[name = string("op_9979"), val = tensor([0, 1, 3, 2])]; - tensor var_9984 = const()[name = string("op_9984"), val = tensor([1, 1, 256, 64])]; - tensor var_9985 = reshape(shape = var_9984, x = value_states_89)[name = string("op_9985")]; - tensor var_9990 = const()[name = string("op_9990"), val = tensor([0, 1, 3, 2])]; - int32 var_10001 = const()[name = string("op_10001"), val = int32(-1)]; - fp16 const_469_promoted = const()[name = string("const_469_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_181 = transpose(perm = var_9968, x = var_9963)[name = string("transpose_133")]; - tensor var_10003 = mul(x = hidden_states_181, y = const_469_promoted)[name = string("op_10003")]; - bool input_225_interleave_0 = const()[name = string("input_225_interleave_0"), val = bool(false)]; - tensor input_225 = concat(axis = var_10001, interleave = input_225_interleave_0, values = (hidden_states_181, var_10003))[name = string("input_225")]; - tensor normed_269_axes_0 = const()[name = string("normed_269_axes_0"), val = tensor([-1])]; - fp16 var_9998_to_fp16 = const()[name = string("op_9998_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_269_cast_fp16 = layer_norm(axes = normed_269_axes_0, epsilon = var_9998_to_fp16, x = input_225)[name = string("normed_269_cast_fp16")]; - tensor normed_271_begin_0 = const()[name = string("normed_271_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_271_end_0 = const()[name = string("normed_271_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_271_end_mask_0 = const()[name = string("normed_271_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_271 = slice_by_index(begin = normed_271_begin_0, end = normed_271_end_0, end_mask = normed_271_end_mask_0, x = normed_269_cast_fp16)[name = string("normed_271")]; - tensor var_10017_to_fp16 = const()[name = string("op_10017_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261701632)))]; - tensor q_23_cast_fp16 = mul(x = normed_271, y = var_10017_to_fp16)[name = string("q_23_cast_fp16")]; - int32 var_10028 = const()[name = string("op_10028"), val = int32(-1)]; - fp16 const_473_promoted = const()[name = string("const_473_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_183 = transpose(perm = var_9979, x = var_9974)[name = string("transpose_132")]; - tensor var_10030 = mul(x = hidden_states_183, y = const_473_promoted)[name = string("op_10030")]; - bool input_227_interleave_0 = const()[name = string("input_227_interleave_0"), val = bool(false)]; - tensor input_227 = concat(axis = var_10028, interleave = input_227_interleave_0, values = (hidden_states_183, var_10030))[name = string("input_227")]; - tensor normed_273_axes_0 = const()[name = string("normed_273_axes_0"), val = tensor([-1])]; - fp16 var_10025_to_fp16 = const()[name = string("op_10025_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_273_cast_fp16 = layer_norm(axes = normed_273_axes_0, epsilon = var_10025_to_fp16, x = input_227)[name = string("normed_273_cast_fp16")]; - tensor normed_275_begin_0 = const()[name = string("normed_275_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_275_end_0 = const()[name = string("normed_275_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_275_end_mask_0 = const()[name = string("normed_275_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_275 = slice_by_index(begin = normed_275_begin_0, end = normed_275_end_0, end_mask = normed_275_end_mask_0, x = normed_273_cast_fp16)[name = string("normed_275")]; - tensor var_10044_to_fp16 = const()[name = string("op_10044_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261702208)))]; - tensor k_23_cast_fp16 = mul(x = normed_275, y = var_10044_to_fp16)[name = string("k_23_cast_fp16")]; - tensor var_10058_cast_fp16 = mul(x = q_23_cast_fp16, y = cos_35)[name = string("op_10058_cast_fp16")]; - tensor x1_45_begin_0 = const()[name = string("x1_45_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_45_end_0 = const()[name = string("x1_45_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_45_end_mask_0 = const()[name = string("x1_45_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_45_cast_fp16 = slice_by_index(begin = x1_45_begin_0, end = x1_45_end_0, end_mask = x1_45_end_mask_0, x = q_23_cast_fp16)[name = string("x1_45_cast_fp16")]; - tensor x2_45_begin_0 = const()[name = string("x2_45_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_45_end_0 = const()[name = string("x2_45_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_45_end_mask_0 = const()[name = string("x2_45_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_45_cast_fp16 = slice_by_index(begin = x2_45_begin_0, end = x2_45_end_0, end_mask = x2_45_end_mask_0, x = q_23_cast_fp16)[name = string("x2_45_cast_fp16")]; - fp16 const_479_promoted_to_fp16 = const()[name = string("const_479_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_10079_cast_fp16 = mul(x = x2_45_cast_fp16, y = const_479_promoted_to_fp16)[name = string("op_10079_cast_fp16")]; - int32 var_10081 = const()[name = string("op_10081"), val = int32(-1)]; - bool var_10082_interleave_0 = const()[name = string("op_10082_interleave_0"), val = bool(false)]; - tensor var_10082_cast_fp16 = concat(axis = var_10081, interleave = var_10082_interleave_0, values = (var_10079_cast_fp16, x1_45_cast_fp16))[name = string("op_10082_cast_fp16")]; - tensor var_10083_cast_fp16 = mul(x = var_10082_cast_fp16, y = sin_35)[name = string("op_10083_cast_fp16")]; - tensor query_states_91_cast_fp16 = add(x = var_10058_cast_fp16, y = var_10083_cast_fp16)[name = string("query_states_91_cast_fp16")]; - tensor var_10086_cast_fp16 = mul(x = k_23_cast_fp16, y = cos_35)[name = string("op_10086_cast_fp16")]; - tensor x1_47_begin_0 = const()[name = string("x1_47_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_47_end_0 = const()[name = string("x1_47_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_47_end_mask_0 = const()[name = string("x1_47_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_47_cast_fp16 = slice_by_index(begin = x1_47_begin_0, end = x1_47_end_0, end_mask = x1_47_end_mask_0, x = k_23_cast_fp16)[name = string("x1_47_cast_fp16")]; - tensor x2_47_begin_0 = const()[name = string("x2_47_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_47_end_0 = const()[name = string("x2_47_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_47_end_mask_0 = const()[name = string("x2_47_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_47_cast_fp16 = slice_by_index(begin = x2_47_begin_0, end = x2_47_end_0, end_mask = x2_47_end_mask_0, x = k_23_cast_fp16)[name = string("x2_47_cast_fp16")]; - fp16 const_482_promoted_to_fp16 = const()[name = string("const_482_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_10107_cast_fp16 = mul(x = x2_47_cast_fp16, y = const_482_promoted_to_fp16)[name = string("op_10107_cast_fp16")]; - int32 var_10109 = const()[name = string("op_10109"), val = int32(-1)]; - bool var_10110_interleave_0 = const()[name = string("op_10110_interleave_0"), val = bool(false)]; - tensor var_10110_cast_fp16 = concat(axis = var_10109, interleave = var_10110_interleave_0, values = (var_10107_cast_fp16, x1_47_cast_fp16))[name = string("op_10110_cast_fp16")]; - tensor var_10111_cast_fp16 = mul(x = var_10110_cast_fp16, y = sin_35)[name = string("op_10111_cast_fp16")]; - tensor key_states_113_cast_fp16 = add(x = var_10086_cast_fp16, y = var_10111_cast_fp16)[name = string("key_states_113_cast_fp16")]; - tensor expand_dims_112 = const()[name = string("expand_dims_112"), val = tensor([1])]; - tensor expand_dims_113 = const()[name = string("expand_dims_113"), val = tensor([0])]; - tensor expand_dims_115 = const()[name = string("expand_dims_115"), val = tensor([0])]; - tensor expand_dims_116 = const()[name = string("expand_dims_116"), val = tensor([2])]; - int32 concat_160_axis_0 = const()[name = string("concat_160_axis_0"), val = int32(0)]; - bool concat_160_interleave_0 = const()[name = string("concat_160_interleave_0"), val = bool(false)]; - tensor concat_160 = concat(axis = concat_160_axis_0, interleave = concat_160_interleave_0, values = (expand_dims_112, expand_dims_113, current_pos, expand_dims_115))[name = string("concat_160")]; - tensor concat_161_values1_0 = const()[name = string("concat_161_values1_0"), val = tensor([0])]; - tensor concat_161_values3_0 = const()[name = string("concat_161_values3_0"), val = tensor([0])]; - int32 concat_161_axis_0 = const()[name = string("concat_161_axis_0"), val = int32(0)]; - bool concat_161_interleave_0 = const()[name = string("concat_161_interleave_0"), val = bool(false)]; - tensor concat_161 = concat(axis = concat_161_axis_0, interleave = concat_161_interleave_0, values = (expand_dims_116, concat_161_values1_0, end_pos_1, concat_161_values3_0))[name = string("concat_161")]; - tensor model_model_kv_cache_global_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_global_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_global_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_160, begin_mask = model_model_kv_cache_global_internal_tensor_assign_3_begin_mask_0, end = concat_161, end_mask = model_model_kv_cache_global_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_3_stride_0, update = key_states_113_cast_fp16, x = coreml_update_state_63)[name = string("model_model_kv_cache_global_internal_tensor_assign_3_cast_fp16")]; - write_state(data = model_model_kv_cache_global_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_178_write_state")]; - tensor coreml_update_state_74 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_178")]; - tensor expand_dims_118 = const()[name = string("expand_dims_118"), val = tensor([5])]; - tensor expand_dims_119 = const()[name = string("expand_dims_119"), val = tensor([0])]; - tensor expand_dims_121 = const()[name = string("expand_dims_121"), val = tensor([0])]; - tensor expand_dims_122 = const()[name = string("expand_dims_122"), val = tensor([6])]; - int32 concat_164_axis_0 = const()[name = string("concat_164_axis_0"), val = int32(0)]; - bool concat_164_interleave_0 = const()[name = string("concat_164_interleave_0"), val = bool(false)]; - tensor concat_164 = concat(axis = concat_164_axis_0, interleave = concat_164_interleave_0, values = (expand_dims_118, expand_dims_119, current_pos, expand_dims_121))[name = string("concat_164")]; - tensor concat_165_values1_0 = const()[name = string("concat_165_values1_0"), val = tensor([0])]; - tensor concat_165_values3_0 = const()[name = string("concat_165_values3_0"), val = tensor([0])]; - int32 concat_165_axis_0 = const()[name = string("concat_165_axis_0"), val = int32(0)]; - bool concat_165_interleave_0 = const()[name = string("concat_165_interleave_0"), val = bool(false)]; - tensor concat_165 = concat(axis = concat_165_axis_0, interleave = concat_165_interleave_0, values = (expand_dims_122, concat_165_values1_0, end_pos_1, concat_165_values3_0))[name = string("concat_165")]; - tensor model_model_kv_cache_global_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_global_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_global_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor value_states_91 = transpose(perm = var_9990, x = var_9985)[name = string("transpose_131")]; - tensor model_model_kv_cache_global_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_164, begin_mask = model_model_kv_cache_global_internal_tensor_assign_4_begin_mask_0, end = concat_165, end_mask = model_model_kv_cache_global_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_4_stride_0, update = value_states_91, x = coreml_update_state_74)[name = string("model_model_kv_cache_global_internal_tensor_assign_4_cast_fp16")]; - write_state(data = model_model_kv_cache_global_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_179_write_state")]; - tensor coreml_update_state_75 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_179")]; - tensor var_10210_begin_0 = const()[name = string("op_10210_begin_0"), val = tensor([1, 0, 0, 0])]; - tensor var_10210_end_0 = const()[name = string("op_10210_end_0"), val = tensor([2, 1, 4096, 256])]; - tensor var_10210_end_mask_0 = const()[name = string("op_10210_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_10210_cast_fp16 = slice_by_index(begin = var_10210_begin_0, end = var_10210_end_0, end_mask = var_10210_end_mask_0, x = coreml_update_state_75)[name = string("op_10210_cast_fp16")]; - tensor var_10217_begin_0 = const()[name = string("op_10217_begin_0"), val = tensor([5, 0, 0, 0])]; - tensor var_10217_end_0 = const()[name = string("op_10217_end_0"), val = tensor([6, 1, 4096, 256])]; - tensor var_10217_end_mask_0 = const()[name = string("op_10217_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_10217_cast_fp16 = slice_by_index(begin = var_10217_begin_0, end = var_10217_end_0, end_mask = var_10217_end_mask_0, x = coreml_update_state_75)[name = string("op_10217_cast_fp16")]; - tensor var_10256 = const()[name = string("op_10256"), val = tensor([1, 4, 1, 1])]; - tensor x_181_cast_fp16 = tile(reps = var_10256, x = var_10210_cast_fp16)[name = string("x_181_cast_fp16")]; - tensor var_10276 = const()[name = string("op_10276"), val = tensor([1, 4, 1, 1])]; - tensor x_187_cast_fp16 = tile(reps = var_10276, x = var_10217_cast_fp16)[name = string("x_187_cast_fp16")]; - bool var_10303_transpose_x_0 = const()[name = string("op_10303_transpose_x_0"), val = bool(false)]; - bool var_10303_transpose_y_0 = const()[name = string("op_10303_transpose_y_0"), val = bool(true)]; - tensor var_10303 = matmul(transpose_x = var_10303_transpose_x_0, transpose_y = var_10303_transpose_y_0, x = query_states_91_cast_fp16, y = x_181_cast_fp16)[name = string("op_10303")]; - fp16 var_10304_to_fp16 = const()[name = string("op_10304_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_45_cast_fp16 = mul(x = var_10303, y = var_10304_to_fp16)[name = string("attn_weights_45_cast_fp16")]; - tensor attn_weights_47_cast_fp16 = add(x = attn_weights_45_cast_fp16, y = causal_mask)[name = string("attn_weights_47_cast_fp16")]; - int32 var_10339 = const()[name = string("op_10339"), val = int32(-1)]; - tensor var_10341_cast_fp16 = softmax(axis = var_10339, x = attn_weights_47_cast_fp16)[name = string("op_10341_cast_fp16")]; - tensor concat_170 = const()[name = string("concat_170"), val = tensor([4, 64, 4096])]; - tensor reshape_33_cast_fp16 = reshape(shape = concat_170, x = var_10341_cast_fp16)[name = string("reshape_33_cast_fp16")]; - tensor concat_171 = const()[name = string("concat_171"), val = tensor([4, 4096, 256])]; - tensor reshape_34_cast_fp16 = reshape(shape = concat_171, x = x_187_cast_fp16)[name = string("reshape_34_cast_fp16")]; - bool matmul_11_transpose_x_0 = const()[name = string("matmul_11_transpose_x_0"), val = bool(false)]; - bool matmul_11_transpose_y_0 = const()[name = string("matmul_11_transpose_y_0"), val = bool(false)]; - tensor matmul_11_cast_fp16 = matmul(transpose_x = matmul_11_transpose_x_0, transpose_y = matmul_11_transpose_y_0, x = reshape_33_cast_fp16, y = reshape_34_cast_fp16)[name = string("matmul_11_cast_fp16")]; - tensor concat_175 = const()[name = string("concat_175"), val = tensor([1, 4, 64, 256])]; - tensor reshape_35_cast_fp16 = reshape(shape = concat_175, x = matmul_11_cast_fp16)[name = string("reshape_35_cast_fp16")]; - tensor var_10353_perm_0 = const()[name = string("op_10353_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_10372 = const()[name = string("op_10372"), val = tensor([1, 64, 1024])]; - tensor var_10353_cast_fp16 = transpose(perm = var_10353_perm_0, x = reshape_35_cast_fp16)[name = string("transpose_130")]; - tensor attn_output_115_cast_fp16 = reshape(shape = var_10372, x = var_10353_cast_fp16)[name = string("attn_output_115_cast_fp16")]; - tensor var_10377 = const()[name = string("op_10377"), val = tensor([0, 2, 1])]; - string var_10393_pad_type_0 = const()[name = string("op_10393_pad_type_0"), val = string("valid")]; - int32 var_10393_groups_0 = const()[name = string("op_10393_groups_0"), val = int32(1)]; - tensor var_10393_strides_0 = const()[name = string("op_10393_strides_0"), val = tensor([1])]; - tensor var_10393_pad_0 = const()[name = string("op_10393_pad_0"), val = tensor([0, 0])]; - tensor var_10393_dilations_0 = const()[name = string("op_10393_dilations_0"), val = tensor([1])]; - tensor squeeze_11_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261702784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262587584))))[name = string("squeeze_11_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_10378_cast_fp16 = transpose(perm = var_10377, x = attn_output_115_cast_fp16)[name = string("transpose_129")]; - tensor var_10393_cast_fp16 = conv(dilations = var_10393_dilations_0, groups = var_10393_groups_0, pad = var_10393_pad_0, pad_type = var_10393_pad_type_0, strides = var_10393_strides_0, weight = squeeze_11_cast_fp16_to_fp32_to_fp16_palettized, x = var_10378_cast_fp16)[name = string("op_10393_cast_fp16")]; - tensor var_10397 = const()[name = string("op_10397"), val = tensor([0, 2, 1])]; - int32 var_10408 = const()[name = string("op_10408"), val = int32(-1)]; - fp16 const_494_promoted_to_fp16 = const()[name = string("const_494_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_185_cast_fp16 = transpose(perm = var_10397, x = var_10393_cast_fp16)[name = string("transpose_128")]; - tensor var_10410_cast_fp16 = mul(x = hidden_states_185_cast_fp16, y = const_494_promoted_to_fp16)[name = string("op_10410_cast_fp16")]; - bool input_231_interleave_0 = const()[name = string("input_231_interleave_0"), val = bool(false)]; - tensor input_231_cast_fp16 = concat(axis = var_10408, interleave = input_231_interleave_0, values = (hidden_states_185_cast_fp16, var_10410_cast_fp16))[name = string("input_231_cast_fp16")]; - tensor normed_277_axes_0 = const()[name = string("normed_277_axes_0"), val = tensor([-1])]; - fp16 var_10405_to_fp16 = const()[name = string("op_10405_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_277_cast_fp16 = layer_norm(axes = normed_277_axes_0, epsilon = var_10405_to_fp16, x = input_231_cast_fp16)[name = string("normed_277_cast_fp16")]; - tensor normed_279_begin_0 = const()[name = string("normed_279_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_279_end_0 = const()[name = string("normed_279_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_279_end_mask_0 = const()[name = string("normed_279_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_279_cast_fp16 = slice_by_index(begin = normed_279_begin_0, end = normed_279_end_0, end_mask = normed_279_end_mask_0, x = normed_277_cast_fp16)[name = string("normed_279_cast_fp16")]; - tensor var_10424_to_fp16 = const()[name = string("op_10424_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262606080)))]; - tensor attn_output_119_cast_fp16 = mul(x = normed_279_cast_fp16, y = var_10424_to_fp16)[name = string("attn_output_119_cast_fp16")]; - tensor hidden_states_187_cast_fp16 = add(x = hidden_states_177_cast_fp16, y = attn_output_119_cast_fp16)[name = string("hidden_states_187_cast_fp16")]; - int32 var_10437 = const()[name = string("op_10437"), val = int32(-1)]; - fp16 const_498_promoted_to_fp16 = const()[name = string("const_498_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_10439_cast_fp16 = mul(x = hidden_states_187_cast_fp16, y = const_498_promoted_to_fp16)[name = string("op_10439_cast_fp16")]; - bool input_233_interleave_0 = const()[name = string("input_233_interleave_0"), val = bool(false)]; - tensor input_233_cast_fp16 = concat(axis = var_10437, interleave = input_233_interleave_0, values = (hidden_states_187_cast_fp16, var_10439_cast_fp16))[name = string("input_233_cast_fp16")]; - tensor normed_281_axes_0 = const()[name = string("normed_281_axes_0"), val = tensor([-1])]; - fp16 var_10434_to_fp16 = const()[name = string("op_10434_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_281_cast_fp16 = layer_norm(axes = normed_281_axes_0, epsilon = var_10434_to_fp16, x = input_233_cast_fp16)[name = string("normed_281_cast_fp16")]; - tensor normed_283_begin_0 = const()[name = string("normed_283_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_283_end_0 = const()[name = string("normed_283_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_283_end_mask_0 = const()[name = string("normed_283_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_283_cast_fp16 = slice_by_index(begin = normed_283_begin_0, end = normed_283_end_0, end_mask = normed_283_end_mask_0, x = normed_281_cast_fp16)[name = string("normed_283_cast_fp16")]; - tensor var_10453_to_fp16 = const()[name = string("op_10453_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262608448)))]; - tensor x_189_cast_fp16 = mul(x = normed_283_cast_fp16, y = var_10453_to_fp16)[name = string("x_189_cast_fp16")]; - tensor var_10465 = const()[name = string("op_10465"), val = tensor([0, 2, 1])]; - tensor input_235_axes_0 = const()[name = string("input_235_axes_0"), val = tensor([2])]; - tensor var_10466_cast_fp16 = transpose(perm = var_10465, x = x_189_cast_fp16)[name = string("transpose_127")]; - tensor input_235_cast_fp16 = expand_dims(axes = input_235_axes_0, x = var_10466_cast_fp16)[name = string("input_235_cast_fp16")]; - string x_191_pad_type_0 = const()[name = string("x_191_pad_type_0"), val = string("valid")]; - tensor x_191_strides_0 = const()[name = string("x_191_strides_0"), val = tensor([1, 1])]; - tensor x_191_pad_0 = const()[name = string("x_191_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_191_dilations_0 = const()[name = string("x_191_dilations_0"), val = tensor([1, 1])]; - int32 x_191_groups_0 = const()[name = string("x_191_groups_0"), val = int32(1)]; - tensor model_model_layers_11_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1325254336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1331226368))))[name = string("model_model_layers_11_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_191_cast_fp16 = conv(dilations = x_191_dilations_0, groups = x_191_groups_0, pad = x_191_pad_0, pad_type = x_191_pad_type_0, strides = x_191_strides_0, weight = model_model_layers_11_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_235_cast_fp16)[name = string("x_191_cast_fp16")]; - string b_23_pad_type_0 = const()[name = string("b_23_pad_type_0"), val = string("valid")]; - tensor b_23_strides_0 = const()[name = string("b_23_strides_0"), val = tensor([1, 1])]; - tensor b_23_pad_0 = const()[name = string("b_23_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_23_dilations_0 = const()[name = string("b_23_dilations_0"), val = tensor([1, 1])]; - int32 b_23_groups_0 = const()[name = string("b_23_groups_0"), val = int32(1)]; - tensor model_model_layers_11_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1331337024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1337309056))))[name = string("model_model_layers_11_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_23_cast_fp16 = conv(dilations = b_23_dilations_0, groups = b_23_groups_0, pad = b_23_pad_0, pad_type = b_23_pad_type_0, strides = b_23_strides_0, weight = model_model_layers_11_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_235_cast_fp16)[name = string("b_23_cast_fp16")]; - string var_10491_mode_0 = const()[name = string("op_10491_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_10491_cast_fp16 = gelu(mode = var_10491_mode_0, x = x_191_cast_fp16)[name = string("op_10491_cast_fp16")]; - tensor input_237_cast_fp16 = mul(x = var_10491_cast_fp16, y = b_23_cast_fp16)[name = string("input_237_cast_fp16")]; - string e_23_pad_type_0 = const()[name = string("e_23_pad_type_0"), val = string("valid")]; - tensor e_23_strides_0 = const()[name = string("e_23_strides_0"), val = tensor([1, 1])]; - tensor e_23_pad_0 = const()[name = string("e_23_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_23_dilations_0 = const()[name = string("e_23_dilations_0"), val = tensor([1, 1])]; - int32 e_23_groups_0 = const()[name = string("e_23_groups_0"), val = int32(1)]; - tensor model_model_layers_11_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(274776192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280748224))))[name = string("model_model_layers_11_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_23_cast_fp16 = conv(dilations = e_23_dilations_0, groups = e_23_groups_0, pad = e_23_pad_0, pad_type = e_23_pad_type_0, strides = e_23_strides_0, weight = model_model_layers_11_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_237_cast_fp16)[name = string("e_23_cast_fp16")]; - tensor var_10499_axes_0 = const()[name = string("op_10499_axes_0"), val = tensor([2])]; - tensor var_10499_cast_fp16 = squeeze(axes = var_10499_axes_0, x = e_23_cast_fp16)[name = string("op_10499_cast_fp16")]; - tensor var_10500 = const()[name = string("op_10500"), val = tensor([0, 2, 1])]; - int32 var_10511 = const()[name = string("op_10511"), val = int32(-1)]; - fp16 const_502_promoted_to_fp16 = const()[name = string("const_502_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_189_cast_fp16 = transpose(perm = var_10500, x = var_10499_cast_fp16)[name = string("transpose_126")]; - tensor var_10513_cast_fp16 = mul(x = hidden_states_189_cast_fp16, y = const_502_promoted_to_fp16)[name = string("op_10513_cast_fp16")]; - bool input_239_interleave_0 = const()[name = string("input_239_interleave_0"), val = bool(false)]; - tensor input_239_cast_fp16 = concat(axis = var_10511, interleave = input_239_interleave_0, values = (hidden_states_189_cast_fp16, var_10513_cast_fp16))[name = string("input_239_cast_fp16")]; - tensor normed_285_axes_0 = const()[name = string("normed_285_axes_0"), val = tensor([-1])]; - fp16 var_10508_to_fp16 = const()[name = string("op_10508_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_285_cast_fp16 = layer_norm(axes = normed_285_axes_0, epsilon = var_10508_to_fp16, x = input_239_cast_fp16)[name = string("normed_285_cast_fp16")]; - tensor normed_287_begin_0 = const()[name = string("normed_287_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_287_end_0 = const()[name = string("normed_287_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_287_end_mask_0 = const()[name = string("normed_287_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_287_cast_fp16 = slice_by_index(begin = normed_287_begin_0, end = normed_287_end_0, end_mask = normed_287_end_mask_0, x = normed_285_cast_fp16)[name = string("normed_287_cast_fp16")]; - tensor var_10527_to_fp16 = const()[name = string("op_10527_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280766720)))]; - tensor hidden_states_191_cast_fp16 = mul(x = normed_287_cast_fp16, y = var_10527_to_fp16)[name = string("hidden_states_191_cast_fp16")]; - tensor hidden_states_193_cast_fp16 = add(x = hidden_states_187_cast_fp16, y = hidden_states_191_cast_fp16)[name = string("hidden_states_193_cast_fp16")]; - int32 var_10581 = const()[name = string("op_10581"), val = int32(-1)]; - fp16 const_507_promoted_to_fp16 = const()[name = string("const_507_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_10583_cast_fp16 = mul(x = hidden_states_193_cast_fp16, y = const_507_promoted_to_fp16)[name = string("op_10583_cast_fp16")]; - bool input_241_interleave_0 = const()[name = string("input_241_interleave_0"), val = bool(false)]; - tensor input_241_cast_fp16 = concat(axis = var_10581, interleave = input_241_interleave_0, values = (hidden_states_193_cast_fp16, var_10583_cast_fp16))[name = string("input_241_cast_fp16")]; - tensor normed_289_axes_0 = const()[name = string("normed_289_axes_0"), val = tensor([-1])]; - fp16 var_10578_to_fp16 = const()[name = string("op_10578_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_289_cast_fp16 = layer_norm(axes = normed_289_axes_0, epsilon = var_10578_to_fp16, x = input_241_cast_fp16)[name = string("normed_289_cast_fp16")]; - tensor normed_291_begin_0 = const()[name = string("normed_291_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_291_end_0 = const()[name = string("normed_291_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_291_end_mask_0 = const()[name = string("normed_291_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_291_cast_fp16 = slice_by_index(begin = normed_291_begin_0, end = normed_291_end_0, end_mask = normed_291_end_mask_0, x = normed_289_cast_fp16)[name = string("normed_291_cast_fp16")]; - tensor var_10597_to_fp16 = const()[name = string("op_10597_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280769088)))]; - tensor hidden_states_195_cast_fp16 = mul(x = normed_291_cast_fp16, y = var_10597_to_fp16)[name = string("hidden_states_195_cast_fp16")]; - tensor var_10608 = const()[name = string("op_10608"), val = tensor([0, 2, 1])]; - tensor var_10611_axes_0 = const()[name = string("op_10611_axes_0"), val = tensor([2])]; - tensor var_10609_cast_fp16 = transpose(perm = var_10608, x = hidden_states_195_cast_fp16)[name = string("transpose_125")]; - tensor var_10611_cast_fp16 = expand_dims(axes = var_10611_axes_0, x = var_10609_cast_fp16)[name = string("op_10611_cast_fp16")]; - string query_states_97_pad_type_0 = const()[name = string("query_states_97_pad_type_0"), val = string("valid")]; - tensor query_states_97_strides_0 = const()[name = string("query_states_97_strides_0"), val = tensor([1, 1])]; - tensor query_states_97_pad_0 = const()[name = string("query_states_97_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_97_dilations_0 = const()[name = string("query_states_97_dilations_0"), val = tensor([1, 1])]; - int32 query_states_97_groups_0 = const()[name = string("query_states_97_groups_0"), val = int32(1)]; - tensor query_states_97 = conv(dilations = query_states_97_dilations_0, groups = query_states_97_groups_0, pad = query_states_97_pad_0, pad_type = query_states_97_pad_type_0, strides = query_states_97_strides_0, weight = model_model_layers_12_self_attn_q_proj_weight_palettized, x = var_10611_cast_fp16)[name = string("query_states_97")]; - string key_states_121_pad_type_0 = const()[name = string("key_states_121_pad_type_0"), val = string("valid")]; - tensor key_states_121_strides_0 = const()[name = string("key_states_121_strides_0"), val = tensor([1, 1])]; - tensor key_states_121_pad_0 = const()[name = string("key_states_121_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_121_dilations_0 = const()[name = string("key_states_121_dilations_0"), val = tensor([1, 1])]; - int32 key_states_121_groups_0 = const()[name = string("key_states_121_groups_0"), val = int32(1)]; - tensor key_states_121 = conv(dilations = key_states_121_dilations_0, groups = key_states_121_groups_0, pad = key_states_121_pad_0, pad_type = key_states_121_pad_type_0, strides = key_states_121_strides_0, weight = model_model_layers_12_self_attn_k_proj_weight_palettized, x = var_10611_cast_fp16)[name = string("key_states_121")]; - string value_states_97_pad_type_0 = const()[name = string("value_states_97_pad_type_0"), val = string("valid")]; - tensor value_states_97_strides_0 = const()[name = string("value_states_97_strides_0"), val = tensor([1, 1])]; - tensor value_states_97_pad_0 = const()[name = string("value_states_97_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_97_dilations_0 = const()[name = string("value_states_97_dilations_0"), val = tensor([1, 1])]; - int32 value_states_97_groups_0 = const()[name = string("value_states_97_groups_0"), val = int32(1)]; - tensor value_states_97 = conv(dilations = value_states_97_dilations_0, groups = value_states_97_groups_0, pad = value_states_97_pad_0, pad_type = value_states_97_pad_type_0, strides = value_states_97_strides_0, weight = model_model_layers_12_self_attn_v_proj_weight_palettized, x = var_10611_cast_fp16)[name = string("value_states_97")]; - tensor var_10653 = const()[name = string("op_10653"), val = tensor([1, 4, 256, 64])]; - tensor var_10654 = reshape(shape = var_10653, x = query_states_97)[name = string("op_10654")]; - tensor var_10659 = const()[name = string("op_10659"), val = tensor([0, 1, 3, 2])]; - tensor var_10664 = const()[name = string("op_10664"), val = tensor([1, 1, 256, 64])]; - tensor var_10665 = reshape(shape = var_10664, x = key_states_121)[name = string("op_10665")]; - tensor var_10670 = const()[name = string("op_10670"), val = tensor([0, 1, 3, 2])]; - tensor var_10675 = const()[name = string("op_10675"), val = tensor([1, 1, 256, 64])]; - tensor var_10676 = reshape(shape = var_10675, x = value_states_97)[name = string("op_10676")]; - tensor var_10681 = const()[name = string("op_10681"), val = tensor([0, 1, 3, 2])]; - int32 var_10692 = const()[name = string("op_10692"), val = int32(-1)]; - fp16 const_512_promoted = const()[name = string("const_512_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_197 = transpose(perm = var_10659, x = var_10654)[name = string("transpose_124")]; - tensor var_10694 = mul(x = hidden_states_197, y = const_512_promoted)[name = string("op_10694")]; - bool input_245_interleave_0 = const()[name = string("input_245_interleave_0"), val = bool(false)]; - tensor input_245 = concat(axis = var_10692, interleave = input_245_interleave_0, values = (hidden_states_197, var_10694))[name = string("input_245")]; - tensor normed_293_axes_0 = const()[name = string("normed_293_axes_0"), val = tensor([-1])]; - fp16 var_10689_to_fp16 = const()[name = string("op_10689_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_293_cast_fp16 = layer_norm(axes = normed_293_axes_0, epsilon = var_10689_to_fp16, x = input_245)[name = string("normed_293_cast_fp16")]; - tensor normed_295_begin_0 = const()[name = string("normed_295_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_295_end_0 = const()[name = string("normed_295_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_295_end_mask_0 = const()[name = string("normed_295_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_295 = slice_by_index(begin = normed_295_begin_0, end = normed_295_end_0, end_mask = normed_295_end_mask_0, x = normed_293_cast_fp16)[name = string("normed_295")]; - tensor var_10708_to_fp16 = const()[name = string("op_10708_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280771456)))]; - tensor q_25_cast_fp16 = mul(x = normed_295, y = var_10708_to_fp16)[name = string("q_25_cast_fp16")]; - int32 var_10719 = const()[name = string("op_10719"), val = int32(-1)]; - fp16 const_516_promoted = const()[name = string("const_516_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_199 = transpose(perm = var_10670, x = var_10665)[name = string("transpose_123")]; - tensor var_10721 = mul(x = hidden_states_199, y = const_516_promoted)[name = string("op_10721")]; - bool input_247_interleave_0 = const()[name = string("input_247_interleave_0"), val = bool(false)]; - tensor input_247 = concat(axis = var_10719, interleave = input_247_interleave_0, values = (hidden_states_199, var_10721))[name = string("input_247")]; - tensor normed_297_axes_0 = const()[name = string("normed_297_axes_0"), val = tensor([-1])]; - fp16 var_10716_to_fp16 = const()[name = string("op_10716_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_297_cast_fp16 = layer_norm(axes = normed_297_axes_0, epsilon = var_10716_to_fp16, x = input_247)[name = string("normed_297_cast_fp16")]; - tensor normed_299_begin_0 = const()[name = string("normed_299_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_299_end_0 = const()[name = string("normed_299_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_299_end_mask_0 = const()[name = string("normed_299_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_299 = slice_by_index(begin = normed_299_begin_0, end = normed_299_end_0, end_mask = normed_299_end_mask_0, x = normed_297_cast_fp16)[name = string("normed_299")]; - tensor var_10735_to_fp16 = const()[name = string("op_10735_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280772032)))]; - tensor k_25_cast_fp16 = mul(x = normed_299, y = var_10735_to_fp16)[name = string("k_25_cast_fp16")]; - tensor var_10749_cast_fp16 = mul(x = q_25_cast_fp16, y = cos_5)[name = string("op_10749_cast_fp16")]; - tensor x1_49_begin_0 = const()[name = string("x1_49_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_49_end_0 = const()[name = string("x1_49_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_49_end_mask_0 = const()[name = string("x1_49_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_49_cast_fp16 = slice_by_index(begin = x1_49_begin_0, end = x1_49_end_0, end_mask = x1_49_end_mask_0, x = q_25_cast_fp16)[name = string("x1_49_cast_fp16")]; - tensor x2_49_begin_0 = const()[name = string("x2_49_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_49_end_0 = const()[name = string("x2_49_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_49_end_mask_0 = const()[name = string("x2_49_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_49_cast_fp16 = slice_by_index(begin = x2_49_begin_0, end = x2_49_end_0, end_mask = x2_49_end_mask_0, x = q_25_cast_fp16)[name = string("x2_49_cast_fp16")]; - fp16 const_522_promoted_to_fp16 = const()[name = string("const_522_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_10770_cast_fp16 = mul(x = x2_49_cast_fp16, y = const_522_promoted_to_fp16)[name = string("op_10770_cast_fp16")]; - int32 var_10772 = const()[name = string("op_10772"), val = int32(-1)]; - bool var_10773_interleave_0 = const()[name = string("op_10773_interleave_0"), val = bool(false)]; - tensor var_10773_cast_fp16 = concat(axis = var_10772, interleave = var_10773_interleave_0, values = (var_10770_cast_fp16, x1_49_cast_fp16))[name = string("op_10773_cast_fp16")]; - tensor var_10774_cast_fp16 = mul(x = var_10773_cast_fp16, y = sin_5)[name = string("op_10774_cast_fp16")]; - tensor query_states_99_cast_fp16 = add(x = var_10749_cast_fp16, y = var_10774_cast_fp16)[name = string("query_states_99_cast_fp16")]; - tensor var_10777_cast_fp16 = mul(x = k_25_cast_fp16, y = cos_5)[name = string("op_10777_cast_fp16")]; - tensor x1_51_begin_0 = const()[name = string("x1_51_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_51_end_0 = const()[name = string("x1_51_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_51_end_mask_0 = const()[name = string("x1_51_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_51_cast_fp16 = slice_by_index(begin = x1_51_begin_0, end = x1_51_end_0, end_mask = x1_51_end_mask_0, x = k_25_cast_fp16)[name = string("x1_51_cast_fp16")]; - tensor x2_51_begin_0 = const()[name = string("x2_51_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_51_end_0 = const()[name = string("x2_51_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_51_end_mask_0 = const()[name = string("x2_51_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_51_cast_fp16 = slice_by_index(begin = x2_51_begin_0, end = x2_51_end_0, end_mask = x2_51_end_mask_0, x = k_25_cast_fp16)[name = string("x2_51_cast_fp16")]; - fp16 const_525_promoted_to_fp16 = const()[name = string("const_525_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_10798_cast_fp16 = mul(x = x2_51_cast_fp16, y = const_525_promoted_to_fp16)[name = string("op_10798_cast_fp16")]; - int32 var_10800 = const()[name = string("op_10800"), val = int32(-1)]; - bool var_10801_interleave_0 = const()[name = string("op_10801_interleave_0"), val = bool(false)]; - tensor var_10801_cast_fp16 = concat(axis = var_10800, interleave = var_10801_interleave_0, values = (var_10798_cast_fp16, x1_51_cast_fp16))[name = string("op_10801_cast_fp16")]; - tensor var_10802_cast_fp16 = mul(x = var_10801_cast_fp16, y = sin_5)[name = string("op_10802_cast_fp16")]; - tensor key_states_123_cast_fp16 = add(x = var_10777_cast_fp16, y = var_10802_cast_fp16)[name = string("key_states_123_cast_fp16")]; - tensor key_slice_21_begin_0 = const()[name = string("key_slice_21_begin_0"), val = tensor([10, 0, 0, 0])]; - tensor key_slice_21_end_0 = const()[name = string("key_slice_21_end_0"), val = tensor([11, 1, 512, 256])]; - tensor key_slice_21_end_mask_0 = const()[name = string("key_slice_21_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_21_cast_fp16 = slice_by_index(begin = key_slice_21_begin_0, end = key_slice_21_end_0, end_mask = key_slice_21_end_mask_0, x = coreml_update_state_73)[name = string("key_slice_21_cast_fp16")]; - tensor var_10839_begin_0 = const()[name = string("op_10839_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_10839_end_0 = const()[name = string("op_10839_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_10839_end_mask_0 = const()[name = string("op_10839_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_10839_cast_fp16 = slice_by_index(begin = var_10839_begin_0, end = var_10839_end_0, end_mask = var_10839_end_mask_0, x = key_slice_21_cast_fp16)[name = string("op_10839_cast_fp16")]; - int32 var_10866 = const()[name = string("op_10866"), val = int32(2)]; - bool shifted_key_21_interleave_0 = const()[name = string("shifted_key_21_interleave_0"), val = bool(false)]; - tensor shifted_key_21_cast_fp16 = concat(axis = var_10866, interleave = shifted_key_21_interleave_0, values = (var_10839_cast_fp16, key_states_123_cast_fp16))[name = string("shifted_key_21_cast_fp16")]; - tensor concat_176 = const()[name = string("concat_176"), val = tensor([10, 0, 0, 0])]; - tensor concat_177 = const()[name = string("concat_177"), val = tensor([11, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_21_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_21_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_21_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_176, begin_mask = model_model_kv_cache_local_internal_tensor_assign_21_begin_mask_0, end = concat_177, end_mask = model_model_kv_cache_local_internal_tensor_assign_21_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_21_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_21_stride_0, update = shifted_key_21_cast_fp16, x = coreml_update_state_73)[name = string("model_model_kv_cache_local_internal_tensor_assign_21_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_21_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_180_write_state")]; - tensor coreml_update_state_76 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_180")]; - tensor value_slice_21_begin_0 = const()[name = string("value_slice_21_begin_0"), val = tensor([32, 0, 0, 0])]; - tensor value_slice_21_end_0 = const()[name = string("value_slice_21_end_0"), val = tensor([33, 1, 512, 256])]; - tensor value_slice_21_end_mask_0 = const()[name = string("value_slice_21_end_mask_0"), val = tensor([false, true, true, true])]; - tensor value_slice_21_cast_fp16 = slice_by_index(begin = value_slice_21_begin_0, end = value_slice_21_end_0, end_mask = value_slice_21_end_mask_0, x = coreml_update_state_76)[name = string("value_slice_21_cast_fp16")]; - tensor var_10909_begin_0 = const()[name = string("op_10909_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_10909_end_0 = const()[name = string("op_10909_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_10909_end_mask_0 = const()[name = string("op_10909_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_10909_cast_fp16 = slice_by_index(begin = var_10909_begin_0, end = var_10909_end_0, end_mask = var_10909_end_mask_0, x = value_slice_21_cast_fp16)[name = string("op_10909_cast_fp16")]; - int32 var_10936 = const()[name = string("op_10936"), val = int32(2)]; - bool shifted_value_21_interleave_0 = const()[name = string("shifted_value_21_interleave_0"), val = bool(false)]; - tensor value_states_99 = transpose(perm = var_10681, x = var_10676)[name = string("transpose_122")]; - tensor shifted_value_21_cast_fp16 = concat(axis = var_10936, interleave = shifted_value_21_interleave_0, values = (var_10909_cast_fp16, value_states_99))[name = string("shifted_value_21_cast_fp16")]; - tensor concat_178 = const()[name = string("concat_178"), val = tensor([32, 0, 0, 0])]; - tensor concat_179 = const()[name = string("concat_179"), val = tensor([33, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_22_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_22_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_22_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_178, begin_mask = model_model_kv_cache_local_internal_tensor_assign_22_begin_mask_0, end = concat_179, end_mask = model_model_kv_cache_local_internal_tensor_assign_22_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_22_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_22_stride_0, update = shifted_value_21_cast_fp16, x = coreml_update_state_76)[name = string("model_model_kv_cache_local_internal_tensor_assign_22_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_22_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_181_write_state")]; - tensor coreml_update_state_77 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_181")]; - tensor var_10964_begin_0 = const()[name = string("op_10964_begin_0"), val = tensor([10, 0, 0, 0])]; - tensor var_10964_end_0 = const()[name = string("op_10964_end_0"), val = tensor([11, 1, 512, 256])]; - tensor var_10964_end_mask_0 = const()[name = string("op_10964_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_10964_cast_fp16 = slice_by_index(begin = var_10964_begin_0, end = var_10964_end_0, end_mask = var_10964_end_mask_0, x = coreml_update_state_77)[name = string("op_10964_cast_fp16")]; - tensor var_10971_begin_0 = const()[name = string("op_10971_begin_0"), val = tensor([32, 0, 0, 0])]; - tensor var_10971_end_0 = const()[name = string("op_10971_end_0"), val = tensor([33, 1, 512, 256])]; - tensor var_10971_end_mask_0 = const()[name = string("op_10971_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_10971_cast_fp16 = slice_by_index(begin = var_10971_begin_0, end = var_10971_end_0, end_mask = var_10971_end_mask_0, x = coreml_update_state_77)[name = string("op_10971_cast_fp16")]; - tensor var_11010 = const()[name = string("op_11010"), val = tensor([1, 4, 1, 1])]; - tensor x_197_cast_fp16 = tile(reps = var_11010, x = var_10964_cast_fp16)[name = string("x_197_cast_fp16")]; - tensor var_11030 = const()[name = string("op_11030"), val = tensor([1, 4, 1, 1])]; - tensor x_203_cast_fp16 = tile(reps = var_11030, x = var_10971_cast_fp16)[name = string("x_203_cast_fp16")]; - bool var_11057_transpose_x_0 = const()[name = string("op_11057_transpose_x_0"), val = bool(false)]; - bool var_11057_transpose_y_0 = const()[name = string("op_11057_transpose_y_0"), val = bool(true)]; - tensor var_11057 = matmul(transpose_x = var_11057_transpose_x_0, transpose_y = var_11057_transpose_y_0, x = query_states_99_cast_fp16, y = x_197_cast_fp16)[name = string("op_11057")]; - fp16 var_11058_to_fp16 = const()[name = string("op_11058_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_49_cast_fp16 = mul(x = var_11057, y = var_11058_to_fp16)[name = string("attn_weights_49_cast_fp16")]; - tensor attn_weights_51_cast_fp16 = add(x = attn_weights_49_cast_fp16, y = mask_slice_1)[name = string("attn_weights_51_cast_fp16")]; - int32 var_11093 = const()[name = string("op_11093"), val = int32(-1)]; - tensor var_11095_cast_fp16 = softmax(axis = var_11093, x = attn_weights_51_cast_fp16)[name = string("op_11095_cast_fp16")]; - tensor concat_184 = const()[name = string("concat_184"), val = tensor([4, 64, 512])]; - tensor reshape_36_cast_fp16 = reshape(shape = concat_184, x = var_11095_cast_fp16)[name = string("reshape_36_cast_fp16")]; - tensor concat_185 = const()[name = string("concat_185"), val = tensor([4, 512, 256])]; - tensor reshape_37_cast_fp16 = reshape(shape = concat_185, x = x_203_cast_fp16)[name = string("reshape_37_cast_fp16")]; - bool matmul_12_transpose_x_0 = const()[name = string("matmul_12_transpose_x_0"), val = bool(false)]; - bool matmul_12_transpose_y_0 = const()[name = string("matmul_12_transpose_y_0"), val = bool(false)]; - tensor matmul_12_cast_fp16 = matmul(transpose_x = matmul_12_transpose_x_0, transpose_y = matmul_12_transpose_y_0, x = reshape_36_cast_fp16, y = reshape_37_cast_fp16)[name = string("matmul_12_cast_fp16")]; - tensor concat_189 = const()[name = string("concat_189"), val = tensor([1, 4, 64, 256])]; - tensor reshape_38_cast_fp16 = reshape(shape = concat_189, x = matmul_12_cast_fp16)[name = string("reshape_38_cast_fp16")]; - tensor var_11107_perm_0 = const()[name = string("op_11107_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_11126 = const()[name = string("op_11126"), val = tensor([1, 64, 1024])]; - tensor var_11107_cast_fp16 = transpose(perm = var_11107_perm_0, x = reshape_38_cast_fp16)[name = string("transpose_121")]; - tensor attn_output_125_cast_fp16 = reshape(shape = var_11126, x = var_11107_cast_fp16)[name = string("attn_output_125_cast_fp16")]; - tensor var_11131 = const()[name = string("op_11131"), val = tensor([0, 2, 1])]; - string var_11147_pad_type_0 = const()[name = string("op_11147_pad_type_0"), val = string("valid")]; - int32 var_11147_groups_0 = const()[name = string("op_11147_groups_0"), val = int32(1)]; - tensor var_11147_strides_0 = const()[name = string("op_11147_strides_0"), val = tensor([1])]; - tensor var_11147_pad_0 = const()[name = string("op_11147_pad_0"), val = tensor([0, 0])]; - tensor var_11147_dilations_0 = const()[name = string("op_11147_dilations_0"), val = tensor([1])]; - tensor squeeze_12_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280772608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(281657408))))[name = string("squeeze_12_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_11132_cast_fp16 = transpose(perm = var_11131, x = attn_output_125_cast_fp16)[name = string("transpose_120")]; - tensor var_11147_cast_fp16 = conv(dilations = var_11147_dilations_0, groups = var_11147_groups_0, pad = var_11147_pad_0, pad_type = var_11147_pad_type_0, strides = var_11147_strides_0, weight = squeeze_12_cast_fp16_to_fp32_to_fp16_palettized, x = var_11132_cast_fp16)[name = string("op_11147_cast_fp16")]; - tensor var_11151 = const()[name = string("op_11151"), val = tensor([0, 2, 1])]; - int32 var_11162 = const()[name = string("op_11162"), val = int32(-1)]; - fp16 const_536_promoted_to_fp16 = const()[name = string("const_536_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_201_cast_fp16 = transpose(perm = var_11151, x = var_11147_cast_fp16)[name = string("transpose_119")]; - tensor var_11164_cast_fp16 = mul(x = hidden_states_201_cast_fp16, y = const_536_promoted_to_fp16)[name = string("op_11164_cast_fp16")]; - bool input_251_interleave_0 = const()[name = string("input_251_interleave_0"), val = bool(false)]; - tensor input_251_cast_fp16 = concat(axis = var_11162, interleave = input_251_interleave_0, values = (hidden_states_201_cast_fp16, var_11164_cast_fp16))[name = string("input_251_cast_fp16")]; - tensor normed_301_axes_0 = const()[name = string("normed_301_axes_0"), val = tensor([-1])]; - fp16 var_11159_to_fp16 = const()[name = string("op_11159_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_301_cast_fp16 = layer_norm(axes = normed_301_axes_0, epsilon = var_11159_to_fp16, x = input_251_cast_fp16)[name = string("normed_301_cast_fp16")]; - tensor normed_303_begin_0 = const()[name = string("normed_303_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_303_end_0 = const()[name = string("normed_303_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_303_end_mask_0 = const()[name = string("normed_303_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_303_cast_fp16 = slice_by_index(begin = normed_303_begin_0, end = normed_303_end_0, end_mask = normed_303_end_mask_0, x = normed_301_cast_fp16)[name = string("normed_303_cast_fp16")]; - tensor var_11178_to_fp16 = const()[name = string("op_11178_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(281675904)))]; - tensor attn_output_129_cast_fp16 = mul(x = normed_303_cast_fp16, y = var_11178_to_fp16)[name = string("attn_output_129_cast_fp16")]; - tensor hidden_states_203_cast_fp16 = add(x = hidden_states_193_cast_fp16, y = attn_output_129_cast_fp16)[name = string("hidden_states_203_cast_fp16")]; - int32 var_11191 = const()[name = string("op_11191"), val = int32(-1)]; - fp16 const_540_promoted_to_fp16 = const()[name = string("const_540_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_11193_cast_fp16 = mul(x = hidden_states_203_cast_fp16, y = const_540_promoted_to_fp16)[name = string("op_11193_cast_fp16")]; - bool input_253_interleave_0 = const()[name = string("input_253_interleave_0"), val = bool(false)]; - tensor input_253_cast_fp16 = concat(axis = var_11191, interleave = input_253_interleave_0, values = (hidden_states_203_cast_fp16, var_11193_cast_fp16))[name = string("input_253_cast_fp16")]; - tensor normed_305_axes_0 = const()[name = string("normed_305_axes_0"), val = tensor([-1])]; - fp16 var_11188_to_fp16 = const()[name = string("op_11188_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_305_cast_fp16 = layer_norm(axes = normed_305_axes_0, epsilon = var_11188_to_fp16, x = input_253_cast_fp16)[name = string("normed_305_cast_fp16")]; - tensor normed_307_begin_0 = const()[name = string("normed_307_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_307_end_0 = const()[name = string("normed_307_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_307_end_mask_0 = const()[name = string("normed_307_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_307_cast_fp16 = slice_by_index(begin = normed_307_begin_0, end = normed_307_end_0, end_mask = normed_307_end_mask_0, x = normed_305_cast_fp16)[name = string("normed_307_cast_fp16")]; - tensor var_11207_to_fp16 = const()[name = string("op_11207_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(281678272)))]; - tensor x_205_cast_fp16 = mul(x = normed_307_cast_fp16, y = var_11207_to_fp16)[name = string("x_205_cast_fp16")]; - tensor var_11219 = const()[name = string("op_11219"), val = tensor([0, 2, 1])]; - tensor input_255_axes_0 = const()[name = string("input_255_axes_0"), val = tensor([2])]; - tensor var_11220_cast_fp16 = transpose(perm = var_11219, x = x_205_cast_fp16)[name = string("transpose_118")]; - tensor input_255_cast_fp16 = expand_dims(axes = input_255_axes_0, x = var_11220_cast_fp16)[name = string("input_255_cast_fp16")]; - string x_207_pad_type_0 = const()[name = string("x_207_pad_type_0"), val = string("valid")]; - tensor x_207_strides_0 = const()[name = string("x_207_strides_0"), val = tensor([1, 1])]; - tensor x_207_pad_0 = const()[name = string("x_207_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_207_dilations_0 = const()[name = string("x_207_dilations_0"), val = tensor([1, 1])]; - int32 x_207_groups_0 = const()[name = string("x_207_groups_0"), val = int32(1)]; - tensor model_model_layers_12_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1337419712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1343391744))))[name = string("model_model_layers_12_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_207_cast_fp16 = conv(dilations = x_207_dilations_0, groups = x_207_groups_0, pad = x_207_pad_0, pad_type = x_207_pad_type_0, strides = x_207_strides_0, weight = model_model_layers_12_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_255_cast_fp16)[name = string("x_207_cast_fp16")]; - string b_25_pad_type_0 = const()[name = string("b_25_pad_type_0"), val = string("valid")]; - tensor b_25_strides_0 = const()[name = string("b_25_strides_0"), val = tensor([1, 1])]; - tensor b_25_pad_0 = const()[name = string("b_25_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_25_dilations_0 = const()[name = string("b_25_dilations_0"), val = tensor([1, 1])]; - int32 b_25_groups_0 = const()[name = string("b_25_groups_0"), val = int32(1)]; - tensor model_model_layers_12_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1343502400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1349474432))))[name = string("model_model_layers_12_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_25_cast_fp16 = conv(dilations = b_25_dilations_0, groups = b_25_groups_0, pad = b_25_pad_0, pad_type = b_25_pad_type_0, strides = b_25_strides_0, weight = model_model_layers_12_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_255_cast_fp16)[name = string("b_25_cast_fp16")]; - string var_11245_mode_0 = const()[name = string("op_11245_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_11245_cast_fp16 = gelu(mode = var_11245_mode_0, x = x_207_cast_fp16)[name = string("op_11245_cast_fp16")]; - tensor input_257_cast_fp16 = mul(x = var_11245_cast_fp16, y = b_25_cast_fp16)[name = string("input_257_cast_fp16")]; - string e_25_pad_type_0 = const()[name = string("e_25_pad_type_0"), val = string("valid")]; - tensor e_25_strides_0 = const()[name = string("e_25_strides_0"), val = tensor([1, 1])]; - tensor e_25_pad_0 = const()[name = string("e_25_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_25_dilations_0 = const()[name = string("e_25_dilations_0"), val = tensor([1, 1])]; - int32 e_25_groups_0 = const()[name = string("e_25_groups_0"), val = int32(1)]; - tensor model_model_layers_12_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(293846016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299818048))))[name = string("model_model_layers_12_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_25_cast_fp16 = conv(dilations = e_25_dilations_0, groups = e_25_groups_0, pad = e_25_pad_0, pad_type = e_25_pad_type_0, strides = e_25_strides_0, weight = model_model_layers_12_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_257_cast_fp16)[name = string("e_25_cast_fp16")]; - tensor var_11253_axes_0 = const()[name = string("op_11253_axes_0"), val = tensor([2])]; - tensor var_11253_cast_fp16 = squeeze(axes = var_11253_axes_0, x = e_25_cast_fp16)[name = string("op_11253_cast_fp16")]; - tensor var_11254 = const()[name = string("op_11254"), val = tensor([0, 2, 1])]; - int32 var_11265 = const()[name = string("op_11265"), val = int32(-1)]; - fp16 const_544_promoted_to_fp16 = const()[name = string("const_544_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_205_cast_fp16 = transpose(perm = var_11254, x = var_11253_cast_fp16)[name = string("transpose_117")]; - tensor var_11267_cast_fp16 = mul(x = hidden_states_205_cast_fp16, y = const_544_promoted_to_fp16)[name = string("op_11267_cast_fp16")]; - bool input_259_interleave_0 = const()[name = string("input_259_interleave_0"), val = bool(false)]; - tensor input_259_cast_fp16 = concat(axis = var_11265, interleave = input_259_interleave_0, values = (hidden_states_205_cast_fp16, var_11267_cast_fp16))[name = string("input_259_cast_fp16")]; - tensor normed_309_axes_0 = const()[name = string("normed_309_axes_0"), val = tensor([-1])]; - fp16 var_11262_to_fp16 = const()[name = string("op_11262_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_309_cast_fp16 = layer_norm(axes = normed_309_axes_0, epsilon = var_11262_to_fp16, x = input_259_cast_fp16)[name = string("normed_309_cast_fp16")]; - tensor normed_311_begin_0 = const()[name = string("normed_311_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_311_end_0 = const()[name = string("normed_311_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_311_end_mask_0 = const()[name = string("normed_311_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_311_cast_fp16 = slice_by_index(begin = normed_311_begin_0, end = normed_311_end_0, end_mask = normed_311_end_mask_0, x = normed_309_cast_fp16)[name = string("normed_311_cast_fp16")]; - tensor var_11281_to_fp16 = const()[name = string("op_11281_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299836544)))]; - tensor hidden_states_207_cast_fp16 = mul(x = normed_311_cast_fp16, y = var_11281_to_fp16)[name = string("hidden_states_207_cast_fp16")]; - tensor hidden_states_209_cast_fp16 = add(x = hidden_states_203_cast_fp16, y = hidden_states_207_cast_fp16)[name = string("hidden_states_209_cast_fp16")]; - int32 var_11335 = const()[name = string("op_11335"), val = int32(-1)]; - fp16 const_549_promoted_to_fp16 = const()[name = string("const_549_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_11337_cast_fp16 = mul(x = hidden_states_209_cast_fp16, y = const_549_promoted_to_fp16)[name = string("op_11337_cast_fp16")]; - bool input_261_interleave_0 = const()[name = string("input_261_interleave_0"), val = bool(false)]; - tensor input_261_cast_fp16 = concat(axis = var_11335, interleave = input_261_interleave_0, values = (hidden_states_209_cast_fp16, var_11337_cast_fp16))[name = string("input_261_cast_fp16")]; - tensor normed_313_axes_0 = const()[name = string("normed_313_axes_0"), val = tensor([-1])]; - fp16 var_11332_to_fp16 = const()[name = string("op_11332_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_313_cast_fp16 = layer_norm(axes = normed_313_axes_0, epsilon = var_11332_to_fp16, x = input_261_cast_fp16)[name = string("normed_313_cast_fp16")]; - tensor normed_315_begin_0 = const()[name = string("normed_315_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_315_end_0 = const()[name = string("normed_315_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_315_end_mask_0 = const()[name = string("normed_315_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_315_cast_fp16 = slice_by_index(begin = normed_315_begin_0, end = normed_315_end_0, end_mask = normed_315_end_mask_0, x = normed_313_cast_fp16)[name = string("normed_315_cast_fp16")]; - tensor var_11351_to_fp16 = const()[name = string("op_11351_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299838912)))]; - tensor hidden_states_211_cast_fp16 = mul(x = normed_315_cast_fp16, y = var_11351_to_fp16)[name = string("hidden_states_211_cast_fp16")]; - tensor var_11362 = const()[name = string("op_11362"), val = tensor([0, 2, 1])]; - tensor var_11365_axes_0 = const()[name = string("op_11365_axes_0"), val = tensor([2])]; - tensor var_11363_cast_fp16 = transpose(perm = var_11362, x = hidden_states_211_cast_fp16)[name = string("transpose_116")]; - tensor var_11365_cast_fp16 = expand_dims(axes = var_11365_axes_0, x = var_11363_cast_fp16)[name = string("op_11365_cast_fp16")]; - string query_states_105_pad_type_0 = const()[name = string("query_states_105_pad_type_0"), val = string("valid")]; - tensor query_states_105_strides_0 = const()[name = string("query_states_105_strides_0"), val = tensor([1, 1])]; - tensor query_states_105_pad_0 = const()[name = string("query_states_105_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_105_dilations_0 = const()[name = string("query_states_105_dilations_0"), val = tensor([1, 1])]; - int32 query_states_105_groups_0 = const()[name = string("query_states_105_groups_0"), val = int32(1)]; - tensor query_states_105 = conv(dilations = query_states_105_dilations_0, groups = query_states_105_groups_0, pad = query_states_105_pad_0, pad_type = query_states_105_pad_type_0, strides = query_states_105_strides_0, weight = model_model_layers_13_self_attn_q_proj_weight_palettized, x = var_11365_cast_fp16)[name = string("query_states_105")]; - string key_states_131_pad_type_0 = const()[name = string("key_states_131_pad_type_0"), val = string("valid")]; - tensor key_states_131_strides_0 = const()[name = string("key_states_131_strides_0"), val = tensor([1, 1])]; - tensor key_states_131_pad_0 = const()[name = string("key_states_131_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_131_dilations_0 = const()[name = string("key_states_131_dilations_0"), val = tensor([1, 1])]; - int32 key_states_131_groups_0 = const()[name = string("key_states_131_groups_0"), val = int32(1)]; - tensor key_states_131 = conv(dilations = key_states_131_dilations_0, groups = key_states_131_groups_0, pad = key_states_131_pad_0, pad_type = key_states_131_pad_type_0, strides = key_states_131_strides_0, weight = model_model_layers_13_self_attn_k_proj_weight_palettized, x = var_11365_cast_fp16)[name = string("key_states_131")]; - string value_states_105_pad_type_0 = const()[name = string("value_states_105_pad_type_0"), val = string("valid")]; - tensor value_states_105_strides_0 = const()[name = string("value_states_105_strides_0"), val = tensor([1, 1])]; - tensor value_states_105_pad_0 = const()[name = string("value_states_105_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_105_dilations_0 = const()[name = string("value_states_105_dilations_0"), val = tensor([1, 1])]; - int32 value_states_105_groups_0 = const()[name = string("value_states_105_groups_0"), val = int32(1)]; - tensor value_states_105 = conv(dilations = value_states_105_dilations_0, groups = value_states_105_groups_0, pad = value_states_105_pad_0, pad_type = value_states_105_pad_type_0, strides = value_states_105_strides_0, weight = model_model_layers_13_self_attn_v_proj_weight_palettized, x = var_11365_cast_fp16)[name = string("value_states_105")]; - tensor var_11407 = const()[name = string("op_11407"), val = tensor([1, 4, 256, 64])]; - tensor var_11408 = reshape(shape = var_11407, x = query_states_105)[name = string("op_11408")]; - tensor var_11413 = const()[name = string("op_11413"), val = tensor([0, 1, 3, 2])]; - tensor var_11418 = const()[name = string("op_11418"), val = tensor([1, 1, 256, 64])]; - tensor var_11419 = reshape(shape = var_11418, x = key_states_131)[name = string("op_11419")]; - tensor var_11424 = const()[name = string("op_11424"), val = tensor([0, 1, 3, 2])]; - tensor var_11429 = const()[name = string("op_11429"), val = tensor([1, 1, 256, 64])]; - tensor var_11430 = reshape(shape = var_11429, x = value_states_105)[name = string("op_11430")]; - tensor var_11435 = const()[name = string("op_11435"), val = tensor([0, 1, 3, 2])]; - int32 var_11446 = const()[name = string("op_11446"), val = int32(-1)]; - fp16 const_554_promoted = const()[name = string("const_554_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_213 = transpose(perm = var_11413, x = var_11408)[name = string("transpose_115")]; - tensor var_11448 = mul(x = hidden_states_213, y = const_554_promoted)[name = string("op_11448")]; - bool input_265_interleave_0 = const()[name = string("input_265_interleave_0"), val = bool(false)]; - tensor input_265 = concat(axis = var_11446, interleave = input_265_interleave_0, values = (hidden_states_213, var_11448))[name = string("input_265")]; - tensor normed_317_axes_0 = const()[name = string("normed_317_axes_0"), val = tensor([-1])]; - fp16 var_11443_to_fp16 = const()[name = string("op_11443_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_317_cast_fp16 = layer_norm(axes = normed_317_axes_0, epsilon = var_11443_to_fp16, x = input_265)[name = string("normed_317_cast_fp16")]; - tensor normed_319_begin_0 = const()[name = string("normed_319_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_319_end_0 = const()[name = string("normed_319_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_319_end_mask_0 = const()[name = string("normed_319_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_319 = slice_by_index(begin = normed_319_begin_0, end = normed_319_end_0, end_mask = normed_319_end_mask_0, x = normed_317_cast_fp16)[name = string("normed_319")]; - tensor var_11462_to_fp16 = const()[name = string("op_11462_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299841280)))]; - tensor q_27_cast_fp16 = mul(x = normed_319, y = var_11462_to_fp16)[name = string("q_27_cast_fp16")]; - int32 var_11473 = const()[name = string("op_11473"), val = int32(-1)]; - fp16 const_558_promoted = const()[name = string("const_558_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_215 = transpose(perm = var_11424, x = var_11419)[name = string("transpose_114")]; - tensor var_11475 = mul(x = hidden_states_215, y = const_558_promoted)[name = string("op_11475")]; - bool input_267_interleave_0 = const()[name = string("input_267_interleave_0"), val = bool(false)]; - tensor input_267 = concat(axis = var_11473, interleave = input_267_interleave_0, values = (hidden_states_215, var_11475))[name = string("input_267")]; - tensor normed_321_axes_0 = const()[name = string("normed_321_axes_0"), val = tensor([-1])]; - fp16 var_11470_to_fp16 = const()[name = string("op_11470_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_321_cast_fp16 = layer_norm(axes = normed_321_axes_0, epsilon = var_11470_to_fp16, x = input_267)[name = string("normed_321_cast_fp16")]; - tensor normed_323_begin_0 = const()[name = string("normed_323_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_323_end_0 = const()[name = string("normed_323_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_323_end_mask_0 = const()[name = string("normed_323_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_323 = slice_by_index(begin = normed_323_begin_0, end = normed_323_end_0, end_mask = normed_323_end_mask_0, x = normed_321_cast_fp16)[name = string("normed_323")]; - tensor var_11489_to_fp16 = const()[name = string("op_11489_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299841856)))]; - tensor k_27_cast_fp16 = mul(x = normed_323, y = var_11489_to_fp16)[name = string("k_27_cast_fp16")]; - tensor var_11503_cast_fp16 = mul(x = q_27_cast_fp16, y = cos_5)[name = string("op_11503_cast_fp16")]; - tensor x1_53_begin_0 = const()[name = string("x1_53_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_53_end_0 = const()[name = string("x1_53_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_53_end_mask_0 = const()[name = string("x1_53_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_53_cast_fp16 = slice_by_index(begin = x1_53_begin_0, end = x1_53_end_0, end_mask = x1_53_end_mask_0, x = q_27_cast_fp16)[name = string("x1_53_cast_fp16")]; - tensor x2_53_begin_0 = const()[name = string("x2_53_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_53_end_0 = const()[name = string("x2_53_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_53_end_mask_0 = const()[name = string("x2_53_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_53_cast_fp16 = slice_by_index(begin = x2_53_begin_0, end = x2_53_end_0, end_mask = x2_53_end_mask_0, x = q_27_cast_fp16)[name = string("x2_53_cast_fp16")]; - fp16 const_564_promoted_to_fp16 = const()[name = string("const_564_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_11524_cast_fp16 = mul(x = x2_53_cast_fp16, y = const_564_promoted_to_fp16)[name = string("op_11524_cast_fp16")]; - int32 var_11526 = const()[name = string("op_11526"), val = int32(-1)]; - bool var_11527_interleave_0 = const()[name = string("op_11527_interleave_0"), val = bool(false)]; - tensor var_11527_cast_fp16 = concat(axis = var_11526, interleave = var_11527_interleave_0, values = (var_11524_cast_fp16, x1_53_cast_fp16))[name = string("op_11527_cast_fp16")]; - tensor var_11528_cast_fp16 = mul(x = var_11527_cast_fp16, y = sin_5)[name = string("op_11528_cast_fp16")]; - tensor query_states_107_cast_fp16 = add(x = var_11503_cast_fp16, y = var_11528_cast_fp16)[name = string("query_states_107_cast_fp16")]; - tensor var_11531_cast_fp16 = mul(x = k_27_cast_fp16, y = cos_5)[name = string("op_11531_cast_fp16")]; - tensor x1_55_begin_0 = const()[name = string("x1_55_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_55_end_0 = const()[name = string("x1_55_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_55_end_mask_0 = const()[name = string("x1_55_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_55_cast_fp16 = slice_by_index(begin = x1_55_begin_0, end = x1_55_end_0, end_mask = x1_55_end_mask_0, x = k_27_cast_fp16)[name = string("x1_55_cast_fp16")]; - tensor x2_55_begin_0 = const()[name = string("x2_55_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_55_end_0 = const()[name = string("x2_55_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_55_end_mask_0 = const()[name = string("x2_55_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_55_cast_fp16 = slice_by_index(begin = x2_55_begin_0, end = x2_55_end_0, end_mask = x2_55_end_mask_0, x = k_27_cast_fp16)[name = string("x2_55_cast_fp16")]; - fp16 const_567_promoted_to_fp16 = const()[name = string("const_567_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_11552_cast_fp16 = mul(x = x2_55_cast_fp16, y = const_567_promoted_to_fp16)[name = string("op_11552_cast_fp16")]; - int32 var_11554 = const()[name = string("op_11554"), val = int32(-1)]; - bool var_11555_interleave_0 = const()[name = string("op_11555_interleave_0"), val = bool(false)]; - tensor var_11555_cast_fp16 = concat(axis = var_11554, interleave = var_11555_interleave_0, values = (var_11552_cast_fp16, x1_55_cast_fp16))[name = string("op_11555_cast_fp16")]; - tensor var_11556_cast_fp16 = mul(x = var_11555_cast_fp16, y = sin_5)[name = string("op_11556_cast_fp16")]; - tensor key_states_133_cast_fp16 = add(x = var_11531_cast_fp16, y = var_11556_cast_fp16)[name = string("key_states_133_cast_fp16")]; - tensor key_slice_23_begin_0 = const()[name = string("key_slice_23_begin_0"), val = tensor([11, 0, 0, 0])]; - tensor key_slice_23_end_0 = const()[name = string("key_slice_23_end_0"), val = tensor([12, 1, 512, 256])]; - tensor key_slice_23_end_mask_0 = const()[name = string("key_slice_23_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_23_cast_fp16 = slice_by_index(begin = key_slice_23_begin_0, end = key_slice_23_end_0, end_mask = key_slice_23_end_mask_0, x = coreml_update_state_77)[name = string("key_slice_23_cast_fp16")]; - tensor var_11593_begin_0 = const()[name = string("op_11593_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_11593_end_0 = const()[name = string("op_11593_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_11593_end_mask_0 = const()[name = string("op_11593_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_11593_cast_fp16 = slice_by_index(begin = var_11593_begin_0, end = var_11593_end_0, end_mask = var_11593_end_mask_0, x = key_slice_23_cast_fp16)[name = string("op_11593_cast_fp16")]; - int32 var_11620 = const()[name = string("op_11620"), val = int32(2)]; - bool shifted_key_23_interleave_0 = const()[name = string("shifted_key_23_interleave_0"), val = bool(false)]; - tensor shifted_key_23_cast_fp16 = concat(axis = var_11620, interleave = shifted_key_23_interleave_0, values = (var_11593_cast_fp16, key_states_133_cast_fp16))[name = string("shifted_key_23_cast_fp16")]; - tensor concat_190 = const()[name = string("concat_190"), val = tensor([11, 0, 0, 0])]; - tensor concat_191 = const()[name = string("concat_191"), val = tensor([12, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_23_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_23_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_23_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_190, begin_mask = model_model_kv_cache_local_internal_tensor_assign_23_begin_mask_0, end = concat_191, end_mask = model_model_kv_cache_local_internal_tensor_assign_23_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_23_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_23_stride_0, update = shifted_key_23_cast_fp16, x = coreml_update_state_77)[name = string("model_model_kv_cache_local_internal_tensor_assign_23_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_23_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_182_write_state")]; - tensor coreml_update_state_78 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_182")]; - tensor value_slice_23_begin_0 = const()[name = string("value_slice_23_begin_0"), val = tensor([33, 0, 0, 0])]; - tensor value_slice_23_end_0 = const()[name = string("value_slice_23_end_0"), val = tensor([34, 1, 512, 256])]; - tensor value_slice_23_end_mask_0 = const()[name = string("value_slice_23_end_mask_0"), val = tensor([false, true, true, true])]; - tensor value_slice_23_cast_fp16 = slice_by_index(begin = value_slice_23_begin_0, end = value_slice_23_end_0, end_mask = value_slice_23_end_mask_0, x = coreml_update_state_78)[name = string("value_slice_23_cast_fp16")]; - tensor var_11663_begin_0 = const()[name = string("op_11663_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_11663_end_0 = const()[name = string("op_11663_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_11663_end_mask_0 = const()[name = string("op_11663_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_11663_cast_fp16 = slice_by_index(begin = var_11663_begin_0, end = var_11663_end_0, end_mask = var_11663_end_mask_0, x = value_slice_23_cast_fp16)[name = string("op_11663_cast_fp16")]; - int32 var_11690 = const()[name = string("op_11690"), val = int32(2)]; - bool shifted_value_23_interleave_0 = const()[name = string("shifted_value_23_interleave_0"), val = bool(false)]; - tensor value_states_107 = transpose(perm = var_11435, x = var_11430)[name = string("transpose_113")]; - tensor shifted_value_23_cast_fp16 = concat(axis = var_11690, interleave = shifted_value_23_interleave_0, values = (var_11663_cast_fp16, value_states_107))[name = string("shifted_value_23_cast_fp16")]; - tensor concat_192 = const()[name = string("concat_192"), val = tensor([33, 0, 0, 0])]; - tensor concat_193 = const()[name = string("concat_193"), val = tensor([34, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_24_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_24_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_24_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_192, begin_mask = model_model_kv_cache_local_internal_tensor_assign_24_begin_mask_0, end = concat_193, end_mask = model_model_kv_cache_local_internal_tensor_assign_24_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_24_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_24_stride_0, update = shifted_value_23_cast_fp16, x = coreml_update_state_78)[name = string("model_model_kv_cache_local_internal_tensor_assign_24_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_24_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_183_write_state")]; - tensor coreml_update_state_79 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_183")]; - tensor var_11718_begin_0 = const()[name = string("op_11718_begin_0"), val = tensor([11, 0, 0, 0])]; - tensor var_11718_end_0 = const()[name = string("op_11718_end_0"), val = tensor([12, 1, 512, 256])]; - tensor var_11718_end_mask_0 = const()[name = string("op_11718_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_11718_cast_fp16 = slice_by_index(begin = var_11718_begin_0, end = var_11718_end_0, end_mask = var_11718_end_mask_0, x = coreml_update_state_79)[name = string("op_11718_cast_fp16")]; - tensor var_11725_begin_0 = const()[name = string("op_11725_begin_0"), val = tensor([33, 0, 0, 0])]; - tensor var_11725_end_0 = const()[name = string("op_11725_end_0"), val = tensor([34, 1, 512, 256])]; - tensor var_11725_end_mask_0 = const()[name = string("op_11725_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_11725_cast_fp16 = slice_by_index(begin = var_11725_begin_0, end = var_11725_end_0, end_mask = var_11725_end_mask_0, x = coreml_update_state_79)[name = string("op_11725_cast_fp16")]; - tensor var_11764 = const()[name = string("op_11764"), val = tensor([1, 4, 1, 1])]; - tensor x_213_cast_fp16 = tile(reps = var_11764, x = var_11718_cast_fp16)[name = string("x_213_cast_fp16")]; - tensor var_11784 = const()[name = string("op_11784"), val = tensor([1, 4, 1, 1])]; - tensor x_219_cast_fp16 = tile(reps = var_11784, x = var_11725_cast_fp16)[name = string("x_219_cast_fp16")]; - bool var_11811_transpose_x_0 = const()[name = string("op_11811_transpose_x_0"), val = bool(false)]; - bool var_11811_transpose_y_0 = const()[name = string("op_11811_transpose_y_0"), val = bool(true)]; - tensor var_11811 = matmul(transpose_x = var_11811_transpose_x_0, transpose_y = var_11811_transpose_y_0, x = query_states_107_cast_fp16, y = x_213_cast_fp16)[name = string("op_11811")]; - fp16 var_11812_to_fp16 = const()[name = string("op_11812_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_53_cast_fp16 = mul(x = var_11811, y = var_11812_to_fp16)[name = string("attn_weights_53_cast_fp16")]; - tensor attn_weights_55_cast_fp16 = add(x = attn_weights_53_cast_fp16, y = mask_slice_1)[name = string("attn_weights_55_cast_fp16")]; - int32 var_11847 = const()[name = string("op_11847"), val = int32(-1)]; - tensor var_11849_cast_fp16 = softmax(axis = var_11847, x = attn_weights_55_cast_fp16)[name = string("op_11849_cast_fp16")]; - tensor concat_198 = const()[name = string("concat_198"), val = tensor([4, 64, 512])]; - tensor reshape_39_cast_fp16 = reshape(shape = concat_198, x = var_11849_cast_fp16)[name = string("reshape_39_cast_fp16")]; - tensor concat_199 = const()[name = string("concat_199"), val = tensor([4, 512, 256])]; - tensor reshape_40_cast_fp16 = reshape(shape = concat_199, x = x_219_cast_fp16)[name = string("reshape_40_cast_fp16")]; - bool matmul_13_transpose_x_0 = const()[name = string("matmul_13_transpose_x_0"), val = bool(false)]; - bool matmul_13_transpose_y_0 = const()[name = string("matmul_13_transpose_y_0"), val = bool(false)]; - tensor matmul_13_cast_fp16 = matmul(transpose_x = matmul_13_transpose_x_0, transpose_y = matmul_13_transpose_y_0, x = reshape_39_cast_fp16, y = reshape_40_cast_fp16)[name = string("matmul_13_cast_fp16")]; - tensor concat_203 = const()[name = string("concat_203"), val = tensor([1, 4, 64, 256])]; - tensor reshape_41_cast_fp16 = reshape(shape = concat_203, x = matmul_13_cast_fp16)[name = string("reshape_41_cast_fp16")]; - tensor var_11861_perm_0 = const()[name = string("op_11861_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_11880 = const()[name = string("op_11880"), val = tensor([1, 64, 1024])]; - tensor var_11861_cast_fp16 = transpose(perm = var_11861_perm_0, x = reshape_41_cast_fp16)[name = string("transpose_112")]; - tensor attn_output_135_cast_fp16 = reshape(shape = var_11880, x = var_11861_cast_fp16)[name = string("attn_output_135_cast_fp16")]; - tensor var_11885 = const()[name = string("op_11885"), val = tensor([0, 2, 1])]; - string var_11901_pad_type_0 = const()[name = string("op_11901_pad_type_0"), val = string("valid")]; - int32 var_11901_groups_0 = const()[name = string("op_11901_groups_0"), val = int32(1)]; - tensor var_11901_strides_0 = const()[name = string("op_11901_strides_0"), val = tensor([1])]; - tensor var_11901_pad_0 = const()[name = string("op_11901_pad_0"), val = tensor([0, 0])]; - tensor var_11901_dilations_0 = const()[name = string("op_11901_dilations_0"), val = tensor([1])]; - tensor squeeze_13_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299842432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(300727232))))[name = string("squeeze_13_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_11886_cast_fp16 = transpose(perm = var_11885, x = attn_output_135_cast_fp16)[name = string("transpose_111")]; - tensor var_11901_cast_fp16 = conv(dilations = var_11901_dilations_0, groups = var_11901_groups_0, pad = var_11901_pad_0, pad_type = var_11901_pad_type_0, strides = var_11901_strides_0, weight = squeeze_13_cast_fp16_to_fp32_to_fp16_palettized, x = var_11886_cast_fp16)[name = string("op_11901_cast_fp16")]; - tensor var_11905 = const()[name = string("op_11905"), val = tensor([0, 2, 1])]; - int32 var_11916 = const()[name = string("op_11916"), val = int32(-1)]; - fp16 const_578_promoted_to_fp16 = const()[name = string("const_578_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_217_cast_fp16 = transpose(perm = var_11905, x = var_11901_cast_fp16)[name = string("transpose_110")]; - tensor var_11918_cast_fp16 = mul(x = hidden_states_217_cast_fp16, y = const_578_promoted_to_fp16)[name = string("op_11918_cast_fp16")]; - bool input_271_interleave_0 = const()[name = string("input_271_interleave_0"), val = bool(false)]; - tensor input_271_cast_fp16 = concat(axis = var_11916, interleave = input_271_interleave_0, values = (hidden_states_217_cast_fp16, var_11918_cast_fp16))[name = string("input_271_cast_fp16")]; - tensor normed_325_axes_0 = const()[name = string("normed_325_axes_0"), val = tensor([-1])]; - fp16 var_11913_to_fp16 = const()[name = string("op_11913_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_325_cast_fp16 = layer_norm(axes = normed_325_axes_0, epsilon = var_11913_to_fp16, x = input_271_cast_fp16)[name = string("normed_325_cast_fp16")]; - tensor normed_327_begin_0 = const()[name = string("normed_327_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_327_end_0 = const()[name = string("normed_327_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_327_end_mask_0 = const()[name = string("normed_327_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_327_cast_fp16 = slice_by_index(begin = normed_327_begin_0, end = normed_327_end_0, end_mask = normed_327_end_mask_0, x = normed_325_cast_fp16)[name = string("normed_327_cast_fp16")]; - tensor var_11932_to_fp16 = const()[name = string("op_11932_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(300745728)))]; - tensor attn_output_139_cast_fp16 = mul(x = normed_327_cast_fp16, y = var_11932_to_fp16)[name = string("attn_output_139_cast_fp16")]; - tensor hidden_states_219_cast_fp16 = add(x = hidden_states_209_cast_fp16, y = attn_output_139_cast_fp16)[name = string("hidden_states_219_cast_fp16")]; - int32 var_11945 = const()[name = string("op_11945"), val = int32(-1)]; - fp16 const_582_promoted_to_fp16 = const()[name = string("const_582_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_11947_cast_fp16 = mul(x = hidden_states_219_cast_fp16, y = const_582_promoted_to_fp16)[name = string("op_11947_cast_fp16")]; - bool input_273_interleave_0 = const()[name = string("input_273_interleave_0"), val = bool(false)]; - tensor input_273_cast_fp16 = concat(axis = var_11945, interleave = input_273_interleave_0, values = (hidden_states_219_cast_fp16, var_11947_cast_fp16))[name = string("input_273_cast_fp16")]; - tensor normed_329_axes_0 = const()[name = string("normed_329_axes_0"), val = tensor([-1])]; - fp16 var_11942_to_fp16 = const()[name = string("op_11942_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_329_cast_fp16 = layer_norm(axes = normed_329_axes_0, epsilon = var_11942_to_fp16, x = input_273_cast_fp16)[name = string("normed_329_cast_fp16")]; - tensor normed_331_begin_0 = const()[name = string("normed_331_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_331_end_0 = const()[name = string("normed_331_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_331_end_mask_0 = const()[name = string("normed_331_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_331_cast_fp16 = slice_by_index(begin = normed_331_begin_0, end = normed_331_end_0, end_mask = normed_331_end_mask_0, x = normed_329_cast_fp16)[name = string("normed_331_cast_fp16")]; - tensor var_11961_to_fp16 = const()[name = string("op_11961_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(300748096)))]; - tensor x_221_cast_fp16 = mul(x = normed_331_cast_fp16, y = var_11961_to_fp16)[name = string("x_221_cast_fp16")]; - tensor var_11973 = const()[name = string("op_11973"), val = tensor([0, 2, 1])]; - tensor input_275_axes_0 = const()[name = string("input_275_axes_0"), val = tensor([2])]; - tensor var_11974_cast_fp16 = transpose(perm = var_11973, x = x_221_cast_fp16)[name = string("transpose_109")]; - tensor input_275_cast_fp16 = expand_dims(axes = input_275_axes_0, x = var_11974_cast_fp16)[name = string("input_275_cast_fp16")]; - string x_223_pad_type_0 = const()[name = string("x_223_pad_type_0"), val = string("valid")]; - tensor x_223_strides_0 = const()[name = string("x_223_strides_0"), val = tensor([1, 1])]; - tensor x_223_pad_0 = const()[name = string("x_223_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_223_dilations_0 = const()[name = string("x_223_dilations_0"), val = tensor([1, 1])]; - int32 x_223_groups_0 = const()[name = string("x_223_groups_0"), val = int32(1)]; - tensor model_model_layers_13_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1349585088))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1355557120))))[name = string("model_model_layers_13_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_223_cast_fp16 = conv(dilations = x_223_dilations_0, groups = x_223_groups_0, pad = x_223_pad_0, pad_type = x_223_pad_type_0, strides = x_223_strides_0, weight = model_model_layers_13_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_275_cast_fp16)[name = string("x_223_cast_fp16")]; - string b_27_pad_type_0 = const()[name = string("b_27_pad_type_0"), val = string("valid")]; - tensor b_27_strides_0 = const()[name = string("b_27_strides_0"), val = tensor([1, 1])]; - tensor b_27_pad_0 = const()[name = string("b_27_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_27_dilations_0 = const()[name = string("b_27_dilations_0"), val = tensor([1, 1])]; - int32 b_27_groups_0 = const()[name = string("b_27_groups_0"), val = int32(1)]; - tensor model_model_layers_13_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1355667776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1361639808))))[name = string("model_model_layers_13_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_27_cast_fp16 = conv(dilations = b_27_dilations_0, groups = b_27_groups_0, pad = b_27_pad_0, pad_type = b_27_pad_type_0, strides = b_27_strides_0, weight = model_model_layers_13_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_275_cast_fp16)[name = string("b_27_cast_fp16")]; - string var_11999_mode_0 = const()[name = string("op_11999_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_11999_cast_fp16 = gelu(mode = var_11999_mode_0, x = x_223_cast_fp16)[name = string("op_11999_cast_fp16")]; - tensor input_277_cast_fp16 = mul(x = var_11999_cast_fp16, y = b_27_cast_fp16)[name = string("input_277_cast_fp16")]; - string e_27_pad_type_0 = const()[name = string("e_27_pad_type_0"), val = string("valid")]; - tensor e_27_strides_0 = const()[name = string("e_27_strides_0"), val = tensor([1, 1])]; - tensor e_27_pad_0 = const()[name = string("e_27_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_27_dilations_0 = const()[name = string("e_27_dilations_0"), val = tensor([1, 1])]; - int32 e_27_groups_0 = const()[name = string("e_27_groups_0"), val = int32(1)]; - tensor model_model_layers_13_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(312915840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318887872))))[name = string("model_model_layers_13_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_27_cast_fp16 = conv(dilations = e_27_dilations_0, groups = e_27_groups_0, pad = e_27_pad_0, pad_type = e_27_pad_type_0, strides = e_27_strides_0, weight = model_model_layers_13_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_277_cast_fp16)[name = string("e_27_cast_fp16")]; - tensor var_12007_axes_0 = const()[name = string("op_12007_axes_0"), val = tensor([2])]; - tensor var_12007_cast_fp16 = squeeze(axes = var_12007_axes_0, x = e_27_cast_fp16)[name = string("op_12007_cast_fp16")]; - tensor var_12008 = const()[name = string("op_12008"), val = tensor([0, 2, 1])]; - int32 var_12019 = const()[name = string("op_12019"), val = int32(-1)]; - fp16 const_586_promoted_to_fp16 = const()[name = string("const_586_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_221_cast_fp16 = transpose(perm = var_12008, x = var_12007_cast_fp16)[name = string("transpose_108")]; - tensor var_12021_cast_fp16 = mul(x = hidden_states_221_cast_fp16, y = const_586_promoted_to_fp16)[name = string("op_12021_cast_fp16")]; - bool input_279_interleave_0 = const()[name = string("input_279_interleave_0"), val = bool(false)]; - tensor input_279_cast_fp16 = concat(axis = var_12019, interleave = input_279_interleave_0, values = (hidden_states_221_cast_fp16, var_12021_cast_fp16))[name = string("input_279_cast_fp16")]; - tensor normed_333_axes_0 = const()[name = string("normed_333_axes_0"), val = tensor([-1])]; - fp16 var_12016_to_fp16 = const()[name = string("op_12016_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_333_cast_fp16 = layer_norm(axes = normed_333_axes_0, epsilon = var_12016_to_fp16, x = input_279_cast_fp16)[name = string("normed_333_cast_fp16")]; - tensor normed_335_begin_0 = const()[name = string("normed_335_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_335_end_0 = const()[name = string("normed_335_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_335_end_mask_0 = const()[name = string("normed_335_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_335_cast_fp16 = slice_by_index(begin = normed_335_begin_0, end = normed_335_end_0, end_mask = normed_335_end_mask_0, x = normed_333_cast_fp16)[name = string("normed_335_cast_fp16")]; - tensor var_12035_to_fp16 = const()[name = string("op_12035_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318906368)))]; - tensor hidden_states_223_cast_fp16 = mul(x = normed_335_cast_fp16, y = var_12035_to_fp16)[name = string("hidden_states_223_cast_fp16")]; - tensor hidden_states_225_cast_fp16 = add(x = hidden_states_219_cast_fp16, y = hidden_states_223_cast_fp16)[name = string("hidden_states_225_cast_fp16")]; - int32 var_12089 = const()[name = string("op_12089"), val = int32(-1)]; - fp16 const_591_promoted_to_fp16 = const()[name = string("const_591_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_12091_cast_fp16 = mul(x = hidden_states_225_cast_fp16, y = const_591_promoted_to_fp16)[name = string("op_12091_cast_fp16")]; - bool input_281_interleave_0 = const()[name = string("input_281_interleave_0"), val = bool(false)]; - tensor input_281_cast_fp16 = concat(axis = var_12089, interleave = input_281_interleave_0, values = (hidden_states_225_cast_fp16, var_12091_cast_fp16))[name = string("input_281_cast_fp16")]; - tensor normed_337_axes_0 = const()[name = string("normed_337_axes_0"), val = tensor([-1])]; - fp16 var_12086_to_fp16 = const()[name = string("op_12086_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_337_cast_fp16 = layer_norm(axes = normed_337_axes_0, epsilon = var_12086_to_fp16, x = input_281_cast_fp16)[name = string("normed_337_cast_fp16")]; - tensor normed_339_begin_0 = const()[name = string("normed_339_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_339_end_0 = const()[name = string("normed_339_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_339_end_mask_0 = const()[name = string("normed_339_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_339_cast_fp16 = slice_by_index(begin = normed_339_begin_0, end = normed_339_end_0, end_mask = normed_339_end_mask_0, x = normed_337_cast_fp16)[name = string("normed_339_cast_fp16")]; - tensor var_12105_to_fp16 = const()[name = string("op_12105_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318908736)))]; - tensor hidden_states_227_cast_fp16 = mul(x = normed_339_cast_fp16, y = var_12105_to_fp16)[name = string("hidden_states_227_cast_fp16")]; - tensor var_12116 = const()[name = string("op_12116"), val = tensor([0, 2, 1])]; - tensor var_12119_axes_0 = const()[name = string("op_12119_axes_0"), val = tensor([2])]; - tensor var_12117_cast_fp16 = transpose(perm = var_12116, x = hidden_states_227_cast_fp16)[name = string("transpose_107")]; - tensor var_12119_cast_fp16 = expand_dims(axes = var_12119_axes_0, x = var_12117_cast_fp16)[name = string("op_12119_cast_fp16")]; - string query_states_113_pad_type_0 = const()[name = string("query_states_113_pad_type_0"), val = string("valid")]; - tensor query_states_113_strides_0 = const()[name = string("query_states_113_strides_0"), val = tensor([1, 1])]; - tensor query_states_113_pad_0 = const()[name = string("query_states_113_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_113_dilations_0 = const()[name = string("query_states_113_dilations_0"), val = tensor([1, 1])]; - int32 query_states_113_groups_0 = const()[name = string("query_states_113_groups_0"), val = int32(1)]; - tensor query_states_113 = conv(dilations = query_states_113_dilations_0, groups = query_states_113_groups_0, pad = query_states_113_pad_0, pad_type = query_states_113_pad_type_0, strides = query_states_113_strides_0, weight = model_model_layers_14_self_attn_q_proj_weight_palettized, x = var_12119_cast_fp16)[name = string("query_states_113")]; - string key_states_141_pad_type_0 = const()[name = string("key_states_141_pad_type_0"), val = string("valid")]; - tensor key_states_141_strides_0 = const()[name = string("key_states_141_strides_0"), val = tensor([1, 1])]; - tensor key_states_141_pad_0 = const()[name = string("key_states_141_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_141_dilations_0 = const()[name = string("key_states_141_dilations_0"), val = tensor([1, 1])]; - int32 key_states_141_groups_0 = const()[name = string("key_states_141_groups_0"), val = int32(1)]; - tensor key_states_141 = conv(dilations = key_states_141_dilations_0, groups = key_states_141_groups_0, pad = key_states_141_pad_0, pad_type = key_states_141_pad_type_0, strides = key_states_141_strides_0, weight = model_model_layers_14_self_attn_k_proj_weight_palettized, x = var_12119_cast_fp16)[name = string("key_states_141")]; - string value_states_113_pad_type_0 = const()[name = string("value_states_113_pad_type_0"), val = string("valid")]; - tensor value_states_113_strides_0 = const()[name = string("value_states_113_strides_0"), val = tensor([1, 1])]; - tensor value_states_113_pad_0 = const()[name = string("value_states_113_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_113_dilations_0 = const()[name = string("value_states_113_dilations_0"), val = tensor([1, 1])]; - int32 value_states_113_groups_0 = const()[name = string("value_states_113_groups_0"), val = int32(1)]; - tensor value_states_113 = conv(dilations = value_states_113_dilations_0, groups = value_states_113_groups_0, pad = value_states_113_pad_0, pad_type = value_states_113_pad_type_0, strides = value_states_113_strides_0, weight = model_model_layers_14_self_attn_v_proj_weight_palettized, x = var_12119_cast_fp16)[name = string("value_states_113")]; - tensor var_12161 = const()[name = string("op_12161"), val = tensor([1, 4, 256, 64])]; - tensor var_12162 = reshape(shape = var_12161, x = query_states_113)[name = string("op_12162")]; - tensor var_12167 = const()[name = string("op_12167"), val = tensor([0, 1, 3, 2])]; - tensor var_12172 = const()[name = string("op_12172"), val = tensor([1, 1, 256, 64])]; - tensor var_12173 = reshape(shape = var_12172, x = key_states_141)[name = string("op_12173")]; - tensor var_12178 = const()[name = string("op_12178"), val = tensor([0, 1, 3, 2])]; - tensor var_12183 = const()[name = string("op_12183"), val = tensor([1, 1, 256, 64])]; - tensor var_12184 = reshape(shape = var_12183, x = value_states_113)[name = string("op_12184")]; - tensor var_12189 = const()[name = string("op_12189"), val = tensor([0, 1, 3, 2])]; - int32 var_12200 = const()[name = string("op_12200"), val = int32(-1)]; - fp16 const_596_promoted = const()[name = string("const_596_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_229 = transpose(perm = var_12167, x = var_12162)[name = string("transpose_106")]; - tensor var_12202 = mul(x = hidden_states_229, y = const_596_promoted)[name = string("op_12202")]; - bool input_285_interleave_0 = const()[name = string("input_285_interleave_0"), val = bool(false)]; - tensor input_285 = concat(axis = var_12200, interleave = input_285_interleave_0, values = (hidden_states_229, var_12202))[name = string("input_285")]; - tensor normed_341_axes_0 = const()[name = string("normed_341_axes_0"), val = tensor([-1])]; - fp16 var_12197_to_fp16 = const()[name = string("op_12197_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_341_cast_fp16 = layer_norm(axes = normed_341_axes_0, epsilon = var_12197_to_fp16, x = input_285)[name = string("normed_341_cast_fp16")]; - tensor normed_343_begin_0 = const()[name = string("normed_343_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_343_end_0 = const()[name = string("normed_343_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_343_end_mask_0 = const()[name = string("normed_343_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_343 = slice_by_index(begin = normed_343_begin_0, end = normed_343_end_0, end_mask = normed_343_end_mask_0, x = normed_341_cast_fp16)[name = string("normed_343")]; - tensor var_12216_to_fp16 = const()[name = string("op_12216_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318911104)))]; - tensor q_29_cast_fp16 = mul(x = normed_343, y = var_12216_to_fp16)[name = string("q_29_cast_fp16")]; - int32 var_12227 = const()[name = string("op_12227"), val = int32(-1)]; - fp16 const_600_promoted = const()[name = string("const_600_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_231 = transpose(perm = var_12178, x = var_12173)[name = string("transpose_105")]; - tensor var_12229 = mul(x = hidden_states_231, y = const_600_promoted)[name = string("op_12229")]; - bool input_287_interleave_0 = const()[name = string("input_287_interleave_0"), val = bool(false)]; - tensor input_287 = concat(axis = var_12227, interleave = input_287_interleave_0, values = (hidden_states_231, var_12229))[name = string("input_287")]; - tensor normed_345_axes_0 = const()[name = string("normed_345_axes_0"), val = tensor([-1])]; - fp16 var_12224_to_fp16 = const()[name = string("op_12224_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_345_cast_fp16 = layer_norm(axes = normed_345_axes_0, epsilon = var_12224_to_fp16, x = input_287)[name = string("normed_345_cast_fp16")]; - tensor normed_347_begin_0 = const()[name = string("normed_347_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_347_end_0 = const()[name = string("normed_347_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_347_end_mask_0 = const()[name = string("normed_347_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_347 = slice_by_index(begin = normed_347_begin_0, end = normed_347_end_0, end_mask = normed_347_end_mask_0, x = normed_345_cast_fp16)[name = string("normed_347")]; - tensor var_12243_to_fp16 = const()[name = string("op_12243_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318911680)))]; - tensor k_29_cast_fp16 = mul(x = normed_347, y = var_12243_to_fp16)[name = string("k_29_cast_fp16")]; - tensor var_12257_cast_fp16 = mul(x = q_29_cast_fp16, y = cos_5)[name = string("op_12257_cast_fp16")]; - tensor x1_57_begin_0 = const()[name = string("x1_57_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_57_end_0 = const()[name = string("x1_57_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_57_end_mask_0 = const()[name = string("x1_57_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_57_cast_fp16 = slice_by_index(begin = x1_57_begin_0, end = x1_57_end_0, end_mask = x1_57_end_mask_0, x = q_29_cast_fp16)[name = string("x1_57_cast_fp16")]; - tensor x2_57_begin_0 = const()[name = string("x2_57_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_57_end_0 = const()[name = string("x2_57_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_57_end_mask_0 = const()[name = string("x2_57_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_57_cast_fp16 = slice_by_index(begin = x2_57_begin_0, end = x2_57_end_0, end_mask = x2_57_end_mask_0, x = q_29_cast_fp16)[name = string("x2_57_cast_fp16")]; - fp16 const_606_promoted_to_fp16 = const()[name = string("const_606_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_12278_cast_fp16 = mul(x = x2_57_cast_fp16, y = const_606_promoted_to_fp16)[name = string("op_12278_cast_fp16")]; - int32 var_12280 = const()[name = string("op_12280"), val = int32(-1)]; - bool var_12281_interleave_0 = const()[name = string("op_12281_interleave_0"), val = bool(false)]; - tensor var_12281_cast_fp16 = concat(axis = var_12280, interleave = var_12281_interleave_0, values = (var_12278_cast_fp16, x1_57_cast_fp16))[name = string("op_12281_cast_fp16")]; - tensor var_12282_cast_fp16 = mul(x = var_12281_cast_fp16, y = sin_5)[name = string("op_12282_cast_fp16")]; - tensor query_states_115_cast_fp16 = add(x = var_12257_cast_fp16, y = var_12282_cast_fp16)[name = string("query_states_115_cast_fp16")]; - tensor var_12285_cast_fp16 = mul(x = k_29_cast_fp16, y = cos_5)[name = string("op_12285_cast_fp16")]; - tensor x1_59_begin_0 = const()[name = string("x1_59_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_59_end_0 = const()[name = string("x1_59_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_59_end_mask_0 = const()[name = string("x1_59_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_59_cast_fp16 = slice_by_index(begin = x1_59_begin_0, end = x1_59_end_0, end_mask = x1_59_end_mask_0, x = k_29_cast_fp16)[name = string("x1_59_cast_fp16")]; - tensor x2_59_begin_0 = const()[name = string("x2_59_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_59_end_0 = const()[name = string("x2_59_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_59_end_mask_0 = const()[name = string("x2_59_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_59_cast_fp16 = slice_by_index(begin = x2_59_begin_0, end = x2_59_end_0, end_mask = x2_59_end_mask_0, x = k_29_cast_fp16)[name = string("x2_59_cast_fp16")]; - fp16 const_609_promoted_to_fp16 = const()[name = string("const_609_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_12306_cast_fp16 = mul(x = x2_59_cast_fp16, y = const_609_promoted_to_fp16)[name = string("op_12306_cast_fp16")]; - int32 var_12308 = const()[name = string("op_12308"), val = int32(-1)]; - bool var_12309_interleave_0 = const()[name = string("op_12309_interleave_0"), val = bool(false)]; - tensor var_12309_cast_fp16 = concat(axis = var_12308, interleave = var_12309_interleave_0, values = (var_12306_cast_fp16, x1_59_cast_fp16))[name = string("op_12309_cast_fp16")]; - tensor var_12310_cast_fp16 = mul(x = var_12309_cast_fp16, y = sin_5)[name = string("op_12310_cast_fp16")]; - tensor key_states_143_cast_fp16 = add(x = var_12285_cast_fp16, y = var_12310_cast_fp16)[name = string("key_states_143_cast_fp16")]; - tensor key_slice_25_begin_0 = const()[name = string("key_slice_25_begin_0"), val = tensor([12, 0, 0, 0])]; - tensor key_slice_25_end_0 = const()[name = string("key_slice_25_end_0"), val = tensor([13, 1, 512, 256])]; - tensor key_slice_25_end_mask_0 = const()[name = string("key_slice_25_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_25_cast_fp16 = slice_by_index(begin = key_slice_25_begin_0, end = key_slice_25_end_0, end_mask = key_slice_25_end_mask_0, x = coreml_update_state_79)[name = string("key_slice_25_cast_fp16")]; - tensor var_12347_begin_0 = const()[name = string("op_12347_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_12347_end_0 = const()[name = string("op_12347_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_12347_end_mask_0 = const()[name = string("op_12347_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_12347_cast_fp16 = slice_by_index(begin = var_12347_begin_0, end = var_12347_end_0, end_mask = var_12347_end_mask_0, x = key_slice_25_cast_fp16)[name = string("op_12347_cast_fp16")]; - int32 var_12374 = const()[name = string("op_12374"), val = int32(2)]; - bool shifted_key_25_interleave_0 = const()[name = string("shifted_key_25_interleave_0"), val = bool(false)]; - tensor shifted_key_25_cast_fp16 = concat(axis = var_12374, interleave = shifted_key_25_interleave_0, values = (var_12347_cast_fp16, key_states_143_cast_fp16))[name = string("shifted_key_25_cast_fp16")]; - tensor concat_204 = const()[name = string("concat_204"), val = tensor([12, 0, 0, 0])]; - tensor concat_205 = const()[name = string("concat_205"), val = tensor([13, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_25_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_25_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_25_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_25_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_25_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_25_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_25_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_204, begin_mask = model_model_kv_cache_local_internal_tensor_assign_25_begin_mask_0, end = concat_205, end_mask = model_model_kv_cache_local_internal_tensor_assign_25_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_25_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_25_stride_0, update = shifted_key_25_cast_fp16, x = coreml_update_state_79)[name = string("model_model_kv_cache_local_internal_tensor_assign_25_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_25_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_184_write_state")]; - tensor coreml_update_state_80 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_184")]; - tensor value_slice_25_begin_0 = const()[name = string("value_slice_25_begin_0"), val = tensor([34, 0, 0, 0])]; - tensor value_slice_25_end_0 = const()[name = string("value_slice_25_end_0"), val = tensor([35, 1, 512, 256])]; - tensor value_slice_25_end_mask_0 = const()[name = string("value_slice_25_end_mask_0"), val = tensor([false, true, true, true])]; - tensor value_slice_25_cast_fp16 = slice_by_index(begin = value_slice_25_begin_0, end = value_slice_25_end_0, end_mask = value_slice_25_end_mask_0, x = coreml_update_state_80)[name = string("value_slice_25_cast_fp16")]; - tensor var_12417_begin_0 = const()[name = string("op_12417_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_12417_end_0 = const()[name = string("op_12417_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_12417_end_mask_0 = const()[name = string("op_12417_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_12417_cast_fp16 = slice_by_index(begin = var_12417_begin_0, end = var_12417_end_0, end_mask = var_12417_end_mask_0, x = value_slice_25_cast_fp16)[name = string("op_12417_cast_fp16")]; - int32 var_12444 = const()[name = string("op_12444"), val = int32(2)]; - bool shifted_value_25_interleave_0 = const()[name = string("shifted_value_25_interleave_0"), val = bool(false)]; - tensor value_states_115 = transpose(perm = var_12189, x = var_12184)[name = string("transpose_104")]; - tensor shifted_value_25_cast_fp16 = concat(axis = var_12444, interleave = shifted_value_25_interleave_0, values = (var_12417_cast_fp16, value_states_115))[name = string("shifted_value_25_cast_fp16")]; - tensor concat_206 = const()[name = string("concat_206"), val = tensor([34, 0, 0, 0])]; - tensor concat_207 = const()[name = string("concat_207"), val = tensor([35, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_26_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_26_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_26_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_26_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_26_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_26_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_26_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_206, begin_mask = model_model_kv_cache_local_internal_tensor_assign_26_begin_mask_0, end = concat_207, end_mask = model_model_kv_cache_local_internal_tensor_assign_26_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_26_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_26_stride_0, update = shifted_value_25_cast_fp16, x = coreml_update_state_80)[name = string("model_model_kv_cache_local_internal_tensor_assign_26_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_26_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_185_write_state")]; - tensor coreml_update_state_81 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_185")]; - tensor var_12472_begin_0 = const()[name = string("op_12472_begin_0"), val = tensor([12, 0, 0, 0])]; - tensor var_12472_end_0 = const()[name = string("op_12472_end_0"), val = tensor([13, 1, 512, 256])]; - tensor var_12472_end_mask_0 = const()[name = string("op_12472_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_12472_cast_fp16 = slice_by_index(begin = var_12472_begin_0, end = var_12472_end_0, end_mask = var_12472_end_mask_0, x = coreml_update_state_81)[name = string("op_12472_cast_fp16")]; - tensor var_12479_begin_0 = const()[name = string("op_12479_begin_0"), val = tensor([34, 0, 0, 0])]; - tensor var_12479_end_0 = const()[name = string("op_12479_end_0"), val = tensor([35, 1, 512, 256])]; - tensor var_12479_end_mask_0 = const()[name = string("op_12479_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_12479_cast_fp16 = slice_by_index(begin = var_12479_begin_0, end = var_12479_end_0, end_mask = var_12479_end_mask_0, x = coreml_update_state_81)[name = string("op_12479_cast_fp16")]; - tensor var_12518 = const()[name = string("op_12518"), val = tensor([1, 4, 1, 1])]; - tensor x_229_cast_fp16 = tile(reps = var_12518, x = var_12472_cast_fp16)[name = string("x_229_cast_fp16")]; - tensor var_12538 = const()[name = string("op_12538"), val = tensor([1, 4, 1, 1])]; - tensor x_235_cast_fp16 = tile(reps = var_12538, x = var_12479_cast_fp16)[name = string("x_235_cast_fp16")]; - bool var_12565_transpose_x_0 = const()[name = string("op_12565_transpose_x_0"), val = bool(false)]; - bool var_12565_transpose_y_0 = const()[name = string("op_12565_transpose_y_0"), val = bool(true)]; - tensor var_12565 = matmul(transpose_x = var_12565_transpose_x_0, transpose_y = var_12565_transpose_y_0, x = query_states_115_cast_fp16, y = x_229_cast_fp16)[name = string("op_12565")]; - fp16 var_12566_to_fp16 = const()[name = string("op_12566_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_57_cast_fp16 = mul(x = var_12565, y = var_12566_to_fp16)[name = string("attn_weights_57_cast_fp16")]; - tensor attn_weights_59_cast_fp16 = add(x = attn_weights_57_cast_fp16, y = mask_slice_1)[name = string("attn_weights_59_cast_fp16")]; - int32 var_12601 = const()[name = string("op_12601"), val = int32(-1)]; - tensor var_12603_cast_fp16 = softmax(axis = var_12601, x = attn_weights_59_cast_fp16)[name = string("op_12603_cast_fp16")]; - tensor concat_212 = const()[name = string("concat_212"), val = tensor([4, 64, 512])]; - tensor reshape_42_cast_fp16 = reshape(shape = concat_212, x = var_12603_cast_fp16)[name = string("reshape_42_cast_fp16")]; - tensor concat_213 = const()[name = string("concat_213"), val = tensor([4, 512, 256])]; - tensor reshape_43_cast_fp16 = reshape(shape = concat_213, x = x_235_cast_fp16)[name = string("reshape_43_cast_fp16")]; - bool matmul_14_transpose_x_0 = const()[name = string("matmul_14_transpose_x_0"), val = bool(false)]; - bool matmul_14_transpose_y_0 = const()[name = string("matmul_14_transpose_y_0"), val = bool(false)]; - tensor matmul_14_cast_fp16 = matmul(transpose_x = matmul_14_transpose_x_0, transpose_y = matmul_14_transpose_y_0, x = reshape_42_cast_fp16, y = reshape_43_cast_fp16)[name = string("matmul_14_cast_fp16")]; - tensor concat_217 = const()[name = string("concat_217"), val = tensor([1, 4, 64, 256])]; - tensor reshape_44_cast_fp16 = reshape(shape = concat_217, x = matmul_14_cast_fp16)[name = string("reshape_44_cast_fp16")]; - tensor var_12615_perm_0 = const()[name = string("op_12615_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_12634 = const()[name = string("op_12634"), val = tensor([1, 64, 1024])]; - tensor var_12615_cast_fp16 = transpose(perm = var_12615_perm_0, x = reshape_44_cast_fp16)[name = string("transpose_103")]; - tensor attn_output_145_cast_fp16 = reshape(shape = var_12634, x = var_12615_cast_fp16)[name = string("attn_output_145_cast_fp16")]; - tensor var_12639 = const()[name = string("op_12639"), val = tensor([0, 2, 1])]; - string var_12655_pad_type_0 = const()[name = string("op_12655_pad_type_0"), val = string("valid")]; - int32 var_12655_groups_0 = const()[name = string("op_12655_groups_0"), val = int32(1)]; - tensor var_12655_strides_0 = const()[name = string("op_12655_strides_0"), val = tensor([1])]; - tensor var_12655_pad_0 = const()[name = string("op_12655_pad_0"), val = tensor([0, 0])]; - tensor var_12655_dilations_0 = const()[name = string("op_12655_dilations_0"), val = tensor([1])]; - tensor squeeze_14_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318912256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319797056))))[name = string("squeeze_14_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_12640_cast_fp16 = transpose(perm = var_12639, x = attn_output_145_cast_fp16)[name = string("transpose_102")]; - tensor var_12655_cast_fp16 = conv(dilations = var_12655_dilations_0, groups = var_12655_groups_0, pad = var_12655_pad_0, pad_type = var_12655_pad_type_0, strides = var_12655_strides_0, weight = squeeze_14_cast_fp16_to_fp32_to_fp16_palettized, x = var_12640_cast_fp16)[name = string("op_12655_cast_fp16")]; - tensor var_12659 = const()[name = string("op_12659"), val = tensor([0, 2, 1])]; - int32 var_12670 = const()[name = string("op_12670"), val = int32(-1)]; - fp16 const_620_promoted_to_fp16 = const()[name = string("const_620_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_233_cast_fp16 = transpose(perm = var_12659, x = var_12655_cast_fp16)[name = string("transpose_101")]; - tensor var_12672_cast_fp16 = mul(x = hidden_states_233_cast_fp16, y = const_620_promoted_to_fp16)[name = string("op_12672_cast_fp16")]; - bool input_291_interleave_0 = const()[name = string("input_291_interleave_0"), val = bool(false)]; - tensor input_291_cast_fp16 = concat(axis = var_12670, interleave = input_291_interleave_0, values = (hidden_states_233_cast_fp16, var_12672_cast_fp16))[name = string("input_291_cast_fp16")]; - tensor normed_349_axes_0 = const()[name = string("normed_349_axes_0"), val = tensor([-1])]; - fp16 var_12667_to_fp16 = const()[name = string("op_12667_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_349_cast_fp16 = layer_norm(axes = normed_349_axes_0, epsilon = var_12667_to_fp16, x = input_291_cast_fp16)[name = string("normed_349_cast_fp16")]; - tensor normed_351_begin_0 = const()[name = string("normed_351_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_351_end_0 = const()[name = string("normed_351_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_351_end_mask_0 = const()[name = string("normed_351_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_351_cast_fp16 = slice_by_index(begin = normed_351_begin_0, end = normed_351_end_0, end_mask = normed_351_end_mask_0, x = normed_349_cast_fp16)[name = string("normed_351_cast_fp16")]; - tensor var_12686_to_fp16 = const()[name = string("op_12686_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319815552)))]; - tensor attn_output_149_cast_fp16 = mul(x = normed_351_cast_fp16, y = var_12686_to_fp16)[name = string("attn_output_149_cast_fp16")]; - tensor hidden_states_235_cast_fp16 = add(x = hidden_states_225_cast_fp16, y = attn_output_149_cast_fp16)[name = string("hidden_states_235_cast_fp16")]; - int32 var_12699 = const()[name = string("op_12699"), val = int32(-1)]; - fp16 const_624_promoted_to_fp16 = const()[name = string("const_624_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_12701_cast_fp16 = mul(x = hidden_states_235_cast_fp16, y = const_624_promoted_to_fp16)[name = string("op_12701_cast_fp16")]; - bool input_293_interleave_0 = const()[name = string("input_293_interleave_0"), val = bool(false)]; - tensor input_293_cast_fp16 = concat(axis = var_12699, interleave = input_293_interleave_0, values = (hidden_states_235_cast_fp16, var_12701_cast_fp16))[name = string("input_293_cast_fp16")]; - tensor normed_353_axes_0 = const()[name = string("normed_353_axes_0"), val = tensor([-1])]; - fp16 var_12696_to_fp16 = const()[name = string("op_12696_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_353_cast_fp16 = layer_norm(axes = normed_353_axes_0, epsilon = var_12696_to_fp16, x = input_293_cast_fp16)[name = string("normed_353_cast_fp16")]; - tensor normed_355_begin_0 = const()[name = string("normed_355_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_355_end_0 = const()[name = string("normed_355_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_355_end_mask_0 = const()[name = string("normed_355_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_355_cast_fp16 = slice_by_index(begin = normed_355_begin_0, end = normed_355_end_0, end_mask = normed_355_end_mask_0, x = normed_353_cast_fp16)[name = string("normed_355_cast_fp16")]; - tensor var_12715_to_fp16 = const()[name = string("op_12715_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319817920)))]; - tensor x_237_cast_fp16 = mul(x = normed_355_cast_fp16, y = var_12715_to_fp16)[name = string("x_237_cast_fp16")]; - tensor var_12727 = const()[name = string("op_12727"), val = tensor([0, 2, 1])]; - tensor input_295_axes_0 = const()[name = string("input_295_axes_0"), val = tensor([2])]; - tensor var_12728_cast_fp16 = transpose(perm = var_12727, x = x_237_cast_fp16)[name = string("transpose_100")]; - tensor input_295_cast_fp16 = expand_dims(axes = input_295_axes_0, x = var_12728_cast_fp16)[name = string("input_295_cast_fp16")]; - string x_239_pad_type_0 = const()[name = string("x_239_pad_type_0"), val = string("valid")]; - tensor x_239_strides_0 = const()[name = string("x_239_strides_0"), val = tensor([1, 1])]; - tensor x_239_pad_0 = const()[name = string("x_239_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_239_dilations_0 = const()[name = string("x_239_dilations_0"), val = tensor([1, 1])]; - int32 x_239_groups_0 = const()[name = string("x_239_groups_0"), val = int32(1)]; - tensor model_model_layers_14_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1361750464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1367722496))))[name = string("model_model_layers_14_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_239_cast_fp16 = conv(dilations = x_239_dilations_0, groups = x_239_groups_0, pad = x_239_pad_0, pad_type = x_239_pad_type_0, strides = x_239_strides_0, weight = model_model_layers_14_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_295_cast_fp16)[name = string("x_239_cast_fp16")]; - string b_29_pad_type_0 = const()[name = string("b_29_pad_type_0"), val = string("valid")]; - tensor b_29_strides_0 = const()[name = string("b_29_strides_0"), val = tensor([1, 1])]; - tensor b_29_pad_0 = const()[name = string("b_29_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_29_dilations_0 = const()[name = string("b_29_dilations_0"), val = tensor([1, 1])]; - int32 b_29_groups_0 = const()[name = string("b_29_groups_0"), val = int32(1)]; - tensor model_model_layers_14_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1367833152))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1373805184))))[name = string("model_model_layers_14_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_29_cast_fp16 = conv(dilations = b_29_dilations_0, groups = b_29_groups_0, pad = b_29_pad_0, pad_type = b_29_pad_type_0, strides = b_29_strides_0, weight = model_model_layers_14_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_295_cast_fp16)[name = string("b_29_cast_fp16")]; - string var_12753_mode_0 = const()[name = string("op_12753_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_12753_cast_fp16 = gelu(mode = var_12753_mode_0, x = x_239_cast_fp16)[name = string("op_12753_cast_fp16")]; - tensor input_297_cast_fp16 = mul(x = var_12753_cast_fp16, y = b_29_cast_fp16)[name = string("input_297_cast_fp16")]; - string e_29_pad_type_0 = const()[name = string("e_29_pad_type_0"), val = string("valid")]; - tensor e_29_strides_0 = const()[name = string("e_29_strides_0"), val = tensor([1, 1])]; - tensor e_29_pad_0 = const()[name = string("e_29_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_29_dilations_0 = const()[name = string("e_29_dilations_0"), val = tensor([1, 1])]; - int32 e_29_groups_0 = const()[name = string("e_29_groups_0"), val = int32(1)]; - tensor model_model_layers_14_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(331985664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337957696))))[name = string("model_model_layers_14_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_29_cast_fp16 = conv(dilations = e_29_dilations_0, groups = e_29_groups_0, pad = e_29_pad_0, pad_type = e_29_pad_type_0, strides = e_29_strides_0, weight = model_model_layers_14_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_297_cast_fp16)[name = string("e_29_cast_fp16")]; - tensor var_12761_axes_0 = const()[name = string("op_12761_axes_0"), val = tensor([2])]; - tensor var_12761_cast_fp16 = squeeze(axes = var_12761_axes_0, x = e_29_cast_fp16)[name = string("op_12761_cast_fp16")]; - tensor var_12762 = const()[name = string("op_12762"), val = tensor([0, 2, 1])]; - int32 var_12773 = const()[name = string("op_12773"), val = int32(-1)]; - fp16 const_628_promoted_to_fp16 = const()[name = string("const_628_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_237_cast_fp16 = transpose(perm = var_12762, x = var_12761_cast_fp16)[name = string("transpose_99")]; - tensor var_12775_cast_fp16 = mul(x = hidden_states_237_cast_fp16, y = const_628_promoted_to_fp16)[name = string("op_12775_cast_fp16")]; - bool input_299_interleave_0 = const()[name = string("input_299_interleave_0"), val = bool(false)]; - tensor input_299_cast_fp16 = concat(axis = var_12773, interleave = input_299_interleave_0, values = (hidden_states_237_cast_fp16, var_12775_cast_fp16))[name = string("input_299_cast_fp16")]; - tensor normed_357_axes_0 = const()[name = string("normed_357_axes_0"), val = tensor([-1])]; - fp16 var_12770_to_fp16 = const()[name = string("op_12770_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_357_cast_fp16 = layer_norm(axes = normed_357_axes_0, epsilon = var_12770_to_fp16, x = input_299_cast_fp16)[name = string("normed_357_cast_fp16")]; - tensor normed_359_begin_0 = const()[name = string("normed_359_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_359_end_0 = const()[name = string("normed_359_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_359_end_mask_0 = const()[name = string("normed_359_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_359_cast_fp16 = slice_by_index(begin = normed_359_begin_0, end = normed_359_end_0, end_mask = normed_359_end_mask_0, x = normed_357_cast_fp16)[name = string("normed_359_cast_fp16")]; - tensor var_12789_to_fp16 = const()[name = string("op_12789_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337976192)))]; - tensor hidden_states_239_cast_fp16 = mul(x = normed_359_cast_fp16, y = var_12789_to_fp16)[name = string("hidden_states_239_cast_fp16")]; - tensor hidden_states_241_cast_fp16 = add(x = hidden_states_235_cast_fp16, y = hidden_states_239_cast_fp16)[name = string("hidden_states_241_cast_fp16")]; - int32 var_12843 = const()[name = string("op_12843"), val = int32(-1)]; - fp16 const_633_promoted_to_fp16 = const()[name = string("const_633_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_12845_cast_fp16 = mul(x = hidden_states_241_cast_fp16, y = const_633_promoted_to_fp16)[name = string("op_12845_cast_fp16")]; - bool input_301_interleave_0 = const()[name = string("input_301_interleave_0"), val = bool(false)]; - tensor input_301_cast_fp16 = concat(axis = var_12843, interleave = input_301_interleave_0, values = (hidden_states_241_cast_fp16, var_12845_cast_fp16))[name = string("input_301_cast_fp16")]; - tensor normed_361_axes_0 = const()[name = string("normed_361_axes_0"), val = tensor([-1])]; - fp16 var_12840_to_fp16 = const()[name = string("op_12840_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_361_cast_fp16 = layer_norm(axes = normed_361_axes_0, epsilon = var_12840_to_fp16, x = input_301_cast_fp16)[name = string("normed_361_cast_fp16")]; - tensor normed_363_begin_0 = const()[name = string("normed_363_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_363_end_0 = const()[name = string("normed_363_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_363_end_mask_0 = const()[name = string("normed_363_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_363_cast_fp16 = slice_by_index(begin = normed_363_begin_0, end = normed_363_end_0, end_mask = normed_363_end_mask_0, x = normed_361_cast_fp16)[name = string("normed_363_cast_fp16")]; - tensor var_12859_to_fp16 = const()[name = string("op_12859_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337978560)))]; - tensor hidden_states_243_cast_fp16 = mul(x = normed_363_cast_fp16, y = var_12859_to_fp16)[name = string("hidden_states_243_cast_fp16")]; - tensor var_12870 = const()[name = string("op_12870"), val = tensor([0, 2, 1])]; - tensor var_12873_axes_0 = const()[name = string("op_12873_axes_0"), val = tensor([2])]; - tensor var_12871_cast_fp16 = transpose(perm = var_12870, x = hidden_states_243_cast_fp16)[name = string("transpose_98")]; - tensor var_12873_cast_fp16 = expand_dims(axes = var_12873_axes_0, x = var_12871_cast_fp16)[name = string("op_12873_cast_fp16")]; - string query_states_121_pad_type_0 = const()[name = string("query_states_121_pad_type_0"), val = string("valid")]; - tensor query_states_121_strides_0 = const()[name = string("query_states_121_strides_0"), val = tensor([1, 1])]; - tensor query_states_121_pad_0 = const()[name = string("query_states_121_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_121_dilations_0 = const()[name = string("query_states_121_dilations_0"), val = tensor([1, 1])]; - int32 query_states_121_groups_0 = const()[name = string("query_states_121_groups_0"), val = int32(1)]; - tensor query_states_121 = conv(dilations = query_states_121_dilations_0, groups = query_states_121_groups_0, pad = query_states_121_pad_0, pad_type = query_states_121_pad_type_0, strides = query_states_121_strides_0, weight = model_model_layers_15_self_attn_q_proj_weight_palettized, x = var_12873_cast_fp16)[name = string("query_states_121")]; - string key_states_151_pad_type_0 = const()[name = string("key_states_151_pad_type_0"), val = string("valid")]; - tensor key_states_151_strides_0 = const()[name = string("key_states_151_strides_0"), val = tensor([1, 1])]; - tensor key_states_151_pad_0 = const()[name = string("key_states_151_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_151_dilations_0 = const()[name = string("key_states_151_dilations_0"), val = tensor([1, 1])]; - int32 key_states_151_groups_0 = const()[name = string("key_states_151_groups_0"), val = int32(1)]; - tensor key_states_151 = conv(dilations = key_states_151_dilations_0, groups = key_states_151_groups_0, pad = key_states_151_pad_0, pad_type = key_states_151_pad_type_0, strides = key_states_151_strides_0, weight = model_model_layers_15_self_attn_k_proj_weight_palettized, x = var_12873_cast_fp16)[name = string("key_states_151")]; - string value_states_121_pad_type_0 = const()[name = string("value_states_121_pad_type_0"), val = string("valid")]; - tensor value_states_121_strides_0 = const()[name = string("value_states_121_strides_0"), val = tensor([1, 1])]; - tensor value_states_121_pad_0 = const()[name = string("value_states_121_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_121_dilations_0 = const()[name = string("value_states_121_dilations_0"), val = tensor([1, 1])]; - int32 value_states_121_groups_0 = const()[name = string("value_states_121_groups_0"), val = int32(1)]; - tensor value_states_121 = conv(dilations = value_states_121_dilations_0, groups = value_states_121_groups_0, pad = value_states_121_pad_0, pad_type = value_states_121_pad_type_0, strides = value_states_121_strides_0, weight = model_model_layers_15_self_attn_v_proj_weight_palettized, x = var_12873_cast_fp16)[name = string("value_states_121")]; - tensor var_12915 = const()[name = string("op_12915"), val = tensor([1, 4, 256, 64])]; - tensor var_12916 = reshape(shape = var_12915, x = query_states_121)[name = string("op_12916")]; - tensor var_12921 = const()[name = string("op_12921"), val = tensor([0, 1, 3, 2])]; - tensor var_12926 = const()[name = string("op_12926"), val = tensor([1, 1, 256, 64])]; - tensor var_12927 = reshape(shape = var_12926, x = key_states_151)[name = string("op_12927")]; - tensor var_12932 = const()[name = string("op_12932"), val = tensor([0, 1, 3, 2])]; - tensor var_12937 = const()[name = string("op_12937"), val = tensor([1, 1, 256, 64])]; - tensor var_12938 = reshape(shape = var_12937, x = value_states_121)[name = string("op_12938")]; - tensor var_12943 = const()[name = string("op_12943"), val = tensor([0, 1, 3, 2])]; - int32 var_12954 = const()[name = string("op_12954"), val = int32(-1)]; - fp16 const_638_promoted = const()[name = string("const_638_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_245 = transpose(perm = var_12921, x = var_12916)[name = string("transpose_97")]; - tensor var_12956 = mul(x = hidden_states_245, y = const_638_promoted)[name = string("op_12956")]; - bool input_305_interleave_0 = const()[name = string("input_305_interleave_0"), val = bool(false)]; - tensor input_305 = concat(axis = var_12954, interleave = input_305_interleave_0, values = (hidden_states_245, var_12956))[name = string("input_305")]; - tensor normed_365_axes_0 = const()[name = string("normed_365_axes_0"), val = tensor([-1])]; - fp16 var_12951_to_fp16 = const()[name = string("op_12951_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_365_cast_fp16 = layer_norm(axes = normed_365_axes_0, epsilon = var_12951_to_fp16, x = input_305)[name = string("normed_365_cast_fp16")]; - tensor normed_367_begin_0 = const()[name = string("normed_367_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_367_end_0 = const()[name = string("normed_367_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_367_end_mask_0 = const()[name = string("normed_367_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_367 = slice_by_index(begin = normed_367_begin_0, end = normed_367_end_0, end_mask = normed_367_end_mask_0, x = normed_365_cast_fp16)[name = string("normed_367")]; - tensor var_12970_to_fp16 = const()[name = string("op_12970_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337980928)))]; - tensor q_31_cast_fp16 = mul(x = normed_367, y = var_12970_to_fp16)[name = string("q_31_cast_fp16")]; - int32 var_12981 = const()[name = string("op_12981"), val = int32(-1)]; - fp16 const_642_promoted = const()[name = string("const_642_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_247 = transpose(perm = var_12932, x = var_12927)[name = string("transpose_96")]; - tensor var_12983 = mul(x = hidden_states_247, y = const_642_promoted)[name = string("op_12983")]; - bool input_307_interleave_0 = const()[name = string("input_307_interleave_0"), val = bool(false)]; - tensor input_307 = concat(axis = var_12981, interleave = input_307_interleave_0, values = (hidden_states_247, var_12983))[name = string("input_307")]; - tensor normed_369_axes_0 = const()[name = string("normed_369_axes_0"), val = tensor([-1])]; - fp16 var_12978_to_fp16 = const()[name = string("op_12978_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_369_cast_fp16 = layer_norm(axes = normed_369_axes_0, epsilon = var_12978_to_fp16, x = input_307)[name = string("normed_369_cast_fp16")]; - tensor normed_371_begin_0 = const()[name = string("normed_371_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_371_end_0 = const()[name = string("normed_371_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_371_end_mask_0 = const()[name = string("normed_371_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_371 = slice_by_index(begin = normed_371_begin_0, end = normed_371_end_0, end_mask = normed_371_end_mask_0, x = normed_369_cast_fp16)[name = string("normed_371")]; - tensor var_12997_to_fp16 = const()[name = string("op_12997_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337981504)))]; - tensor k_31_cast_fp16 = mul(x = normed_371, y = var_12997_to_fp16)[name = string("k_31_cast_fp16")]; - tensor var_13011_cast_fp16 = mul(x = q_31_cast_fp16, y = cos_5)[name = string("op_13011_cast_fp16")]; - tensor x1_61_begin_0 = const()[name = string("x1_61_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_61_end_0 = const()[name = string("x1_61_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_61_end_mask_0 = const()[name = string("x1_61_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_61_cast_fp16 = slice_by_index(begin = x1_61_begin_0, end = x1_61_end_0, end_mask = x1_61_end_mask_0, x = q_31_cast_fp16)[name = string("x1_61_cast_fp16")]; - tensor x2_61_begin_0 = const()[name = string("x2_61_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_61_end_0 = const()[name = string("x2_61_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_61_end_mask_0 = const()[name = string("x2_61_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_61_cast_fp16 = slice_by_index(begin = x2_61_begin_0, end = x2_61_end_0, end_mask = x2_61_end_mask_0, x = q_31_cast_fp16)[name = string("x2_61_cast_fp16")]; - fp16 const_648_promoted_to_fp16 = const()[name = string("const_648_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_13032_cast_fp16 = mul(x = x2_61_cast_fp16, y = const_648_promoted_to_fp16)[name = string("op_13032_cast_fp16")]; - int32 var_13034 = const()[name = string("op_13034"), val = int32(-1)]; - bool var_13035_interleave_0 = const()[name = string("op_13035_interleave_0"), val = bool(false)]; - tensor var_13035_cast_fp16 = concat(axis = var_13034, interleave = var_13035_interleave_0, values = (var_13032_cast_fp16, x1_61_cast_fp16))[name = string("op_13035_cast_fp16")]; - tensor var_13036_cast_fp16 = mul(x = var_13035_cast_fp16, y = sin_5)[name = string("op_13036_cast_fp16")]; - tensor query_states_123_cast_fp16 = add(x = var_13011_cast_fp16, y = var_13036_cast_fp16)[name = string("query_states_123_cast_fp16")]; - tensor var_13039_cast_fp16 = mul(x = k_31_cast_fp16, y = cos_5)[name = string("op_13039_cast_fp16")]; - tensor x1_63_begin_0 = const()[name = string("x1_63_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_63_end_0 = const()[name = string("x1_63_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_63_end_mask_0 = const()[name = string("x1_63_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_63_cast_fp16 = slice_by_index(begin = x1_63_begin_0, end = x1_63_end_0, end_mask = x1_63_end_mask_0, x = k_31_cast_fp16)[name = string("x1_63_cast_fp16")]; - tensor x2_63_begin_0 = const()[name = string("x2_63_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_63_end_0 = const()[name = string("x2_63_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_63_end_mask_0 = const()[name = string("x2_63_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_63_cast_fp16 = slice_by_index(begin = x2_63_begin_0, end = x2_63_end_0, end_mask = x2_63_end_mask_0, x = k_31_cast_fp16)[name = string("x2_63_cast_fp16")]; - fp16 const_651_promoted_to_fp16 = const()[name = string("const_651_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_13060_cast_fp16 = mul(x = x2_63_cast_fp16, y = const_651_promoted_to_fp16)[name = string("op_13060_cast_fp16")]; - int32 var_13062 = const()[name = string("op_13062"), val = int32(-1)]; - bool var_13063_interleave_0 = const()[name = string("op_13063_interleave_0"), val = bool(false)]; - tensor var_13063_cast_fp16 = concat(axis = var_13062, interleave = var_13063_interleave_0, values = (var_13060_cast_fp16, x1_63_cast_fp16))[name = string("op_13063_cast_fp16")]; - tensor var_13064_cast_fp16 = mul(x = var_13063_cast_fp16, y = sin_5)[name = string("op_13064_cast_fp16")]; - tensor key_states_153_cast_fp16 = add(x = var_13039_cast_fp16, y = var_13064_cast_fp16)[name = string("key_states_153_cast_fp16")]; - tensor key_slice_27_begin_0 = const()[name = string("key_slice_27_begin_0"), val = tensor([13, 0, 0, 0])]; - tensor key_slice_27_end_0 = const()[name = string("key_slice_27_end_0"), val = tensor([14, 1, 512, 256])]; - tensor key_slice_27_end_mask_0 = const()[name = string("key_slice_27_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_27_cast_fp16 = slice_by_index(begin = key_slice_27_begin_0, end = key_slice_27_end_0, end_mask = key_slice_27_end_mask_0, x = coreml_update_state_81)[name = string("key_slice_27_cast_fp16")]; - tensor var_13101_begin_0 = const()[name = string("op_13101_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_13101_end_0 = const()[name = string("op_13101_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_13101_end_mask_0 = const()[name = string("op_13101_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_13101_cast_fp16 = slice_by_index(begin = var_13101_begin_0, end = var_13101_end_0, end_mask = var_13101_end_mask_0, x = key_slice_27_cast_fp16)[name = string("op_13101_cast_fp16")]; - int32 var_13128 = const()[name = string("op_13128"), val = int32(2)]; - bool shifted_key_27_interleave_0 = const()[name = string("shifted_key_27_interleave_0"), val = bool(false)]; - tensor shifted_key_27_cast_fp16 = concat(axis = var_13128, interleave = shifted_key_27_interleave_0, values = (var_13101_cast_fp16, key_states_153_cast_fp16))[name = string("shifted_key_27_cast_fp16")]; - tensor concat_218 = const()[name = string("concat_218"), val = tensor([13, 0, 0, 0])]; - tensor concat_219 = const()[name = string("concat_219"), val = tensor([14, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_27_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_27_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_27_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_27_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_27_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_27_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_27_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_218, begin_mask = model_model_kv_cache_local_internal_tensor_assign_27_begin_mask_0, end = concat_219, end_mask = model_model_kv_cache_local_internal_tensor_assign_27_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_27_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_27_stride_0, update = shifted_key_27_cast_fp16, x = coreml_update_state_81)[name = string("model_model_kv_cache_local_internal_tensor_assign_27_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_27_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_186_write_state")]; - tensor coreml_update_state_82 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_186")]; - tensor value_slice_27_begin_0 = const()[name = string("value_slice_27_begin_0"), val = tensor([35, 0, 0, 0])]; - tensor value_slice_27_end_0 = const()[name = string("value_slice_27_end_0"), val = tensor([36, 1, 512, 256])]; - tensor value_slice_27_end_mask_0 = const()[name = string("value_slice_27_end_mask_0"), val = tensor([false, true, true, true])]; - tensor value_slice_27_cast_fp16 = slice_by_index(begin = value_slice_27_begin_0, end = value_slice_27_end_0, end_mask = value_slice_27_end_mask_0, x = coreml_update_state_82)[name = string("value_slice_27_cast_fp16")]; - tensor var_13171_begin_0 = const()[name = string("op_13171_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_13171_end_0 = const()[name = string("op_13171_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_13171_end_mask_0 = const()[name = string("op_13171_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_13171_cast_fp16 = slice_by_index(begin = var_13171_begin_0, end = var_13171_end_0, end_mask = var_13171_end_mask_0, x = value_slice_27_cast_fp16)[name = string("op_13171_cast_fp16")]; - int32 var_13198 = const()[name = string("op_13198"), val = int32(2)]; - bool shifted_value_27_interleave_0 = const()[name = string("shifted_value_27_interleave_0"), val = bool(false)]; - tensor value_states_123 = transpose(perm = var_12943, x = var_12938)[name = string("transpose_95")]; - tensor shifted_value_27_cast_fp16 = concat(axis = var_13198, interleave = shifted_value_27_interleave_0, values = (var_13171_cast_fp16, value_states_123))[name = string("shifted_value_27_cast_fp16")]; - tensor concat_220 = const()[name = string("concat_220"), val = tensor([35, 0, 0, 0])]; - tensor concat_221 = const()[name = string("concat_221"), val = tensor([36, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_28_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_28_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_28_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_28_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_28_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_28_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_28_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_220, begin_mask = model_model_kv_cache_local_internal_tensor_assign_28_begin_mask_0, end = concat_221, end_mask = model_model_kv_cache_local_internal_tensor_assign_28_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_28_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_28_stride_0, update = shifted_value_27_cast_fp16, x = coreml_update_state_82)[name = string("model_model_kv_cache_local_internal_tensor_assign_28_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_28_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_187_write_state")]; - tensor coreml_update_state_83 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_187")]; - tensor var_13226_begin_0 = const()[name = string("op_13226_begin_0"), val = tensor([13, 0, 0, 0])]; - tensor var_13226_end_0 = const()[name = string("op_13226_end_0"), val = tensor([14, 1, 512, 256])]; - tensor var_13226_end_mask_0 = const()[name = string("op_13226_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_13226_cast_fp16 = slice_by_index(begin = var_13226_begin_0, end = var_13226_end_0, end_mask = var_13226_end_mask_0, x = coreml_update_state_83)[name = string("op_13226_cast_fp16")]; - tensor var_13233_begin_0 = const()[name = string("op_13233_begin_0"), val = tensor([35, 0, 0, 0])]; - tensor var_13233_end_0 = const()[name = string("op_13233_end_0"), val = tensor([36, 1, 512, 256])]; - tensor var_13233_end_mask_0 = const()[name = string("op_13233_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_13233_cast_fp16 = slice_by_index(begin = var_13233_begin_0, end = var_13233_end_0, end_mask = var_13233_end_mask_0, x = coreml_update_state_83)[name = string("op_13233_cast_fp16")]; - tensor var_13272 = const()[name = string("op_13272"), val = tensor([1, 4, 1, 1])]; - tensor x_245_cast_fp16 = tile(reps = var_13272, x = var_13226_cast_fp16)[name = string("x_245_cast_fp16")]; - tensor var_13292 = const()[name = string("op_13292"), val = tensor([1, 4, 1, 1])]; - tensor x_251_cast_fp16 = tile(reps = var_13292, x = var_13233_cast_fp16)[name = string("x_251_cast_fp16")]; - bool var_13319_transpose_x_0 = const()[name = string("op_13319_transpose_x_0"), val = bool(false)]; - bool var_13319_transpose_y_0 = const()[name = string("op_13319_transpose_y_0"), val = bool(true)]; - tensor var_13319 = matmul(transpose_x = var_13319_transpose_x_0, transpose_y = var_13319_transpose_y_0, x = query_states_123_cast_fp16, y = x_245_cast_fp16)[name = string("op_13319")]; - fp16 var_13320_to_fp16 = const()[name = string("op_13320_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_61_cast_fp16 = mul(x = var_13319, y = var_13320_to_fp16)[name = string("attn_weights_61_cast_fp16")]; - tensor attn_weights_63_cast_fp16 = add(x = attn_weights_61_cast_fp16, y = mask_slice_1)[name = string("attn_weights_63_cast_fp16")]; - int32 var_13355 = const()[name = string("op_13355"), val = int32(-1)]; - tensor var_13357_cast_fp16 = softmax(axis = var_13355, x = attn_weights_63_cast_fp16)[name = string("op_13357_cast_fp16")]; - tensor concat_226 = const()[name = string("concat_226"), val = tensor([4, 64, 512])]; - tensor reshape_45_cast_fp16 = reshape(shape = concat_226, x = var_13357_cast_fp16)[name = string("reshape_45_cast_fp16")]; - tensor concat_227 = const()[name = string("concat_227"), val = tensor([4, 512, 256])]; - tensor reshape_46_cast_fp16 = reshape(shape = concat_227, x = x_251_cast_fp16)[name = string("reshape_46_cast_fp16")]; - bool matmul_15_transpose_x_0 = const()[name = string("matmul_15_transpose_x_0"), val = bool(false)]; - bool matmul_15_transpose_y_0 = const()[name = string("matmul_15_transpose_y_0"), val = bool(false)]; - tensor matmul_15_cast_fp16 = matmul(transpose_x = matmul_15_transpose_x_0, transpose_y = matmul_15_transpose_y_0, x = reshape_45_cast_fp16, y = reshape_46_cast_fp16)[name = string("matmul_15_cast_fp16")]; - tensor concat_231 = const()[name = string("concat_231"), val = tensor([1, 4, 64, 256])]; - tensor reshape_47_cast_fp16 = reshape(shape = concat_231, x = matmul_15_cast_fp16)[name = string("reshape_47_cast_fp16")]; - tensor var_13369_perm_0 = const()[name = string("op_13369_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_13388 = const()[name = string("op_13388"), val = tensor([1, 64, 1024])]; - tensor var_13369_cast_fp16 = transpose(perm = var_13369_perm_0, x = reshape_47_cast_fp16)[name = string("transpose_94")]; - tensor attn_output_155_cast_fp16 = reshape(shape = var_13388, x = var_13369_cast_fp16)[name = string("attn_output_155_cast_fp16")]; - tensor var_13393 = const()[name = string("op_13393"), val = tensor([0, 2, 1])]; - string var_13409_pad_type_0 = const()[name = string("op_13409_pad_type_0"), val = string("valid")]; - int32 var_13409_groups_0 = const()[name = string("op_13409_groups_0"), val = int32(1)]; - tensor var_13409_strides_0 = const()[name = string("op_13409_strides_0"), val = tensor([1])]; - tensor var_13409_pad_0 = const()[name = string("op_13409_pad_0"), val = tensor([0, 0])]; - tensor var_13409_dilations_0 = const()[name = string("op_13409_dilations_0"), val = tensor([1])]; - tensor squeeze_15_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337982080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338866880))))[name = string("squeeze_15_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_13394_cast_fp16 = transpose(perm = var_13393, x = attn_output_155_cast_fp16)[name = string("transpose_93")]; - tensor var_13409_cast_fp16 = conv(dilations = var_13409_dilations_0, groups = var_13409_groups_0, pad = var_13409_pad_0, pad_type = var_13409_pad_type_0, strides = var_13409_strides_0, weight = squeeze_15_cast_fp16_to_fp32_to_fp16_palettized, x = var_13394_cast_fp16)[name = string("op_13409_cast_fp16")]; - tensor var_13413 = const()[name = string("op_13413"), val = tensor([0, 2, 1])]; - int32 var_13424 = const()[name = string("op_13424"), val = int32(-1)]; - fp16 const_662_promoted_to_fp16 = const()[name = string("const_662_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_249_cast_fp16 = transpose(perm = var_13413, x = var_13409_cast_fp16)[name = string("transpose_92")]; - tensor var_13426_cast_fp16 = mul(x = hidden_states_249_cast_fp16, y = const_662_promoted_to_fp16)[name = string("op_13426_cast_fp16")]; - bool input_311_interleave_0 = const()[name = string("input_311_interleave_0"), val = bool(false)]; - tensor input_311_cast_fp16 = concat(axis = var_13424, interleave = input_311_interleave_0, values = (hidden_states_249_cast_fp16, var_13426_cast_fp16))[name = string("input_311_cast_fp16")]; - tensor normed_373_axes_0 = const()[name = string("normed_373_axes_0"), val = tensor([-1])]; - fp16 var_13421_to_fp16 = const()[name = string("op_13421_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_373_cast_fp16 = layer_norm(axes = normed_373_axes_0, epsilon = var_13421_to_fp16, x = input_311_cast_fp16)[name = string("normed_373_cast_fp16")]; - tensor normed_375_begin_0 = const()[name = string("normed_375_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_375_end_0 = const()[name = string("normed_375_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_375_end_mask_0 = const()[name = string("normed_375_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_375_cast_fp16 = slice_by_index(begin = normed_375_begin_0, end = normed_375_end_0, end_mask = normed_375_end_mask_0, x = normed_373_cast_fp16)[name = string("normed_375_cast_fp16")]; - tensor var_13440_to_fp16 = const()[name = string("op_13440_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338885376)))]; - tensor attn_output_159_cast_fp16 = mul(x = normed_375_cast_fp16, y = var_13440_to_fp16)[name = string("attn_output_159_cast_fp16")]; - tensor hidden_states_251_cast_fp16 = add(x = hidden_states_241_cast_fp16, y = attn_output_159_cast_fp16)[name = string("hidden_states_251_cast_fp16")]; - int32 var_13453 = const()[name = string("op_13453"), val = int32(-1)]; - fp16 const_666_promoted_to_fp16 = const()[name = string("const_666_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_13455_cast_fp16 = mul(x = hidden_states_251_cast_fp16, y = const_666_promoted_to_fp16)[name = string("op_13455_cast_fp16")]; - bool input_313_interleave_0 = const()[name = string("input_313_interleave_0"), val = bool(false)]; - tensor input_313_cast_fp16 = concat(axis = var_13453, interleave = input_313_interleave_0, values = (hidden_states_251_cast_fp16, var_13455_cast_fp16))[name = string("input_313_cast_fp16")]; - tensor normed_377_axes_0 = const()[name = string("normed_377_axes_0"), val = tensor([-1])]; - fp16 var_13450_to_fp16 = const()[name = string("op_13450_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_377_cast_fp16 = layer_norm(axes = normed_377_axes_0, epsilon = var_13450_to_fp16, x = input_313_cast_fp16)[name = string("normed_377_cast_fp16")]; - tensor normed_379_begin_0 = const()[name = string("normed_379_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_379_end_0 = const()[name = string("normed_379_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_379_end_mask_0 = const()[name = string("normed_379_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_379_cast_fp16 = slice_by_index(begin = normed_379_begin_0, end = normed_379_end_0, end_mask = normed_379_end_mask_0, x = normed_377_cast_fp16)[name = string("normed_379_cast_fp16")]; - tensor var_13469_to_fp16 = const()[name = string("op_13469_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338887744)))]; - tensor x_253_cast_fp16 = mul(x = normed_379_cast_fp16, y = var_13469_to_fp16)[name = string("x_253_cast_fp16")]; - tensor var_13481 = const()[name = string("op_13481"), val = tensor([0, 2, 1])]; - tensor input_315_axes_0 = const()[name = string("input_315_axes_0"), val = tensor([2])]; - tensor var_13482_cast_fp16 = transpose(perm = var_13481, x = x_253_cast_fp16)[name = string("transpose_91")]; - tensor input_315_cast_fp16 = expand_dims(axes = input_315_axes_0, x = var_13482_cast_fp16)[name = string("input_315_cast_fp16")]; - string x_255_pad_type_0 = const()[name = string("x_255_pad_type_0"), val = string("valid")]; - tensor x_255_strides_0 = const()[name = string("x_255_strides_0"), val = tensor([1, 1])]; - tensor x_255_pad_0 = const()[name = string("x_255_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_255_dilations_0 = const()[name = string("x_255_dilations_0"), val = tensor([1, 1])]; - int32 x_255_groups_0 = const()[name = string("x_255_groups_0"), val = int32(1)]; - tensor model_model_layers_15_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1373915840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1379887872))))[name = string("model_model_layers_15_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_255_cast_fp16 = conv(dilations = x_255_dilations_0, groups = x_255_groups_0, pad = x_255_pad_0, pad_type = x_255_pad_type_0, strides = x_255_strides_0, weight = model_model_layers_15_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_315_cast_fp16)[name = string("x_255_cast_fp16")]; - string b_31_pad_type_0 = const()[name = string("b_31_pad_type_0"), val = string("valid")]; - tensor b_31_strides_0 = const()[name = string("b_31_strides_0"), val = tensor([1, 1])]; - tensor b_31_pad_0 = const()[name = string("b_31_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_31_dilations_0 = const()[name = string("b_31_dilations_0"), val = tensor([1, 1])]; - int32 b_31_groups_0 = const()[name = string("b_31_groups_0"), val = int32(1)]; - tensor model_model_layers_15_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1379998528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1385970560))))[name = string("model_model_layers_15_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_31_cast_fp16 = conv(dilations = b_31_dilations_0, groups = b_31_groups_0, pad = b_31_pad_0, pad_type = b_31_pad_type_0, strides = b_31_strides_0, weight = model_model_layers_15_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_315_cast_fp16)[name = string("b_31_cast_fp16")]; - string var_13507_mode_0 = const()[name = string("op_13507_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_13507_cast_fp16 = gelu(mode = var_13507_mode_0, x = x_255_cast_fp16)[name = string("op_13507_cast_fp16")]; - tensor input_317_cast_fp16 = mul(x = var_13507_cast_fp16, y = b_31_cast_fp16)[name = string("input_317_cast_fp16")]; - string e_31_pad_type_0 = const()[name = string("e_31_pad_type_0"), val = string("valid")]; - tensor e_31_strides_0 = const()[name = string("e_31_strides_0"), val = tensor([1, 1])]; - tensor e_31_pad_0 = const()[name = string("e_31_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_31_dilations_0 = const()[name = string("e_31_dilations_0"), val = tensor([1, 1])]; - int32 e_31_groups_0 = const()[name = string("e_31_groups_0"), val = int32(1)]; - tensor model_model_layers_15_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(351055488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357027520))))[name = string("model_model_layers_15_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_31_cast_fp16 = conv(dilations = e_31_dilations_0, groups = e_31_groups_0, pad = e_31_pad_0, pad_type = e_31_pad_type_0, strides = e_31_strides_0, weight = model_model_layers_15_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_317_cast_fp16)[name = string("e_31_cast_fp16")]; - tensor var_13515_axes_0 = const()[name = string("op_13515_axes_0"), val = tensor([2])]; - tensor var_13515_cast_fp16 = squeeze(axes = var_13515_axes_0, x = e_31_cast_fp16)[name = string("op_13515_cast_fp16")]; - tensor var_13516 = const()[name = string("op_13516"), val = tensor([0, 2, 1])]; - int32 var_13527 = const()[name = string("op_13527"), val = int32(-1)]; - fp16 const_670_promoted_to_fp16 = const()[name = string("const_670_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_253_cast_fp16 = transpose(perm = var_13516, x = var_13515_cast_fp16)[name = string("transpose_90")]; - tensor var_13529_cast_fp16 = mul(x = hidden_states_253_cast_fp16, y = const_670_promoted_to_fp16)[name = string("op_13529_cast_fp16")]; - bool input_319_interleave_0 = const()[name = string("input_319_interleave_0"), val = bool(false)]; - tensor input_319_cast_fp16 = concat(axis = var_13527, interleave = input_319_interleave_0, values = (hidden_states_253_cast_fp16, var_13529_cast_fp16))[name = string("input_319_cast_fp16")]; - tensor normed_381_axes_0 = const()[name = string("normed_381_axes_0"), val = tensor([-1])]; - fp16 var_13524_to_fp16 = const()[name = string("op_13524_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_381_cast_fp16 = layer_norm(axes = normed_381_axes_0, epsilon = var_13524_to_fp16, x = input_319_cast_fp16)[name = string("normed_381_cast_fp16")]; - tensor normed_383_begin_0 = const()[name = string("normed_383_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_383_end_0 = const()[name = string("normed_383_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_383_end_mask_0 = const()[name = string("normed_383_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_383_cast_fp16 = slice_by_index(begin = normed_383_begin_0, end = normed_383_end_0, end_mask = normed_383_end_mask_0, x = normed_381_cast_fp16)[name = string("normed_383_cast_fp16")]; - tensor var_13543_to_fp16 = const()[name = string("op_13543_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357046016)))]; - tensor hidden_states_255_cast_fp16 = mul(x = normed_383_cast_fp16, y = var_13543_to_fp16)[name = string("hidden_states_255_cast_fp16")]; - tensor hidden_states_257_cast_fp16 = add(x = hidden_states_251_cast_fp16, y = hidden_states_255_cast_fp16)[name = string("hidden_states_257_cast_fp16")]; - int32 var_13597 = const()[name = string("op_13597"), val = int32(-1)]; - fp16 const_675_promoted_to_fp16 = const()[name = string("const_675_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_13599_cast_fp16 = mul(x = hidden_states_257_cast_fp16, y = const_675_promoted_to_fp16)[name = string("op_13599_cast_fp16")]; - bool input_321_interleave_0 = const()[name = string("input_321_interleave_0"), val = bool(false)]; - tensor input_321_cast_fp16 = concat(axis = var_13597, interleave = input_321_interleave_0, values = (hidden_states_257_cast_fp16, var_13599_cast_fp16))[name = string("input_321_cast_fp16")]; - tensor normed_385_axes_0 = const()[name = string("normed_385_axes_0"), val = tensor([-1])]; - fp16 var_13594_to_fp16 = const()[name = string("op_13594_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_385_cast_fp16 = layer_norm(axes = normed_385_axes_0, epsilon = var_13594_to_fp16, x = input_321_cast_fp16)[name = string("normed_385_cast_fp16")]; - tensor normed_387_begin_0 = const()[name = string("normed_387_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_387_end_0 = const()[name = string("normed_387_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_387_end_mask_0 = const()[name = string("normed_387_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_387_cast_fp16 = slice_by_index(begin = normed_387_begin_0, end = normed_387_end_0, end_mask = normed_387_end_mask_0, x = normed_385_cast_fp16)[name = string("normed_387_cast_fp16")]; - tensor var_13613_to_fp16 = const()[name = string("op_13613_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357048384)))]; - tensor hidden_states_259_cast_fp16 = mul(x = normed_387_cast_fp16, y = var_13613_to_fp16)[name = string("hidden_states_259_cast_fp16")]; - tensor var_13624 = const()[name = string("op_13624"), val = tensor([0, 2, 1])]; - tensor var_13627_axes_0 = const()[name = string("op_13627_axes_0"), val = tensor([2])]; - tensor var_13625_cast_fp16 = transpose(perm = var_13624, x = hidden_states_259_cast_fp16)[name = string("transpose_89")]; - tensor var_13627_cast_fp16 = expand_dims(axes = var_13627_axes_0, x = var_13625_cast_fp16)[name = string("op_13627_cast_fp16")]; - string query_states_129_pad_type_0 = const()[name = string("query_states_129_pad_type_0"), val = string("valid")]; - tensor query_states_129_strides_0 = const()[name = string("query_states_129_strides_0"), val = tensor([1, 1])]; - tensor query_states_129_pad_0 = const()[name = string("query_states_129_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_129_dilations_0 = const()[name = string("query_states_129_dilations_0"), val = tensor([1, 1])]; - int32 query_states_129_groups_0 = const()[name = string("query_states_129_groups_0"), val = int32(1)]; - tensor query_states_129 = conv(dilations = query_states_129_dilations_0, groups = query_states_129_groups_0, pad = query_states_129_pad_0, pad_type = query_states_129_pad_type_0, strides = query_states_129_strides_0, weight = model_model_layers_16_self_attn_q_proj_weight_palettized, x = var_13627_cast_fp16)[name = string("query_states_129")]; - string key_states_161_pad_type_0 = const()[name = string("key_states_161_pad_type_0"), val = string("valid")]; - tensor key_states_161_strides_0 = const()[name = string("key_states_161_strides_0"), val = tensor([1, 1])]; - tensor key_states_161_pad_0 = const()[name = string("key_states_161_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_161_dilations_0 = const()[name = string("key_states_161_dilations_0"), val = tensor([1, 1])]; - int32 key_states_161_groups_0 = const()[name = string("key_states_161_groups_0"), val = int32(1)]; - tensor key_states_161 = conv(dilations = key_states_161_dilations_0, groups = key_states_161_groups_0, pad = key_states_161_pad_0, pad_type = key_states_161_pad_type_0, strides = key_states_161_strides_0, weight = model_model_layers_16_self_attn_k_proj_weight_palettized, x = var_13627_cast_fp16)[name = string("key_states_161")]; - string value_states_129_pad_type_0 = const()[name = string("value_states_129_pad_type_0"), val = string("valid")]; - tensor value_states_129_strides_0 = const()[name = string("value_states_129_strides_0"), val = tensor([1, 1])]; - tensor value_states_129_pad_0 = const()[name = string("value_states_129_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_129_dilations_0 = const()[name = string("value_states_129_dilations_0"), val = tensor([1, 1])]; - int32 value_states_129_groups_0 = const()[name = string("value_states_129_groups_0"), val = int32(1)]; - tensor value_states_129 = conv(dilations = value_states_129_dilations_0, groups = value_states_129_groups_0, pad = value_states_129_pad_0, pad_type = value_states_129_pad_type_0, strides = value_states_129_strides_0, weight = model_model_layers_16_self_attn_v_proj_weight_palettized, x = var_13627_cast_fp16)[name = string("value_states_129")]; - tensor var_13669 = const()[name = string("op_13669"), val = tensor([1, 4, 256, 64])]; - tensor var_13670 = reshape(shape = var_13669, x = query_states_129)[name = string("op_13670")]; - tensor var_13675 = const()[name = string("op_13675"), val = tensor([0, 1, 3, 2])]; - tensor var_13680 = const()[name = string("op_13680"), val = tensor([1, 1, 256, 64])]; - tensor var_13681 = reshape(shape = var_13680, x = key_states_161)[name = string("op_13681")]; - tensor var_13686 = const()[name = string("op_13686"), val = tensor([0, 1, 3, 2])]; - tensor var_13691 = const()[name = string("op_13691"), val = tensor([1, 1, 256, 64])]; - tensor var_13692 = reshape(shape = var_13691, x = value_states_129)[name = string("op_13692")]; - tensor var_13697 = const()[name = string("op_13697"), val = tensor([0, 1, 3, 2])]; - int32 var_13708 = const()[name = string("op_13708"), val = int32(-1)]; - fp16 const_680_promoted = const()[name = string("const_680_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_261 = transpose(perm = var_13675, x = var_13670)[name = string("transpose_88")]; - tensor var_13710 = mul(x = hidden_states_261, y = const_680_promoted)[name = string("op_13710")]; - bool input_325_interleave_0 = const()[name = string("input_325_interleave_0"), val = bool(false)]; - tensor input_325 = concat(axis = var_13708, interleave = input_325_interleave_0, values = (hidden_states_261, var_13710))[name = string("input_325")]; - tensor normed_389_axes_0 = const()[name = string("normed_389_axes_0"), val = tensor([-1])]; - fp16 var_13705_to_fp16 = const()[name = string("op_13705_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_389_cast_fp16 = layer_norm(axes = normed_389_axes_0, epsilon = var_13705_to_fp16, x = input_325)[name = string("normed_389_cast_fp16")]; - tensor normed_391_begin_0 = const()[name = string("normed_391_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_391_end_0 = const()[name = string("normed_391_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_391_end_mask_0 = const()[name = string("normed_391_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_391 = slice_by_index(begin = normed_391_begin_0, end = normed_391_end_0, end_mask = normed_391_end_mask_0, x = normed_389_cast_fp16)[name = string("normed_391")]; - tensor var_13724_to_fp16 = const()[name = string("op_13724_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357050752)))]; - tensor q_33_cast_fp16 = mul(x = normed_391, y = var_13724_to_fp16)[name = string("q_33_cast_fp16")]; - int32 var_13735 = const()[name = string("op_13735"), val = int32(-1)]; - fp16 const_684_promoted = const()[name = string("const_684_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_263 = transpose(perm = var_13686, x = var_13681)[name = string("transpose_87")]; - tensor var_13737 = mul(x = hidden_states_263, y = const_684_promoted)[name = string("op_13737")]; - bool input_327_interleave_0 = const()[name = string("input_327_interleave_0"), val = bool(false)]; - tensor input_327 = concat(axis = var_13735, interleave = input_327_interleave_0, values = (hidden_states_263, var_13737))[name = string("input_327")]; - tensor normed_393_axes_0 = const()[name = string("normed_393_axes_0"), val = tensor([-1])]; - fp16 var_13732_to_fp16 = const()[name = string("op_13732_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_393_cast_fp16 = layer_norm(axes = normed_393_axes_0, epsilon = var_13732_to_fp16, x = input_327)[name = string("normed_393_cast_fp16")]; - tensor normed_395_begin_0 = const()[name = string("normed_395_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_395_end_0 = const()[name = string("normed_395_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_395_end_mask_0 = const()[name = string("normed_395_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_395 = slice_by_index(begin = normed_395_begin_0, end = normed_395_end_0, end_mask = normed_395_end_mask_0, x = normed_393_cast_fp16)[name = string("normed_395")]; - tensor var_13751_to_fp16 = const()[name = string("op_13751_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357051328)))]; - tensor k_33_cast_fp16 = mul(x = normed_395, y = var_13751_to_fp16)[name = string("k_33_cast_fp16")]; - tensor var_13765_cast_fp16 = mul(x = q_33_cast_fp16, y = cos_5)[name = string("op_13765_cast_fp16")]; - tensor x1_65_begin_0 = const()[name = string("x1_65_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_65_end_0 = const()[name = string("x1_65_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_65_end_mask_0 = const()[name = string("x1_65_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_65_cast_fp16 = slice_by_index(begin = x1_65_begin_0, end = x1_65_end_0, end_mask = x1_65_end_mask_0, x = q_33_cast_fp16)[name = string("x1_65_cast_fp16")]; - tensor x2_65_begin_0 = const()[name = string("x2_65_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_65_end_0 = const()[name = string("x2_65_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_65_end_mask_0 = const()[name = string("x2_65_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_65_cast_fp16 = slice_by_index(begin = x2_65_begin_0, end = x2_65_end_0, end_mask = x2_65_end_mask_0, x = q_33_cast_fp16)[name = string("x2_65_cast_fp16")]; - fp16 const_690_promoted_to_fp16 = const()[name = string("const_690_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_13786_cast_fp16 = mul(x = x2_65_cast_fp16, y = const_690_promoted_to_fp16)[name = string("op_13786_cast_fp16")]; - int32 var_13788 = const()[name = string("op_13788"), val = int32(-1)]; - bool var_13789_interleave_0 = const()[name = string("op_13789_interleave_0"), val = bool(false)]; - tensor var_13789_cast_fp16 = concat(axis = var_13788, interleave = var_13789_interleave_0, values = (var_13786_cast_fp16, x1_65_cast_fp16))[name = string("op_13789_cast_fp16")]; - tensor var_13790_cast_fp16 = mul(x = var_13789_cast_fp16, y = sin_5)[name = string("op_13790_cast_fp16")]; - tensor query_states_131_cast_fp16 = add(x = var_13765_cast_fp16, y = var_13790_cast_fp16)[name = string("query_states_131_cast_fp16")]; - tensor var_13793_cast_fp16 = mul(x = k_33_cast_fp16, y = cos_5)[name = string("op_13793_cast_fp16")]; - tensor x1_67_begin_0 = const()[name = string("x1_67_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_67_end_0 = const()[name = string("x1_67_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_67_end_mask_0 = const()[name = string("x1_67_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_67_cast_fp16 = slice_by_index(begin = x1_67_begin_0, end = x1_67_end_0, end_mask = x1_67_end_mask_0, x = k_33_cast_fp16)[name = string("x1_67_cast_fp16")]; - tensor x2_67_begin_0 = const()[name = string("x2_67_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_67_end_0 = const()[name = string("x2_67_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_67_end_mask_0 = const()[name = string("x2_67_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_67_cast_fp16 = slice_by_index(begin = x2_67_begin_0, end = x2_67_end_0, end_mask = x2_67_end_mask_0, x = k_33_cast_fp16)[name = string("x2_67_cast_fp16")]; - fp16 const_693_promoted_to_fp16 = const()[name = string("const_693_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_13814_cast_fp16 = mul(x = x2_67_cast_fp16, y = const_693_promoted_to_fp16)[name = string("op_13814_cast_fp16")]; - int32 var_13816 = const()[name = string("op_13816"), val = int32(-1)]; - bool var_13817_interleave_0 = const()[name = string("op_13817_interleave_0"), val = bool(false)]; - tensor var_13817_cast_fp16 = concat(axis = var_13816, interleave = var_13817_interleave_0, values = (var_13814_cast_fp16, x1_67_cast_fp16))[name = string("op_13817_cast_fp16")]; - tensor var_13818_cast_fp16 = mul(x = var_13817_cast_fp16, y = sin_5)[name = string("op_13818_cast_fp16")]; - tensor key_states_163_cast_fp16 = add(x = var_13793_cast_fp16, y = var_13818_cast_fp16)[name = string("key_states_163_cast_fp16")]; - tensor key_slice_29_begin_0 = const()[name = string("key_slice_29_begin_0"), val = tensor([14, 0, 0, 0])]; - tensor key_slice_29_end_0 = const()[name = string("key_slice_29_end_0"), val = tensor([15, 1, 512, 256])]; - tensor key_slice_29_end_mask_0 = const()[name = string("key_slice_29_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_29_cast_fp16 = slice_by_index(begin = key_slice_29_begin_0, end = key_slice_29_end_0, end_mask = key_slice_29_end_mask_0, x = coreml_update_state_83)[name = string("key_slice_29_cast_fp16")]; - tensor var_13855_begin_0 = const()[name = string("op_13855_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_13855_end_0 = const()[name = string("op_13855_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_13855_end_mask_0 = const()[name = string("op_13855_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_13855_cast_fp16 = slice_by_index(begin = var_13855_begin_0, end = var_13855_end_0, end_mask = var_13855_end_mask_0, x = key_slice_29_cast_fp16)[name = string("op_13855_cast_fp16")]; - int32 var_13882 = const()[name = string("op_13882"), val = int32(2)]; - bool shifted_key_29_interleave_0 = const()[name = string("shifted_key_29_interleave_0"), val = bool(false)]; - tensor shifted_key_29_cast_fp16 = concat(axis = var_13882, interleave = shifted_key_29_interleave_0, values = (var_13855_cast_fp16, key_states_163_cast_fp16))[name = string("shifted_key_29_cast_fp16")]; - tensor concat_232 = const()[name = string("concat_232"), val = tensor([14, 0, 0, 0])]; - tensor concat_233 = const()[name = string("concat_233"), val = tensor([15, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_29_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_29_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_29_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_29_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_29_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_29_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_29_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_29_cast_fp16 = slice_update(begin = concat_232, begin_mask = model_model_kv_cache_local_internal_tensor_assign_29_begin_mask_0, end = concat_233, end_mask = model_model_kv_cache_local_internal_tensor_assign_29_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_29_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_29_stride_0, update = shifted_key_29_cast_fp16, x = coreml_update_state_83)[name = string("model_model_kv_cache_local_internal_tensor_assign_29_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_29_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_188_write_state")]; - tensor coreml_update_state_84 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_188")]; - tensor value_slice_29_begin_0 = const()[name = string("value_slice_29_begin_0"), val = tensor([36, 0, 0, 0])]; - tensor value_slice_29_end_0 = const()[name = string("value_slice_29_end_0"), val = tensor([37, 1, 512, 256])]; - tensor value_slice_29_end_mask_0 = const()[name = string("value_slice_29_end_mask_0"), val = tensor([false, true, true, true])]; - tensor value_slice_29_cast_fp16 = slice_by_index(begin = value_slice_29_begin_0, end = value_slice_29_end_0, end_mask = value_slice_29_end_mask_0, x = coreml_update_state_84)[name = string("value_slice_29_cast_fp16")]; - tensor var_13925_begin_0 = const()[name = string("op_13925_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_13925_end_0 = const()[name = string("op_13925_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_13925_end_mask_0 = const()[name = string("op_13925_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_13925_cast_fp16 = slice_by_index(begin = var_13925_begin_0, end = var_13925_end_0, end_mask = var_13925_end_mask_0, x = value_slice_29_cast_fp16)[name = string("op_13925_cast_fp16")]; - int32 var_13952 = const()[name = string("op_13952"), val = int32(2)]; - bool shifted_value_29_interleave_0 = const()[name = string("shifted_value_29_interleave_0"), val = bool(false)]; - tensor value_states_131 = transpose(perm = var_13697, x = var_13692)[name = string("transpose_86")]; - tensor shifted_value_29_cast_fp16 = concat(axis = var_13952, interleave = shifted_value_29_interleave_0, values = (var_13925_cast_fp16, value_states_131))[name = string("shifted_value_29_cast_fp16")]; - tensor concat_234 = const()[name = string("concat_234"), val = tensor([36, 0, 0, 0])]; - tensor concat_235 = const()[name = string("concat_235"), val = tensor([37, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_30_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_30_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_30_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_30_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_30_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_30_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_30_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_30_cast_fp16 = slice_update(begin = concat_234, begin_mask = model_model_kv_cache_local_internal_tensor_assign_30_begin_mask_0, end = concat_235, end_mask = model_model_kv_cache_local_internal_tensor_assign_30_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_30_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_30_stride_0, update = shifted_value_29_cast_fp16, x = coreml_update_state_84)[name = string("model_model_kv_cache_local_internal_tensor_assign_30_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_30_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_189_write_state")]; - tensor coreml_update_state_85 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_189")]; - tensor var_13980_begin_0 = const()[name = string("op_13980_begin_0"), val = tensor([14, 0, 0, 0])]; - tensor var_13980_end_0 = const()[name = string("op_13980_end_0"), val = tensor([15, 1, 512, 256])]; - tensor var_13980_end_mask_0 = const()[name = string("op_13980_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_13980_cast_fp16 = slice_by_index(begin = var_13980_begin_0, end = var_13980_end_0, end_mask = var_13980_end_mask_0, x = coreml_update_state_85)[name = string("op_13980_cast_fp16")]; - tensor var_13987_begin_0 = const()[name = string("op_13987_begin_0"), val = tensor([36, 0, 0, 0])]; - tensor var_13987_end_0 = const()[name = string("op_13987_end_0"), val = tensor([37, 1, 512, 256])]; - tensor var_13987_end_mask_0 = const()[name = string("op_13987_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_13987_cast_fp16 = slice_by_index(begin = var_13987_begin_0, end = var_13987_end_0, end_mask = var_13987_end_mask_0, x = coreml_update_state_85)[name = string("op_13987_cast_fp16")]; - tensor var_14026 = const()[name = string("op_14026"), val = tensor([1, 4, 1, 1])]; - tensor x_261_cast_fp16 = tile(reps = var_14026, x = var_13980_cast_fp16)[name = string("x_261_cast_fp16")]; - tensor var_14046 = const()[name = string("op_14046"), val = tensor([1, 4, 1, 1])]; - tensor x_267_cast_fp16 = tile(reps = var_14046, x = var_13987_cast_fp16)[name = string("x_267_cast_fp16")]; - bool var_14073_transpose_x_0 = const()[name = string("op_14073_transpose_x_0"), val = bool(false)]; - bool var_14073_transpose_y_0 = const()[name = string("op_14073_transpose_y_0"), val = bool(true)]; - tensor var_14073 = matmul(transpose_x = var_14073_transpose_x_0, transpose_y = var_14073_transpose_y_0, x = query_states_131_cast_fp16, y = x_261_cast_fp16)[name = string("op_14073")]; - fp16 var_14074_to_fp16 = const()[name = string("op_14074_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_65_cast_fp16 = mul(x = var_14073, y = var_14074_to_fp16)[name = string("attn_weights_65_cast_fp16")]; - tensor attn_weights_67_cast_fp16 = add(x = attn_weights_65_cast_fp16, y = mask_slice_1)[name = string("attn_weights_67_cast_fp16")]; - int32 var_14109 = const()[name = string("op_14109"), val = int32(-1)]; - tensor var_14111_cast_fp16 = softmax(axis = var_14109, x = attn_weights_67_cast_fp16)[name = string("op_14111_cast_fp16")]; - tensor concat_240 = const()[name = string("concat_240"), val = tensor([4, 64, 512])]; - tensor reshape_48_cast_fp16 = reshape(shape = concat_240, x = var_14111_cast_fp16)[name = string("reshape_48_cast_fp16")]; - tensor concat_241 = const()[name = string("concat_241"), val = tensor([4, 512, 256])]; - tensor reshape_49_cast_fp16 = reshape(shape = concat_241, x = x_267_cast_fp16)[name = string("reshape_49_cast_fp16")]; - bool matmul_16_transpose_x_0 = const()[name = string("matmul_16_transpose_x_0"), val = bool(false)]; - bool matmul_16_transpose_y_0 = const()[name = string("matmul_16_transpose_y_0"), val = bool(false)]; - tensor matmul_16_cast_fp16 = matmul(transpose_x = matmul_16_transpose_x_0, transpose_y = matmul_16_transpose_y_0, x = reshape_48_cast_fp16, y = reshape_49_cast_fp16)[name = string("matmul_16_cast_fp16")]; - tensor concat_245 = const()[name = string("concat_245"), val = tensor([1, 4, 64, 256])]; - tensor reshape_50_cast_fp16 = reshape(shape = concat_245, x = matmul_16_cast_fp16)[name = string("reshape_50_cast_fp16")]; - tensor var_14123_perm_0 = const()[name = string("op_14123_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_14142 = const()[name = string("op_14142"), val = tensor([1, 64, 1024])]; - tensor var_14123_cast_fp16 = transpose(perm = var_14123_perm_0, x = reshape_50_cast_fp16)[name = string("transpose_85")]; - tensor attn_output_165_cast_fp16 = reshape(shape = var_14142, x = var_14123_cast_fp16)[name = string("attn_output_165_cast_fp16")]; - tensor var_14147 = const()[name = string("op_14147"), val = tensor([0, 2, 1])]; - string var_14163_pad_type_0 = const()[name = string("op_14163_pad_type_0"), val = string("valid")]; - int32 var_14163_groups_0 = const()[name = string("op_14163_groups_0"), val = int32(1)]; - tensor var_14163_strides_0 = const()[name = string("op_14163_strides_0"), val = tensor([1])]; - tensor var_14163_pad_0 = const()[name = string("op_14163_pad_0"), val = tensor([0, 0])]; - tensor var_14163_dilations_0 = const()[name = string("op_14163_dilations_0"), val = tensor([1])]; - tensor squeeze_16_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357051904))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357936704))))[name = string("squeeze_16_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_14148_cast_fp16 = transpose(perm = var_14147, x = attn_output_165_cast_fp16)[name = string("transpose_84")]; - tensor var_14163_cast_fp16 = conv(dilations = var_14163_dilations_0, groups = var_14163_groups_0, pad = var_14163_pad_0, pad_type = var_14163_pad_type_0, strides = var_14163_strides_0, weight = squeeze_16_cast_fp16_to_fp32_to_fp16_palettized, x = var_14148_cast_fp16)[name = string("op_14163_cast_fp16")]; - tensor var_14167 = const()[name = string("op_14167"), val = tensor([0, 2, 1])]; - int32 var_14178 = const()[name = string("op_14178"), val = int32(-1)]; - fp16 const_704_promoted_to_fp16 = const()[name = string("const_704_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_265_cast_fp16 = transpose(perm = var_14167, x = var_14163_cast_fp16)[name = string("transpose_83")]; - tensor var_14180_cast_fp16 = mul(x = hidden_states_265_cast_fp16, y = const_704_promoted_to_fp16)[name = string("op_14180_cast_fp16")]; - bool input_331_interleave_0 = const()[name = string("input_331_interleave_0"), val = bool(false)]; - tensor input_331_cast_fp16 = concat(axis = var_14178, interleave = input_331_interleave_0, values = (hidden_states_265_cast_fp16, var_14180_cast_fp16))[name = string("input_331_cast_fp16")]; - tensor normed_397_axes_0 = const()[name = string("normed_397_axes_0"), val = tensor([-1])]; - fp16 var_14175_to_fp16 = const()[name = string("op_14175_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_397_cast_fp16 = layer_norm(axes = normed_397_axes_0, epsilon = var_14175_to_fp16, x = input_331_cast_fp16)[name = string("normed_397_cast_fp16")]; - tensor normed_399_begin_0 = const()[name = string("normed_399_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_399_end_0 = const()[name = string("normed_399_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_399_end_mask_0 = const()[name = string("normed_399_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_399_cast_fp16 = slice_by_index(begin = normed_399_begin_0, end = normed_399_end_0, end_mask = normed_399_end_mask_0, x = normed_397_cast_fp16)[name = string("normed_399_cast_fp16")]; - tensor var_14194_to_fp16 = const()[name = string("op_14194_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357955200)))]; - tensor attn_output_169_cast_fp16 = mul(x = normed_399_cast_fp16, y = var_14194_to_fp16)[name = string("attn_output_169_cast_fp16")]; - tensor hidden_states_267_cast_fp16 = add(x = hidden_states_257_cast_fp16, y = attn_output_169_cast_fp16)[name = string("hidden_states_267_cast_fp16")]; - int32 var_14207 = const()[name = string("op_14207"), val = int32(-1)]; - fp16 const_708_promoted_to_fp16 = const()[name = string("const_708_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_14209_cast_fp16 = mul(x = hidden_states_267_cast_fp16, y = const_708_promoted_to_fp16)[name = string("op_14209_cast_fp16")]; - bool input_333_interleave_0 = const()[name = string("input_333_interleave_0"), val = bool(false)]; - tensor input_333_cast_fp16 = concat(axis = var_14207, interleave = input_333_interleave_0, values = (hidden_states_267_cast_fp16, var_14209_cast_fp16))[name = string("input_333_cast_fp16")]; - tensor normed_401_axes_0 = const()[name = string("normed_401_axes_0"), val = tensor([-1])]; - fp16 var_14204_to_fp16 = const()[name = string("op_14204_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_401_cast_fp16 = layer_norm(axes = normed_401_axes_0, epsilon = var_14204_to_fp16, x = input_333_cast_fp16)[name = string("normed_401_cast_fp16")]; - tensor normed_403_begin_0 = const()[name = string("normed_403_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_403_end_0 = const()[name = string("normed_403_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_403_end_mask_0 = const()[name = string("normed_403_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_403_cast_fp16 = slice_by_index(begin = normed_403_begin_0, end = normed_403_end_0, end_mask = normed_403_end_mask_0, x = normed_401_cast_fp16)[name = string("normed_403_cast_fp16")]; - tensor var_14223_to_fp16 = const()[name = string("op_14223_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357957568)))]; - tensor x_269_cast_fp16 = mul(x = normed_403_cast_fp16, y = var_14223_to_fp16)[name = string("x_269_cast_fp16")]; - tensor var_14235 = const()[name = string("op_14235"), val = tensor([0, 2, 1])]; - tensor input_335_axes_0 = const()[name = string("input_335_axes_0"), val = tensor([2])]; - tensor var_14236_cast_fp16 = transpose(perm = var_14235, x = x_269_cast_fp16)[name = string("transpose_82")]; - tensor input_335_cast_fp16 = expand_dims(axes = input_335_axes_0, x = var_14236_cast_fp16)[name = string("input_335_cast_fp16")]; - string x_271_pad_type_0 = const()[name = string("x_271_pad_type_0"), val = string("valid")]; - tensor x_271_strides_0 = const()[name = string("x_271_strides_0"), val = tensor([1, 1])]; - tensor x_271_pad_0 = const()[name = string("x_271_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_271_dilations_0 = const()[name = string("x_271_dilations_0"), val = tensor([1, 1])]; - int32 x_271_groups_0 = const()[name = string("x_271_groups_0"), val = int32(1)]; - tensor model_model_layers_16_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1386081216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1392053248))))[name = string("model_model_layers_16_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_271_cast_fp16 = conv(dilations = x_271_dilations_0, groups = x_271_groups_0, pad = x_271_pad_0, pad_type = x_271_pad_type_0, strides = x_271_strides_0, weight = model_model_layers_16_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_335_cast_fp16)[name = string("x_271_cast_fp16")]; - string b_33_pad_type_0 = const()[name = string("b_33_pad_type_0"), val = string("valid")]; - tensor b_33_strides_0 = const()[name = string("b_33_strides_0"), val = tensor([1, 1])]; - tensor b_33_pad_0 = const()[name = string("b_33_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_33_dilations_0 = const()[name = string("b_33_dilations_0"), val = tensor([1, 1])]; - int32 b_33_groups_0 = const()[name = string("b_33_groups_0"), val = int32(1)]; - tensor model_model_layers_16_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1392163904))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1398135936))))[name = string("model_model_layers_16_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_33_cast_fp16 = conv(dilations = b_33_dilations_0, groups = b_33_groups_0, pad = b_33_pad_0, pad_type = b_33_pad_type_0, strides = b_33_strides_0, weight = model_model_layers_16_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_335_cast_fp16)[name = string("b_33_cast_fp16")]; - string var_14261_mode_0 = const()[name = string("op_14261_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_14261_cast_fp16 = gelu(mode = var_14261_mode_0, x = x_271_cast_fp16)[name = string("op_14261_cast_fp16")]; - tensor input_337_cast_fp16 = mul(x = var_14261_cast_fp16, y = b_33_cast_fp16)[name = string("input_337_cast_fp16")]; - string e_33_pad_type_0 = const()[name = string("e_33_pad_type_0"), val = string("valid")]; - tensor e_33_strides_0 = const()[name = string("e_33_strides_0"), val = tensor([1, 1])]; - tensor e_33_pad_0 = const()[name = string("e_33_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_33_dilations_0 = const()[name = string("e_33_dilations_0"), val = tensor([1, 1])]; - int32 e_33_groups_0 = const()[name = string("e_33_groups_0"), val = int32(1)]; - tensor model_model_layers_16_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(370125312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(376097344))))[name = string("model_model_layers_16_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_33_cast_fp16 = conv(dilations = e_33_dilations_0, groups = e_33_groups_0, pad = e_33_pad_0, pad_type = e_33_pad_type_0, strides = e_33_strides_0, weight = model_model_layers_16_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_337_cast_fp16)[name = string("e_33_cast_fp16")]; - tensor var_14269_axes_0 = const()[name = string("op_14269_axes_0"), val = tensor([2])]; - tensor var_14269_cast_fp16 = squeeze(axes = var_14269_axes_0, x = e_33_cast_fp16)[name = string("op_14269_cast_fp16")]; - tensor var_14270 = const()[name = string("op_14270"), val = tensor([0, 2, 1])]; - int32 var_14281 = const()[name = string("op_14281"), val = int32(-1)]; - fp16 const_712_promoted_to_fp16 = const()[name = string("const_712_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_269_cast_fp16 = transpose(perm = var_14270, x = var_14269_cast_fp16)[name = string("transpose_81")]; - tensor var_14283_cast_fp16 = mul(x = hidden_states_269_cast_fp16, y = const_712_promoted_to_fp16)[name = string("op_14283_cast_fp16")]; - bool input_339_interleave_0 = const()[name = string("input_339_interleave_0"), val = bool(false)]; - tensor input_339_cast_fp16 = concat(axis = var_14281, interleave = input_339_interleave_0, values = (hidden_states_269_cast_fp16, var_14283_cast_fp16))[name = string("input_339_cast_fp16")]; - tensor normed_405_axes_0 = const()[name = string("normed_405_axes_0"), val = tensor([-1])]; - fp16 var_14278_to_fp16 = const()[name = string("op_14278_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_405_cast_fp16 = layer_norm(axes = normed_405_axes_0, epsilon = var_14278_to_fp16, x = input_339_cast_fp16)[name = string("normed_405_cast_fp16")]; - tensor normed_407_begin_0 = const()[name = string("normed_407_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_407_end_0 = const()[name = string("normed_407_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_407_end_mask_0 = const()[name = string("normed_407_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_407_cast_fp16 = slice_by_index(begin = normed_407_begin_0, end = normed_407_end_0, end_mask = normed_407_end_mask_0, x = normed_405_cast_fp16)[name = string("normed_407_cast_fp16")]; - tensor var_14297_to_fp16 = const()[name = string("op_14297_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(376115840)))]; - tensor hidden_states_271_cast_fp16 = mul(x = normed_407_cast_fp16, y = var_14297_to_fp16)[name = string("hidden_states_271_cast_fp16")]; - tensor hidden_states_273_cast_fp16 = add(x = hidden_states_267_cast_fp16, y = hidden_states_271_cast_fp16)[name = string("hidden_states_273_cast_fp16")]; - int32 var_14351 = const()[name = string("op_14351"), val = int32(-1)]; - fp16 const_717_promoted_to_fp16 = const()[name = string("const_717_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_14353_cast_fp16 = mul(x = hidden_states_273_cast_fp16, y = const_717_promoted_to_fp16)[name = string("op_14353_cast_fp16")]; - bool input_341_interleave_0 = const()[name = string("input_341_interleave_0"), val = bool(false)]; - tensor input_341_cast_fp16 = concat(axis = var_14351, interleave = input_341_interleave_0, values = (hidden_states_273_cast_fp16, var_14353_cast_fp16))[name = string("input_341_cast_fp16")]; - tensor normed_409_axes_0 = const()[name = string("normed_409_axes_0"), val = tensor([-1])]; - fp16 var_14348_to_fp16 = const()[name = string("op_14348_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_409_cast_fp16 = layer_norm(axes = normed_409_axes_0, epsilon = var_14348_to_fp16, x = input_341_cast_fp16)[name = string("normed_409_cast_fp16")]; - tensor normed_411_begin_0 = const()[name = string("normed_411_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_411_end_0 = const()[name = string("normed_411_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_411_end_mask_0 = const()[name = string("normed_411_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_411_cast_fp16 = slice_by_index(begin = normed_411_begin_0, end = normed_411_end_0, end_mask = normed_411_end_mask_0, x = normed_409_cast_fp16)[name = string("normed_411_cast_fp16")]; - tensor var_14367_to_fp16 = const()[name = string("op_14367_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(376118208)))]; - tensor hidden_states_275_cast_fp16 = mul(x = normed_411_cast_fp16, y = var_14367_to_fp16)[name = string("hidden_states_275_cast_fp16")]; - tensor var_14378 = const()[name = string("op_14378"), val = tensor([0, 2, 1])]; - tensor var_14381_axes_0 = const()[name = string("op_14381_axes_0"), val = tensor([2])]; - tensor var_14379_cast_fp16 = transpose(perm = var_14378, x = hidden_states_275_cast_fp16)[name = string("transpose_80")]; - tensor var_14381_cast_fp16 = expand_dims(axes = var_14381_axes_0, x = var_14379_cast_fp16)[name = string("op_14381_cast_fp16")]; - string query_states_137_pad_type_0 = const()[name = string("query_states_137_pad_type_0"), val = string("valid")]; - tensor query_states_137_strides_0 = const()[name = string("query_states_137_strides_0"), val = tensor([1, 1])]; - tensor query_states_137_pad_0 = const()[name = string("query_states_137_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_137_dilations_0 = const()[name = string("query_states_137_dilations_0"), val = tensor([1, 1])]; - int32 query_states_137_groups_0 = const()[name = string("query_states_137_groups_0"), val = int32(1)]; - tensor query_states_137 = conv(dilations = query_states_137_dilations_0, groups = query_states_137_groups_0, pad = query_states_137_pad_0, pad_type = query_states_137_pad_type_0, strides = query_states_137_strides_0, weight = model_model_layers_17_self_attn_q_proj_weight_palettized, x = var_14381_cast_fp16)[name = string("query_states_137")]; - string key_states_171_pad_type_0 = const()[name = string("key_states_171_pad_type_0"), val = string("valid")]; - tensor key_states_171_strides_0 = const()[name = string("key_states_171_strides_0"), val = tensor([1, 1])]; - tensor key_states_171_pad_0 = const()[name = string("key_states_171_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_171_dilations_0 = const()[name = string("key_states_171_dilations_0"), val = tensor([1, 1])]; - int32 key_states_171_groups_0 = const()[name = string("key_states_171_groups_0"), val = int32(1)]; - tensor key_states_171 = conv(dilations = key_states_171_dilations_0, groups = key_states_171_groups_0, pad = key_states_171_pad_0, pad_type = key_states_171_pad_type_0, strides = key_states_171_strides_0, weight = model_model_layers_17_self_attn_k_proj_weight_palettized, x = var_14381_cast_fp16)[name = string("key_states_171")]; - string value_states_137_pad_type_0 = const()[name = string("value_states_137_pad_type_0"), val = string("valid")]; - tensor value_states_137_strides_0 = const()[name = string("value_states_137_strides_0"), val = tensor([1, 1])]; - tensor value_states_137_pad_0 = const()[name = string("value_states_137_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_137_dilations_0 = const()[name = string("value_states_137_dilations_0"), val = tensor([1, 1])]; - int32 value_states_137_groups_0 = const()[name = string("value_states_137_groups_0"), val = int32(1)]; - tensor value_states_137 = conv(dilations = value_states_137_dilations_0, groups = value_states_137_groups_0, pad = value_states_137_pad_0, pad_type = value_states_137_pad_type_0, strides = value_states_137_strides_0, weight = model_model_layers_17_self_attn_v_proj_weight_palettized, x = var_14381_cast_fp16)[name = string("value_states_137")]; - tensor var_14423 = const()[name = string("op_14423"), val = tensor([1, 4, 256, 64])]; - tensor var_14424 = reshape(shape = var_14423, x = query_states_137)[name = string("op_14424")]; - tensor var_14429 = const()[name = string("op_14429"), val = tensor([0, 1, 3, 2])]; - tensor var_14434 = const()[name = string("op_14434"), val = tensor([1, 1, 256, 64])]; - tensor var_14435 = reshape(shape = var_14434, x = key_states_171)[name = string("op_14435")]; - tensor var_14440 = const()[name = string("op_14440"), val = tensor([0, 1, 3, 2])]; - tensor var_14445 = const()[name = string("op_14445"), val = tensor([1, 1, 256, 64])]; - tensor var_14446 = reshape(shape = var_14445, x = value_states_137)[name = string("op_14446")]; - tensor var_14451 = const()[name = string("op_14451"), val = tensor([0, 1, 3, 2])]; - int32 var_14462 = const()[name = string("op_14462"), val = int32(-1)]; - fp16 const_722_promoted = const()[name = string("const_722_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_277 = transpose(perm = var_14429, x = var_14424)[name = string("transpose_79")]; - tensor var_14464 = mul(x = hidden_states_277, y = const_722_promoted)[name = string("op_14464")]; - bool input_345_interleave_0 = const()[name = string("input_345_interleave_0"), val = bool(false)]; - tensor input_345 = concat(axis = var_14462, interleave = input_345_interleave_0, values = (hidden_states_277, var_14464))[name = string("input_345")]; - tensor normed_413_axes_0 = const()[name = string("normed_413_axes_0"), val = tensor([-1])]; - fp16 var_14459_to_fp16 = const()[name = string("op_14459_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_413_cast_fp16 = layer_norm(axes = normed_413_axes_0, epsilon = var_14459_to_fp16, x = input_345)[name = string("normed_413_cast_fp16")]; - tensor normed_415_begin_0 = const()[name = string("normed_415_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_415_end_0 = const()[name = string("normed_415_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_415_end_mask_0 = const()[name = string("normed_415_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_415 = slice_by_index(begin = normed_415_begin_0, end = normed_415_end_0, end_mask = normed_415_end_mask_0, x = normed_413_cast_fp16)[name = string("normed_415")]; - tensor var_14478_to_fp16 = const()[name = string("op_14478_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(376120576)))]; - tensor q_35_cast_fp16 = mul(x = normed_415, y = var_14478_to_fp16)[name = string("q_35_cast_fp16")]; - int32 var_14489 = const()[name = string("op_14489"), val = int32(-1)]; - fp16 const_726_promoted = const()[name = string("const_726_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_279 = transpose(perm = var_14440, x = var_14435)[name = string("transpose_78")]; - tensor var_14491 = mul(x = hidden_states_279, y = const_726_promoted)[name = string("op_14491")]; - bool input_347_interleave_0 = const()[name = string("input_347_interleave_0"), val = bool(false)]; - tensor input_347 = concat(axis = var_14489, interleave = input_347_interleave_0, values = (hidden_states_279, var_14491))[name = string("input_347")]; - tensor normed_417_axes_0 = const()[name = string("normed_417_axes_0"), val = tensor([-1])]; - fp16 var_14486_to_fp16 = const()[name = string("op_14486_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_417_cast_fp16 = layer_norm(axes = normed_417_axes_0, epsilon = var_14486_to_fp16, x = input_347)[name = string("normed_417_cast_fp16")]; - tensor normed_419_begin_0 = const()[name = string("normed_419_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_419_end_0 = const()[name = string("normed_419_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_419_end_mask_0 = const()[name = string("normed_419_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_419 = slice_by_index(begin = normed_419_begin_0, end = normed_419_end_0, end_mask = normed_419_end_mask_0, x = normed_417_cast_fp16)[name = string("normed_419")]; - tensor var_14505_to_fp16 = const()[name = string("op_14505_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(376121152)))]; - tensor k_35_cast_fp16 = mul(x = normed_419, y = var_14505_to_fp16)[name = string("k_35_cast_fp16")]; - tensor var_14519_cast_fp16 = mul(x = q_35_cast_fp16, y = cos_35)[name = string("op_14519_cast_fp16")]; - tensor x1_69_begin_0 = const()[name = string("x1_69_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_69_end_0 = const()[name = string("x1_69_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_69_end_mask_0 = const()[name = string("x1_69_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_69_cast_fp16 = slice_by_index(begin = x1_69_begin_0, end = x1_69_end_0, end_mask = x1_69_end_mask_0, x = q_35_cast_fp16)[name = string("x1_69_cast_fp16")]; - tensor x2_69_begin_0 = const()[name = string("x2_69_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_69_end_0 = const()[name = string("x2_69_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_69_end_mask_0 = const()[name = string("x2_69_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_69_cast_fp16 = slice_by_index(begin = x2_69_begin_0, end = x2_69_end_0, end_mask = x2_69_end_mask_0, x = q_35_cast_fp16)[name = string("x2_69_cast_fp16")]; - fp16 const_732_promoted_to_fp16 = const()[name = string("const_732_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_14540_cast_fp16 = mul(x = x2_69_cast_fp16, y = const_732_promoted_to_fp16)[name = string("op_14540_cast_fp16")]; - int32 var_14542 = const()[name = string("op_14542"), val = int32(-1)]; - bool var_14543_interleave_0 = const()[name = string("op_14543_interleave_0"), val = bool(false)]; - tensor var_14543_cast_fp16 = concat(axis = var_14542, interleave = var_14543_interleave_0, values = (var_14540_cast_fp16, x1_69_cast_fp16))[name = string("op_14543_cast_fp16")]; - tensor var_14544_cast_fp16 = mul(x = var_14543_cast_fp16, y = sin_35)[name = string("op_14544_cast_fp16")]; - tensor query_states_139_cast_fp16 = add(x = var_14519_cast_fp16, y = var_14544_cast_fp16)[name = string("query_states_139_cast_fp16")]; - tensor var_14547_cast_fp16 = mul(x = k_35_cast_fp16, y = cos_35)[name = string("op_14547_cast_fp16")]; - tensor x1_71_begin_0 = const()[name = string("x1_71_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_71_end_0 = const()[name = string("x1_71_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_71_end_mask_0 = const()[name = string("x1_71_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_71_cast_fp16 = slice_by_index(begin = x1_71_begin_0, end = x1_71_end_0, end_mask = x1_71_end_mask_0, x = k_35_cast_fp16)[name = string("x1_71_cast_fp16")]; - tensor x2_71_begin_0 = const()[name = string("x2_71_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_71_end_0 = const()[name = string("x2_71_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_71_end_mask_0 = const()[name = string("x2_71_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_71_cast_fp16 = slice_by_index(begin = x2_71_begin_0, end = x2_71_end_0, end_mask = x2_71_end_mask_0, x = k_35_cast_fp16)[name = string("x2_71_cast_fp16")]; - fp16 const_735_promoted_to_fp16 = const()[name = string("const_735_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_14568_cast_fp16 = mul(x = x2_71_cast_fp16, y = const_735_promoted_to_fp16)[name = string("op_14568_cast_fp16")]; - int32 var_14570 = const()[name = string("op_14570"), val = int32(-1)]; - bool var_14571_interleave_0 = const()[name = string("op_14571_interleave_0"), val = bool(false)]; - tensor var_14571_cast_fp16 = concat(axis = var_14570, interleave = var_14571_interleave_0, values = (var_14568_cast_fp16, x1_71_cast_fp16))[name = string("op_14571_cast_fp16")]; - tensor var_14572_cast_fp16 = mul(x = var_14571_cast_fp16, y = sin_35)[name = string("op_14572_cast_fp16")]; - tensor key_states_173_cast_fp16 = add(x = var_14547_cast_fp16, y = var_14572_cast_fp16)[name = string("key_states_173_cast_fp16")]; - tensor expand_dims_174 = const()[name = string("expand_dims_174"), val = tensor([2])]; - tensor expand_dims_175 = const()[name = string("expand_dims_175"), val = tensor([0])]; - tensor expand_dims_177 = const()[name = string("expand_dims_177"), val = tensor([0])]; - tensor expand_dims_178 = const()[name = string("expand_dims_178"), val = tensor([3])]; - int32 concat_248_axis_0 = const()[name = string("concat_248_axis_0"), val = int32(0)]; - bool concat_248_interleave_0 = const()[name = string("concat_248_interleave_0"), val = bool(false)]; - tensor concat_248 = concat(axis = concat_248_axis_0, interleave = concat_248_interleave_0, values = (expand_dims_174, expand_dims_175, current_pos, expand_dims_177))[name = string("concat_248")]; - tensor concat_249_values1_0 = const()[name = string("concat_249_values1_0"), val = tensor([0])]; - tensor concat_249_values3_0 = const()[name = string("concat_249_values3_0"), val = tensor([0])]; - int32 concat_249_axis_0 = const()[name = string("concat_249_axis_0"), val = int32(0)]; - bool concat_249_interleave_0 = const()[name = string("concat_249_interleave_0"), val = bool(false)]; - tensor concat_249 = concat(axis = concat_249_axis_0, interleave = concat_249_interleave_0, values = (expand_dims_178, concat_249_values1_0, end_pos_1, concat_249_values3_0))[name = string("concat_249")]; - tensor model_model_kv_cache_global_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_global_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_global_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_248, begin_mask = model_model_kv_cache_global_internal_tensor_assign_5_begin_mask_0, end = concat_249, end_mask = model_model_kv_cache_global_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_5_stride_0, update = key_states_173_cast_fp16, x = coreml_update_state_75)[name = string("model_model_kv_cache_global_internal_tensor_assign_5_cast_fp16")]; - write_state(data = model_model_kv_cache_global_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_190_write_state")]; - tensor coreml_update_state_86 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_190")]; - tensor expand_dims_180 = const()[name = string("expand_dims_180"), val = tensor([6])]; - tensor expand_dims_181 = const()[name = string("expand_dims_181"), val = tensor([0])]; - tensor expand_dims_183 = const()[name = string("expand_dims_183"), val = tensor([0])]; - tensor expand_dims_184 = const()[name = string("expand_dims_184"), val = tensor([7])]; - int32 concat_252_axis_0 = const()[name = string("concat_252_axis_0"), val = int32(0)]; - bool concat_252_interleave_0 = const()[name = string("concat_252_interleave_0"), val = bool(false)]; - tensor concat_252 = concat(axis = concat_252_axis_0, interleave = concat_252_interleave_0, values = (expand_dims_180, expand_dims_181, current_pos, expand_dims_183))[name = string("concat_252")]; - tensor concat_253_values1_0 = const()[name = string("concat_253_values1_0"), val = tensor([0])]; - tensor concat_253_values3_0 = const()[name = string("concat_253_values3_0"), val = tensor([0])]; - int32 concat_253_axis_0 = const()[name = string("concat_253_axis_0"), val = int32(0)]; - bool concat_253_interleave_0 = const()[name = string("concat_253_interleave_0"), val = bool(false)]; - tensor concat_253 = concat(axis = concat_253_axis_0, interleave = concat_253_interleave_0, values = (expand_dims_184, concat_253_values1_0, end_pos_1, concat_253_values3_0))[name = string("concat_253")]; - tensor model_model_kv_cache_global_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_global_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_global_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor value_states_139 = transpose(perm = var_14451, x = var_14446)[name = string("transpose_77")]; - tensor model_model_kv_cache_global_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_252, begin_mask = model_model_kv_cache_global_internal_tensor_assign_6_begin_mask_0, end = concat_253, end_mask = model_model_kv_cache_global_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_6_stride_0, update = value_states_139, x = coreml_update_state_86)[name = string("model_model_kv_cache_global_internal_tensor_assign_6_cast_fp16")]; - write_state(data = model_model_kv_cache_global_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_191_write_state")]; - tensor coreml_update_state_87 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_191")]; - tensor var_14671_begin_0 = const()[name = string("op_14671_begin_0"), val = tensor([2, 0, 0, 0])]; - tensor var_14671_end_0 = const()[name = string("op_14671_end_0"), val = tensor([3, 1, 4096, 256])]; - tensor var_14671_end_mask_0 = const()[name = string("op_14671_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_14671_cast_fp16 = slice_by_index(begin = var_14671_begin_0, end = var_14671_end_0, end_mask = var_14671_end_mask_0, x = coreml_update_state_87)[name = string("op_14671_cast_fp16")]; - tensor var_14678_begin_0 = const()[name = string("op_14678_begin_0"), val = tensor([6, 0, 0, 0])]; - tensor var_14678_end_0 = const()[name = string("op_14678_end_0"), val = tensor([7, 1, 4096, 256])]; - tensor var_14678_end_mask_0 = const()[name = string("op_14678_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_14678_cast_fp16 = slice_by_index(begin = var_14678_begin_0, end = var_14678_end_0, end_mask = var_14678_end_mask_0, x = coreml_update_state_87)[name = string("op_14678_cast_fp16")]; - tensor var_14717 = const()[name = string("op_14717"), val = tensor([1, 4, 1, 1])]; - tensor x_277_cast_fp16 = tile(reps = var_14717, x = var_14671_cast_fp16)[name = string("x_277_cast_fp16")]; - tensor var_14737 = const()[name = string("op_14737"), val = tensor([1, 4, 1, 1])]; - tensor x_283_cast_fp16 = tile(reps = var_14737, x = var_14678_cast_fp16)[name = string("x_283_cast_fp16")]; - bool var_14764_transpose_x_0 = const()[name = string("op_14764_transpose_x_0"), val = bool(false)]; - bool var_14764_transpose_y_0 = const()[name = string("op_14764_transpose_y_0"), val = bool(true)]; - tensor var_14764 = matmul(transpose_x = var_14764_transpose_x_0, transpose_y = var_14764_transpose_y_0, x = query_states_139_cast_fp16, y = x_277_cast_fp16)[name = string("op_14764")]; - fp16 var_14765_to_fp16 = const()[name = string("op_14765_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_69_cast_fp16 = mul(x = var_14764, y = var_14765_to_fp16)[name = string("attn_weights_69_cast_fp16")]; - tensor attn_weights_71_cast_fp16 = add(x = attn_weights_69_cast_fp16, y = causal_mask)[name = string("attn_weights_71_cast_fp16")]; - int32 var_14800 = const()[name = string("op_14800"), val = int32(-1)]; - tensor var_14802_cast_fp16 = softmax(axis = var_14800, x = attn_weights_71_cast_fp16)[name = string("op_14802_cast_fp16")]; - tensor concat_258 = const()[name = string("concat_258"), val = tensor([4, 64, 4096])]; - tensor reshape_51_cast_fp16 = reshape(shape = concat_258, x = var_14802_cast_fp16)[name = string("reshape_51_cast_fp16")]; - tensor concat_259 = const()[name = string("concat_259"), val = tensor([4, 4096, 256])]; - tensor reshape_52_cast_fp16 = reshape(shape = concat_259, x = x_283_cast_fp16)[name = string("reshape_52_cast_fp16")]; - bool matmul_17_transpose_x_0 = const()[name = string("matmul_17_transpose_x_0"), val = bool(false)]; - bool matmul_17_transpose_y_0 = const()[name = string("matmul_17_transpose_y_0"), val = bool(false)]; - tensor matmul_17_cast_fp16 = matmul(transpose_x = matmul_17_transpose_x_0, transpose_y = matmul_17_transpose_y_0, x = reshape_51_cast_fp16, y = reshape_52_cast_fp16)[name = string("matmul_17_cast_fp16")]; - tensor concat_263 = const()[name = string("concat_263"), val = tensor([1, 4, 64, 256])]; - tensor reshape_53_cast_fp16 = reshape(shape = concat_263, x = matmul_17_cast_fp16)[name = string("reshape_53_cast_fp16")]; - tensor var_14814_perm_0 = const()[name = string("op_14814_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_14833 = const()[name = string("op_14833"), val = tensor([1, 64, 1024])]; - tensor var_14814_cast_fp16 = transpose(perm = var_14814_perm_0, x = reshape_53_cast_fp16)[name = string("transpose_76")]; - tensor attn_output_175_cast_fp16 = reshape(shape = var_14833, x = var_14814_cast_fp16)[name = string("attn_output_175_cast_fp16")]; - tensor var_14838 = const()[name = string("op_14838"), val = tensor([0, 2, 1])]; - string var_14854_pad_type_0 = const()[name = string("op_14854_pad_type_0"), val = string("valid")]; - int32 var_14854_groups_0 = const()[name = string("op_14854_groups_0"), val = int32(1)]; - tensor var_14854_strides_0 = const()[name = string("op_14854_strides_0"), val = tensor([1])]; - tensor var_14854_pad_0 = const()[name = string("op_14854_pad_0"), val = tensor([0, 0])]; - tensor var_14854_dilations_0 = const()[name = string("op_14854_dilations_0"), val = tensor([1])]; - tensor squeeze_17_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(376121728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377006528))))[name = string("squeeze_17_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_14839_cast_fp16 = transpose(perm = var_14838, x = attn_output_175_cast_fp16)[name = string("transpose_75")]; - tensor var_14854_cast_fp16 = conv(dilations = var_14854_dilations_0, groups = var_14854_groups_0, pad = var_14854_pad_0, pad_type = var_14854_pad_type_0, strides = var_14854_strides_0, weight = squeeze_17_cast_fp16_to_fp32_to_fp16_palettized, x = var_14839_cast_fp16)[name = string("op_14854_cast_fp16")]; - tensor var_14858 = const()[name = string("op_14858"), val = tensor([0, 2, 1])]; - int32 var_14869 = const()[name = string("op_14869"), val = int32(-1)]; - fp16 const_747_promoted_to_fp16 = const()[name = string("const_747_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_281_cast_fp16 = transpose(perm = var_14858, x = var_14854_cast_fp16)[name = string("transpose_74")]; - tensor var_14871_cast_fp16 = mul(x = hidden_states_281_cast_fp16, y = const_747_promoted_to_fp16)[name = string("op_14871_cast_fp16")]; - bool input_351_interleave_0 = const()[name = string("input_351_interleave_0"), val = bool(false)]; - tensor input_351_cast_fp16 = concat(axis = var_14869, interleave = input_351_interleave_0, values = (hidden_states_281_cast_fp16, var_14871_cast_fp16))[name = string("input_351_cast_fp16")]; - tensor normed_421_axes_0 = const()[name = string("normed_421_axes_0"), val = tensor([-1])]; - fp16 var_14866_to_fp16 = const()[name = string("op_14866_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_421_cast_fp16 = layer_norm(axes = normed_421_axes_0, epsilon = var_14866_to_fp16, x = input_351_cast_fp16)[name = string("normed_421_cast_fp16")]; - tensor normed_423_begin_0 = const()[name = string("normed_423_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_423_end_0 = const()[name = string("normed_423_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_423_end_mask_0 = const()[name = string("normed_423_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_423_cast_fp16 = slice_by_index(begin = normed_423_begin_0, end = normed_423_end_0, end_mask = normed_423_end_mask_0, x = normed_421_cast_fp16)[name = string("normed_423_cast_fp16")]; - tensor var_14885_to_fp16 = const()[name = string("op_14885_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377025024)))]; - tensor attn_output_179_cast_fp16 = mul(x = normed_423_cast_fp16, y = var_14885_to_fp16)[name = string("attn_output_179_cast_fp16")]; - tensor hidden_states_283_cast_fp16 = add(x = hidden_states_273_cast_fp16, y = attn_output_179_cast_fp16)[name = string("hidden_states_283_cast_fp16")]; - int32 var_14898 = const()[name = string("op_14898"), val = int32(-1)]; - fp16 const_751_promoted_to_fp16 = const()[name = string("const_751_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_14900_cast_fp16 = mul(x = hidden_states_283_cast_fp16, y = const_751_promoted_to_fp16)[name = string("op_14900_cast_fp16")]; - bool input_353_interleave_0 = const()[name = string("input_353_interleave_0"), val = bool(false)]; - tensor input_353_cast_fp16 = concat(axis = var_14898, interleave = input_353_interleave_0, values = (hidden_states_283_cast_fp16, var_14900_cast_fp16))[name = string("input_353_cast_fp16")]; - tensor normed_425_axes_0 = const()[name = string("normed_425_axes_0"), val = tensor([-1])]; - fp16 var_14895_to_fp16 = const()[name = string("op_14895_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_425_cast_fp16 = layer_norm(axes = normed_425_axes_0, epsilon = var_14895_to_fp16, x = input_353_cast_fp16)[name = string("normed_425_cast_fp16")]; - tensor normed_427_begin_0 = const()[name = string("normed_427_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_427_end_0 = const()[name = string("normed_427_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_427_end_mask_0 = const()[name = string("normed_427_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_427_cast_fp16 = slice_by_index(begin = normed_427_begin_0, end = normed_427_end_0, end_mask = normed_427_end_mask_0, x = normed_425_cast_fp16)[name = string("normed_427_cast_fp16")]; - tensor var_14914_to_fp16 = const()[name = string("op_14914_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377027392)))]; - tensor x_285_cast_fp16 = mul(x = normed_427_cast_fp16, y = var_14914_to_fp16)[name = string("x_285_cast_fp16")]; - tensor var_14926 = const()[name = string("op_14926"), val = tensor([0, 2, 1])]; - tensor input_355_axes_0 = const()[name = string("input_355_axes_0"), val = tensor([2])]; - tensor var_14927_cast_fp16 = transpose(perm = var_14926, x = x_285_cast_fp16)[name = string("transpose_73")]; - tensor input_355_cast_fp16 = expand_dims(axes = input_355_axes_0, x = var_14927_cast_fp16)[name = string("input_355_cast_fp16")]; - string x_287_pad_type_0 = const()[name = string("x_287_pad_type_0"), val = string("valid")]; - tensor x_287_strides_0 = const()[name = string("x_287_strides_0"), val = tensor([1, 1])]; - tensor x_287_pad_0 = const()[name = string("x_287_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_287_dilations_0 = const()[name = string("x_287_dilations_0"), val = tensor([1, 1])]; - int32 x_287_groups_0 = const()[name = string("x_287_groups_0"), val = int32(1)]; - tensor model_model_layers_17_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1398246592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1404218624))))[name = string("model_model_layers_17_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_287_cast_fp16 = conv(dilations = x_287_dilations_0, groups = x_287_groups_0, pad = x_287_pad_0, pad_type = x_287_pad_type_0, strides = x_287_strides_0, weight = model_model_layers_17_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_355_cast_fp16)[name = string("x_287_cast_fp16")]; - string b_35_pad_type_0 = const()[name = string("b_35_pad_type_0"), val = string("valid")]; - tensor b_35_strides_0 = const()[name = string("b_35_strides_0"), val = tensor([1, 1])]; - tensor b_35_pad_0 = const()[name = string("b_35_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_35_dilations_0 = const()[name = string("b_35_dilations_0"), val = tensor([1, 1])]; - int32 b_35_groups_0 = const()[name = string("b_35_groups_0"), val = int32(1)]; - tensor model_model_layers_17_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1404329280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1410301312))))[name = string("model_model_layers_17_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_35_cast_fp16 = conv(dilations = b_35_dilations_0, groups = b_35_groups_0, pad = b_35_pad_0, pad_type = b_35_pad_type_0, strides = b_35_strides_0, weight = model_model_layers_17_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_355_cast_fp16)[name = string("b_35_cast_fp16")]; - string var_14952_mode_0 = const()[name = string("op_14952_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_14952_cast_fp16 = gelu(mode = var_14952_mode_0, x = x_287_cast_fp16)[name = string("op_14952_cast_fp16")]; - tensor input_357_cast_fp16 = mul(x = var_14952_cast_fp16, y = b_35_cast_fp16)[name = string("input_357_cast_fp16")]; - string e_35_pad_type_0 = const()[name = string("e_35_pad_type_0"), val = string("valid")]; - tensor e_35_strides_0 = const()[name = string("e_35_strides_0"), val = tensor([1, 1])]; - tensor e_35_pad_0 = const()[name = string("e_35_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_35_dilations_0 = const()[name = string("e_35_dilations_0"), val = tensor([1, 1])]; - int32 e_35_groups_0 = const()[name = string("e_35_groups_0"), val = int32(1)]; - tensor model_model_layers_17_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(389195136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395167168))))[name = string("model_model_layers_17_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_35_cast_fp16 = conv(dilations = e_35_dilations_0, groups = e_35_groups_0, pad = e_35_pad_0, pad_type = e_35_pad_type_0, strides = e_35_strides_0, weight = model_model_layers_17_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_357_cast_fp16)[name = string("e_35_cast_fp16")]; - tensor var_14960_axes_0 = const()[name = string("op_14960_axes_0"), val = tensor([2])]; - tensor var_14960_cast_fp16 = squeeze(axes = var_14960_axes_0, x = e_35_cast_fp16)[name = string("op_14960_cast_fp16")]; - tensor var_14961 = const()[name = string("op_14961"), val = tensor([0, 2, 1])]; - int32 var_14972 = const()[name = string("op_14972"), val = int32(-1)]; - fp16 const_755_promoted_to_fp16 = const()[name = string("const_755_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_285_cast_fp16 = transpose(perm = var_14961, x = var_14960_cast_fp16)[name = string("transpose_72")]; - tensor var_14974_cast_fp16 = mul(x = hidden_states_285_cast_fp16, y = const_755_promoted_to_fp16)[name = string("op_14974_cast_fp16")]; - bool input_359_interleave_0 = const()[name = string("input_359_interleave_0"), val = bool(false)]; - tensor input_359_cast_fp16 = concat(axis = var_14972, interleave = input_359_interleave_0, values = (hidden_states_285_cast_fp16, var_14974_cast_fp16))[name = string("input_359_cast_fp16")]; - tensor normed_429_axes_0 = const()[name = string("normed_429_axes_0"), val = tensor([-1])]; - fp16 var_14969_to_fp16 = const()[name = string("op_14969_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_429_cast_fp16 = layer_norm(axes = normed_429_axes_0, epsilon = var_14969_to_fp16, x = input_359_cast_fp16)[name = string("normed_429_cast_fp16")]; - tensor normed_431_begin_0 = const()[name = string("normed_431_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_431_end_0 = const()[name = string("normed_431_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_431_end_mask_0 = const()[name = string("normed_431_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_431_cast_fp16 = slice_by_index(begin = normed_431_begin_0, end = normed_431_end_0, end_mask = normed_431_end_mask_0, x = normed_429_cast_fp16)[name = string("normed_431_cast_fp16")]; - tensor var_14988_to_fp16 = const()[name = string("op_14988_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395185664)))]; - tensor hidden_states_287_cast_fp16 = mul(x = normed_431_cast_fp16, y = var_14988_to_fp16)[name = string("hidden_states_287_cast_fp16")]; - tensor hidden_states_289_cast_fp16 = add(x = hidden_states_283_cast_fp16, y = hidden_states_287_cast_fp16)[name = string("hidden_states_289_cast_fp16")]; - int32 var_15042 = const()[name = string("op_15042"), val = int32(-1)]; - fp16 const_760_promoted_to_fp16 = const()[name = string("const_760_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_15044_cast_fp16 = mul(x = hidden_states_289_cast_fp16, y = const_760_promoted_to_fp16)[name = string("op_15044_cast_fp16")]; - bool input_361_interleave_0 = const()[name = string("input_361_interleave_0"), val = bool(false)]; - tensor input_361_cast_fp16 = concat(axis = var_15042, interleave = input_361_interleave_0, values = (hidden_states_289_cast_fp16, var_15044_cast_fp16))[name = string("input_361_cast_fp16")]; - tensor normed_433_axes_0 = const()[name = string("normed_433_axes_0"), val = tensor([-1])]; - fp16 var_15039_to_fp16 = const()[name = string("op_15039_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_433_cast_fp16 = layer_norm(axes = normed_433_axes_0, epsilon = var_15039_to_fp16, x = input_361_cast_fp16)[name = string("normed_433_cast_fp16")]; - tensor normed_435_begin_0 = const()[name = string("normed_435_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_435_end_0 = const()[name = string("normed_435_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_435_end_mask_0 = const()[name = string("normed_435_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_435_cast_fp16 = slice_by_index(begin = normed_435_begin_0, end = normed_435_end_0, end_mask = normed_435_end_mask_0, x = normed_433_cast_fp16)[name = string("normed_435_cast_fp16")]; - tensor var_15058_to_fp16 = const()[name = string("op_15058_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395188032)))]; - tensor hidden_states_291_cast_fp16 = mul(x = normed_435_cast_fp16, y = var_15058_to_fp16)[name = string("hidden_states_291_cast_fp16")]; - tensor var_15069 = const()[name = string("op_15069"), val = tensor([0, 2, 1])]; - tensor var_15072_axes_0 = const()[name = string("op_15072_axes_0"), val = tensor([2])]; - tensor var_15070_cast_fp16 = transpose(perm = var_15069, x = hidden_states_291_cast_fp16)[name = string("transpose_71")]; - tensor var_15072_cast_fp16 = expand_dims(axes = var_15072_axes_0, x = var_15070_cast_fp16)[name = string("op_15072_cast_fp16")]; - string query_states_145_pad_type_0 = const()[name = string("query_states_145_pad_type_0"), val = string("valid")]; - tensor query_states_145_strides_0 = const()[name = string("query_states_145_strides_0"), val = tensor([1, 1])]; - tensor query_states_145_pad_0 = const()[name = string("query_states_145_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_145_dilations_0 = const()[name = string("query_states_145_dilations_0"), val = tensor([1, 1])]; - int32 query_states_145_groups_0 = const()[name = string("query_states_145_groups_0"), val = int32(1)]; - tensor query_states_145 = conv(dilations = query_states_145_dilations_0, groups = query_states_145_groups_0, pad = query_states_145_pad_0, pad_type = query_states_145_pad_type_0, strides = query_states_145_strides_0, weight = model_model_layers_18_self_attn_q_proj_weight_palettized, x = var_15072_cast_fp16)[name = string("query_states_145")]; - string key_states_181_pad_type_0 = const()[name = string("key_states_181_pad_type_0"), val = string("valid")]; - tensor key_states_181_strides_0 = const()[name = string("key_states_181_strides_0"), val = tensor([1, 1])]; - tensor key_states_181_pad_0 = const()[name = string("key_states_181_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_181_dilations_0 = const()[name = string("key_states_181_dilations_0"), val = tensor([1, 1])]; - int32 key_states_181_groups_0 = const()[name = string("key_states_181_groups_0"), val = int32(1)]; - tensor key_states_181 = conv(dilations = key_states_181_dilations_0, groups = key_states_181_groups_0, pad = key_states_181_pad_0, pad_type = key_states_181_pad_type_0, strides = key_states_181_strides_0, weight = model_model_layers_18_self_attn_k_proj_weight_palettized, x = var_15072_cast_fp16)[name = string("key_states_181")]; - string value_states_145_pad_type_0 = const()[name = string("value_states_145_pad_type_0"), val = string("valid")]; - tensor value_states_145_strides_0 = const()[name = string("value_states_145_strides_0"), val = tensor([1, 1])]; - tensor value_states_145_pad_0 = const()[name = string("value_states_145_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_145_dilations_0 = const()[name = string("value_states_145_dilations_0"), val = tensor([1, 1])]; - int32 value_states_145_groups_0 = const()[name = string("value_states_145_groups_0"), val = int32(1)]; - tensor value_states_145 = conv(dilations = value_states_145_dilations_0, groups = value_states_145_groups_0, pad = value_states_145_pad_0, pad_type = value_states_145_pad_type_0, strides = value_states_145_strides_0, weight = model_model_layers_18_self_attn_v_proj_weight_palettized, x = var_15072_cast_fp16)[name = string("value_states_145")]; - tensor var_15114 = const()[name = string("op_15114"), val = tensor([1, 4, 256, 64])]; - tensor var_15115 = reshape(shape = var_15114, x = query_states_145)[name = string("op_15115")]; - tensor var_15120 = const()[name = string("op_15120"), val = tensor([0, 1, 3, 2])]; - tensor var_15125 = const()[name = string("op_15125"), val = tensor([1, 1, 256, 64])]; - tensor var_15126 = reshape(shape = var_15125, x = key_states_181)[name = string("op_15126")]; - tensor var_15131 = const()[name = string("op_15131"), val = tensor([0, 1, 3, 2])]; - tensor var_15136 = const()[name = string("op_15136"), val = tensor([1, 1, 256, 64])]; - tensor var_15137 = reshape(shape = var_15136, x = value_states_145)[name = string("op_15137")]; - tensor var_15142 = const()[name = string("op_15142"), val = tensor([0, 1, 3, 2])]; - int32 var_15153 = const()[name = string("op_15153"), val = int32(-1)]; - fp16 const_765_promoted = const()[name = string("const_765_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_293 = transpose(perm = var_15120, x = var_15115)[name = string("transpose_70")]; - tensor var_15155 = mul(x = hidden_states_293, y = const_765_promoted)[name = string("op_15155")]; - bool input_365_interleave_0 = const()[name = string("input_365_interleave_0"), val = bool(false)]; - tensor input_365 = concat(axis = var_15153, interleave = input_365_interleave_0, values = (hidden_states_293, var_15155))[name = string("input_365")]; - tensor normed_437_axes_0 = const()[name = string("normed_437_axes_0"), val = tensor([-1])]; - fp16 var_15150_to_fp16 = const()[name = string("op_15150_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_437_cast_fp16 = layer_norm(axes = normed_437_axes_0, epsilon = var_15150_to_fp16, x = input_365)[name = string("normed_437_cast_fp16")]; - tensor normed_439_begin_0 = const()[name = string("normed_439_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_439_end_0 = const()[name = string("normed_439_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_439_end_mask_0 = const()[name = string("normed_439_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_439 = slice_by_index(begin = normed_439_begin_0, end = normed_439_end_0, end_mask = normed_439_end_mask_0, x = normed_437_cast_fp16)[name = string("normed_439")]; - tensor var_15169_to_fp16 = const()[name = string("op_15169_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395190400)))]; - tensor q_37_cast_fp16 = mul(x = normed_439, y = var_15169_to_fp16)[name = string("q_37_cast_fp16")]; - int32 var_15180 = const()[name = string("op_15180"), val = int32(-1)]; - fp16 const_769_promoted = const()[name = string("const_769_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_295 = transpose(perm = var_15131, x = var_15126)[name = string("transpose_69")]; - tensor var_15182 = mul(x = hidden_states_295, y = const_769_promoted)[name = string("op_15182")]; - bool input_367_interleave_0 = const()[name = string("input_367_interleave_0"), val = bool(false)]; - tensor input_367 = concat(axis = var_15180, interleave = input_367_interleave_0, values = (hidden_states_295, var_15182))[name = string("input_367")]; - tensor normed_441_axes_0 = const()[name = string("normed_441_axes_0"), val = tensor([-1])]; - fp16 var_15177_to_fp16 = const()[name = string("op_15177_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_441_cast_fp16 = layer_norm(axes = normed_441_axes_0, epsilon = var_15177_to_fp16, x = input_367)[name = string("normed_441_cast_fp16")]; - tensor normed_443_begin_0 = const()[name = string("normed_443_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_443_end_0 = const()[name = string("normed_443_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_443_end_mask_0 = const()[name = string("normed_443_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_443 = slice_by_index(begin = normed_443_begin_0, end = normed_443_end_0, end_mask = normed_443_end_mask_0, x = normed_441_cast_fp16)[name = string("normed_443")]; - tensor var_15196_to_fp16 = const()[name = string("op_15196_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395190976)))]; - tensor k_37_cast_fp16 = mul(x = normed_443, y = var_15196_to_fp16)[name = string("k_37_cast_fp16")]; - tensor var_15210_cast_fp16 = mul(x = q_37_cast_fp16, y = cos_5)[name = string("op_15210_cast_fp16")]; - tensor x1_73_begin_0 = const()[name = string("x1_73_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_73_end_0 = const()[name = string("x1_73_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_73_end_mask_0 = const()[name = string("x1_73_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_73_cast_fp16 = slice_by_index(begin = x1_73_begin_0, end = x1_73_end_0, end_mask = x1_73_end_mask_0, x = q_37_cast_fp16)[name = string("x1_73_cast_fp16")]; - tensor x2_73_begin_0 = const()[name = string("x2_73_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_73_end_0 = const()[name = string("x2_73_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_73_end_mask_0 = const()[name = string("x2_73_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_73_cast_fp16 = slice_by_index(begin = x2_73_begin_0, end = x2_73_end_0, end_mask = x2_73_end_mask_0, x = q_37_cast_fp16)[name = string("x2_73_cast_fp16")]; - fp16 const_775_promoted_to_fp16 = const()[name = string("const_775_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_15231_cast_fp16 = mul(x = x2_73_cast_fp16, y = const_775_promoted_to_fp16)[name = string("op_15231_cast_fp16")]; - int32 var_15233 = const()[name = string("op_15233"), val = int32(-1)]; - bool var_15234_interleave_0 = const()[name = string("op_15234_interleave_0"), val = bool(false)]; - tensor var_15234_cast_fp16 = concat(axis = var_15233, interleave = var_15234_interleave_0, values = (var_15231_cast_fp16, x1_73_cast_fp16))[name = string("op_15234_cast_fp16")]; - tensor var_15235_cast_fp16 = mul(x = var_15234_cast_fp16, y = sin_5)[name = string("op_15235_cast_fp16")]; - tensor query_states_147_cast_fp16 = add(x = var_15210_cast_fp16, y = var_15235_cast_fp16)[name = string("query_states_147_cast_fp16")]; - tensor var_15238_cast_fp16 = mul(x = k_37_cast_fp16, y = cos_5)[name = string("op_15238_cast_fp16")]; - tensor x1_75_begin_0 = const()[name = string("x1_75_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_75_end_0 = const()[name = string("x1_75_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_75_end_mask_0 = const()[name = string("x1_75_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_75_cast_fp16 = slice_by_index(begin = x1_75_begin_0, end = x1_75_end_0, end_mask = x1_75_end_mask_0, x = k_37_cast_fp16)[name = string("x1_75_cast_fp16")]; - tensor x2_75_begin_0 = const()[name = string("x2_75_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_75_end_0 = const()[name = string("x2_75_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_75_end_mask_0 = const()[name = string("x2_75_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_75_cast_fp16 = slice_by_index(begin = x2_75_begin_0, end = x2_75_end_0, end_mask = x2_75_end_mask_0, x = k_37_cast_fp16)[name = string("x2_75_cast_fp16")]; - fp16 const_778_promoted_to_fp16 = const()[name = string("const_778_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_15259_cast_fp16 = mul(x = x2_75_cast_fp16, y = const_778_promoted_to_fp16)[name = string("op_15259_cast_fp16")]; - int32 var_15261 = const()[name = string("op_15261"), val = int32(-1)]; - bool var_15262_interleave_0 = const()[name = string("op_15262_interleave_0"), val = bool(false)]; - tensor var_15262_cast_fp16 = concat(axis = var_15261, interleave = var_15262_interleave_0, values = (var_15259_cast_fp16, x1_75_cast_fp16))[name = string("op_15262_cast_fp16")]; - tensor var_15263_cast_fp16 = mul(x = var_15262_cast_fp16, y = sin_5)[name = string("op_15263_cast_fp16")]; - tensor key_states_183_cast_fp16 = add(x = var_15238_cast_fp16, y = var_15263_cast_fp16)[name = string("key_states_183_cast_fp16")]; - tensor key_slice_31_begin_0 = const()[name = string("key_slice_31_begin_0"), val = tensor([15, 0, 0, 0])]; - tensor key_slice_31_end_0 = const()[name = string("key_slice_31_end_0"), val = tensor([16, 1, 512, 256])]; - tensor key_slice_31_end_mask_0 = const()[name = string("key_slice_31_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_31_cast_fp16 = slice_by_index(begin = key_slice_31_begin_0, end = key_slice_31_end_0, end_mask = key_slice_31_end_mask_0, x = coreml_update_state_85)[name = string("key_slice_31_cast_fp16")]; - tensor var_15300_begin_0 = const()[name = string("op_15300_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_15300_end_0 = const()[name = string("op_15300_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_15300_end_mask_0 = const()[name = string("op_15300_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_15300_cast_fp16 = slice_by_index(begin = var_15300_begin_0, end = var_15300_end_0, end_mask = var_15300_end_mask_0, x = key_slice_31_cast_fp16)[name = string("op_15300_cast_fp16")]; - int32 var_15327 = const()[name = string("op_15327"), val = int32(2)]; - bool shifted_key_31_interleave_0 = const()[name = string("shifted_key_31_interleave_0"), val = bool(false)]; - tensor shifted_key_31_cast_fp16 = concat(axis = var_15327, interleave = shifted_key_31_interleave_0, values = (var_15300_cast_fp16, key_states_183_cast_fp16))[name = string("shifted_key_31_cast_fp16")]; - tensor concat_264 = const()[name = string("concat_264"), val = tensor([15, 0, 0, 0])]; - tensor concat_265 = const()[name = string("concat_265"), val = tensor([16, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_31_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_31_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_31_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_31_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_31_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_31_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_31_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_31_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_31_cast_fp16 = slice_update(begin = concat_264, begin_mask = model_model_kv_cache_local_internal_tensor_assign_31_begin_mask_0, end = concat_265, end_mask = model_model_kv_cache_local_internal_tensor_assign_31_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_31_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_31_stride_0, update = shifted_key_31_cast_fp16, x = coreml_update_state_85)[name = string("model_model_kv_cache_local_internal_tensor_assign_31_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_31_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_192_write_state")]; - tensor coreml_update_state_88 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_192")]; - tensor value_slice_31_begin_0 = const()[name = string("value_slice_31_begin_0"), val = tensor([37, 0, 0, 0])]; - tensor value_slice_31_end_0 = const()[name = string("value_slice_31_end_0"), val = tensor([38, 1, 512, 256])]; - tensor value_slice_31_end_mask_0 = const()[name = string("value_slice_31_end_mask_0"), val = tensor([false, true, true, true])]; - tensor value_slice_31_cast_fp16 = slice_by_index(begin = value_slice_31_begin_0, end = value_slice_31_end_0, end_mask = value_slice_31_end_mask_0, x = coreml_update_state_88)[name = string("value_slice_31_cast_fp16")]; - tensor var_15370_begin_0 = const()[name = string("op_15370_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_15370_end_0 = const()[name = string("op_15370_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_15370_end_mask_0 = const()[name = string("op_15370_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_15370_cast_fp16 = slice_by_index(begin = var_15370_begin_0, end = var_15370_end_0, end_mask = var_15370_end_mask_0, x = value_slice_31_cast_fp16)[name = string("op_15370_cast_fp16")]; - int32 var_15397 = const()[name = string("op_15397"), val = int32(2)]; - bool shifted_value_31_interleave_0 = const()[name = string("shifted_value_31_interleave_0"), val = bool(false)]; - tensor value_states_147 = transpose(perm = var_15142, x = var_15137)[name = string("transpose_68")]; - tensor shifted_value_31_cast_fp16 = concat(axis = var_15397, interleave = shifted_value_31_interleave_0, values = (var_15370_cast_fp16, value_states_147))[name = string("shifted_value_31_cast_fp16")]; - tensor concat_266 = const()[name = string("concat_266"), val = tensor([37, 0, 0, 0])]; - tensor concat_267 = const()[name = string("concat_267"), val = tensor([38, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_32_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_32_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_32_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_32_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_32_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_32_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_32_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_32_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_32_cast_fp16 = slice_update(begin = concat_266, begin_mask = model_model_kv_cache_local_internal_tensor_assign_32_begin_mask_0, end = concat_267, end_mask = model_model_kv_cache_local_internal_tensor_assign_32_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_32_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_32_stride_0, update = shifted_value_31_cast_fp16, x = coreml_update_state_88)[name = string("model_model_kv_cache_local_internal_tensor_assign_32_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_32_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_193_write_state")]; - tensor coreml_update_state_89 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_193")]; - tensor var_15425_begin_0 = const()[name = string("op_15425_begin_0"), val = tensor([15, 0, 0, 0])]; - tensor var_15425_end_0 = const()[name = string("op_15425_end_0"), val = tensor([16, 1, 512, 256])]; - tensor var_15425_end_mask_0 = const()[name = string("op_15425_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_15425_cast_fp16 = slice_by_index(begin = var_15425_begin_0, end = var_15425_end_0, end_mask = var_15425_end_mask_0, x = coreml_update_state_89)[name = string("op_15425_cast_fp16")]; - tensor var_15432_begin_0 = const()[name = string("op_15432_begin_0"), val = tensor([37, 0, 0, 0])]; - tensor var_15432_end_0 = const()[name = string("op_15432_end_0"), val = tensor([38, 1, 512, 256])]; - tensor var_15432_end_mask_0 = const()[name = string("op_15432_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_15432_cast_fp16 = slice_by_index(begin = var_15432_begin_0, end = var_15432_end_0, end_mask = var_15432_end_mask_0, x = coreml_update_state_89)[name = string("op_15432_cast_fp16")]; - tensor var_15471 = const()[name = string("op_15471"), val = tensor([1, 4, 1, 1])]; - tensor x_293_cast_fp16 = tile(reps = var_15471, x = var_15425_cast_fp16)[name = string("x_293_cast_fp16")]; - tensor var_15491 = const()[name = string("op_15491"), val = tensor([1, 4, 1, 1])]; - tensor x_299_cast_fp16 = tile(reps = var_15491, x = var_15432_cast_fp16)[name = string("x_299_cast_fp16")]; - bool var_15518_transpose_x_0 = const()[name = string("op_15518_transpose_x_0"), val = bool(false)]; - bool var_15518_transpose_y_0 = const()[name = string("op_15518_transpose_y_0"), val = bool(true)]; - tensor var_15518 = matmul(transpose_x = var_15518_transpose_x_0, transpose_y = var_15518_transpose_y_0, x = query_states_147_cast_fp16, y = x_293_cast_fp16)[name = string("op_15518")]; - fp16 var_15519_to_fp16 = const()[name = string("op_15519_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_73_cast_fp16 = mul(x = var_15518, y = var_15519_to_fp16)[name = string("attn_weights_73_cast_fp16")]; - tensor attn_weights_75_cast_fp16 = add(x = attn_weights_73_cast_fp16, y = mask_slice_1)[name = string("attn_weights_75_cast_fp16")]; - int32 var_15554 = const()[name = string("op_15554"), val = int32(-1)]; - tensor var_15556_cast_fp16 = softmax(axis = var_15554, x = attn_weights_75_cast_fp16)[name = string("op_15556_cast_fp16")]; - tensor concat_272 = const()[name = string("concat_272"), val = tensor([4, 64, 512])]; - tensor reshape_54_cast_fp16 = reshape(shape = concat_272, x = var_15556_cast_fp16)[name = string("reshape_54_cast_fp16")]; - tensor concat_273 = const()[name = string("concat_273"), val = tensor([4, 512, 256])]; - tensor reshape_55_cast_fp16 = reshape(shape = concat_273, x = x_299_cast_fp16)[name = string("reshape_55_cast_fp16")]; - bool matmul_18_transpose_x_0 = const()[name = string("matmul_18_transpose_x_0"), val = bool(false)]; - bool matmul_18_transpose_y_0 = const()[name = string("matmul_18_transpose_y_0"), val = bool(false)]; - tensor matmul_18_cast_fp16 = matmul(transpose_x = matmul_18_transpose_x_0, transpose_y = matmul_18_transpose_y_0, x = reshape_54_cast_fp16, y = reshape_55_cast_fp16)[name = string("matmul_18_cast_fp16")]; - tensor concat_277 = const()[name = string("concat_277"), val = tensor([1, 4, 64, 256])]; - tensor reshape_56_cast_fp16 = reshape(shape = concat_277, x = matmul_18_cast_fp16)[name = string("reshape_56_cast_fp16")]; - tensor var_15568_perm_0 = const()[name = string("op_15568_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_15587 = const()[name = string("op_15587"), val = tensor([1, 64, 1024])]; - tensor var_15568_cast_fp16 = transpose(perm = var_15568_perm_0, x = reshape_56_cast_fp16)[name = string("transpose_67")]; - tensor attn_output_185_cast_fp16 = reshape(shape = var_15587, x = var_15568_cast_fp16)[name = string("attn_output_185_cast_fp16")]; - tensor var_15592 = const()[name = string("op_15592"), val = tensor([0, 2, 1])]; - string var_15608_pad_type_0 = const()[name = string("op_15608_pad_type_0"), val = string("valid")]; - int32 var_15608_groups_0 = const()[name = string("op_15608_groups_0"), val = int32(1)]; - tensor var_15608_strides_0 = const()[name = string("op_15608_strides_0"), val = tensor([1])]; - tensor var_15608_pad_0 = const()[name = string("op_15608_pad_0"), val = tensor([0, 0])]; - tensor var_15608_dilations_0 = const()[name = string("op_15608_dilations_0"), val = tensor([1])]; - tensor squeeze_18_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395191552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396076352))))[name = string("squeeze_18_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_15593_cast_fp16 = transpose(perm = var_15592, x = attn_output_185_cast_fp16)[name = string("transpose_66")]; - tensor var_15608_cast_fp16 = conv(dilations = var_15608_dilations_0, groups = var_15608_groups_0, pad = var_15608_pad_0, pad_type = var_15608_pad_type_0, strides = var_15608_strides_0, weight = squeeze_18_cast_fp16_to_fp32_to_fp16_palettized, x = var_15593_cast_fp16)[name = string("op_15608_cast_fp16")]; - tensor var_15612 = const()[name = string("op_15612"), val = tensor([0, 2, 1])]; - int32 var_15623 = const()[name = string("op_15623"), val = int32(-1)]; - fp16 const_789_promoted_to_fp16 = const()[name = string("const_789_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_297_cast_fp16 = transpose(perm = var_15612, x = var_15608_cast_fp16)[name = string("transpose_65")]; - tensor var_15625_cast_fp16 = mul(x = hidden_states_297_cast_fp16, y = const_789_promoted_to_fp16)[name = string("op_15625_cast_fp16")]; - bool input_371_interleave_0 = const()[name = string("input_371_interleave_0"), val = bool(false)]; - tensor input_371_cast_fp16 = concat(axis = var_15623, interleave = input_371_interleave_0, values = (hidden_states_297_cast_fp16, var_15625_cast_fp16))[name = string("input_371_cast_fp16")]; - tensor normed_445_axes_0 = const()[name = string("normed_445_axes_0"), val = tensor([-1])]; - fp16 var_15620_to_fp16 = const()[name = string("op_15620_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_445_cast_fp16 = layer_norm(axes = normed_445_axes_0, epsilon = var_15620_to_fp16, x = input_371_cast_fp16)[name = string("normed_445_cast_fp16")]; - tensor normed_447_begin_0 = const()[name = string("normed_447_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_447_end_0 = const()[name = string("normed_447_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_447_end_mask_0 = const()[name = string("normed_447_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_447_cast_fp16 = slice_by_index(begin = normed_447_begin_0, end = normed_447_end_0, end_mask = normed_447_end_mask_0, x = normed_445_cast_fp16)[name = string("normed_447_cast_fp16")]; - tensor var_15639_to_fp16 = const()[name = string("op_15639_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396094848)))]; - tensor attn_output_189_cast_fp16 = mul(x = normed_447_cast_fp16, y = var_15639_to_fp16)[name = string("attn_output_189_cast_fp16")]; - tensor hidden_states_299_cast_fp16 = add(x = hidden_states_289_cast_fp16, y = attn_output_189_cast_fp16)[name = string("hidden_states_299_cast_fp16")]; - int32 var_15652 = const()[name = string("op_15652"), val = int32(-1)]; - fp16 const_793_promoted_to_fp16 = const()[name = string("const_793_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_15654_cast_fp16 = mul(x = hidden_states_299_cast_fp16, y = const_793_promoted_to_fp16)[name = string("op_15654_cast_fp16")]; - bool input_373_interleave_0 = const()[name = string("input_373_interleave_0"), val = bool(false)]; - tensor input_373_cast_fp16 = concat(axis = var_15652, interleave = input_373_interleave_0, values = (hidden_states_299_cast_fp16, var_15654_cast_fp16))[name = string("input_373_cast_fp16")]; - tensor normed_449_axes_0 = const()[name = string("normed_449_axes_0"), val = tensor([-1])]; - fp16 var_15649_to_fp16 = const()[name = string("op_15649_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_449_cast_fp16 = layer_norm(axes = normed_449_axes_0, epsilon = var_15649_to_fp16, x = input_373_cast_fp16)[name = string("normed_449_cast_fp16")]; - tensor normed_451_begin_0 = const()[name = string("normed_451_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_451_end_0 = const()[name = string("normed_451_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_451_end_mask_0 = const()[name = string("normed_451_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_451_cast_fp16 = slice_by_index(begin = normed_451_begin_0, end = normed_451_end_0, end_mask = normed_451_end_mask_0, x = normed_449_cast_fp16)[name = string("normed_451_cast_fp16")]; - tensor var_15668_to_fp16 = const()[name = string("op_15668_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396097216)))]; - tensor x_301_cast_fp16 = mul(x = normed_451_cast_fp16, y = var_15668_to_fp16)[name = string("x_301_cast_fp16")]; - tensor var_15680 = const()[name = string("op_15680"), val = tensor([0, 2, 1])]; - tensor input_375_axes_0 = const()[name = string("input_375_axes_0"), val = tensor([2])]; - tensor var_15681_cast_fp16 = transpose(perm = var_15680, x = x_301_cast_fp16)[name = string("transpose_64")]; - tensor input_375_cast_fp16 = expand_dims(axes = input_375_axes_0, x = var_15681_cast_fp16)[name = string("input_375_cast_fp16")]; - string x_303_pad_type_0 = const()[name = string("x_303_pad_type_0"), val = string("valid")]; - tensor x_303_strides_0 = const()[name = string("x_303_strides_0"), val = tensor([1, 1])]; - tensor x_303_pad_0 = const()[name = string("x_303_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_303_dilations_0 = const()[name = string("x_303_dilations_0"), val = tensor([1, 1])]; - int32 x_303_groups_0 = const()[name = string("x_303_groups_0"), val = int32(1)]; - tensor model_model_layers_18_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1410411968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1416384000))))[name = string("model_model_layers_18_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_303_cast_fp16 = conv(dilations = x_303_dilations_0, groups = x_303_groups_0, pad = x_303_pad_0, pad_type = x_303_pad_type_0, strides = x_303_strides_0, weight = model_model_layers_18_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_375_cast_fp16)[name = string("x_303_cast_fp16")]; - string b_37_pad_type_0 = const()[name = string("b_37_pad_type_0"), val = string("valid")]; - tensor b_37_strides_0 = const()[name = string("b_37_strides_0"), val = tensor([1, 1])]; - tensor b_37_pad_0 = const()[name = string("b_37_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_37_dilations_0 = const()[name = string("b_37_dilations_0"), val = tensor([1, 1])]; - int32 b_37_groups_0 = const()[name = string("b_37_groups_0"), val = int32(1)]; - tensor model_model_layers_18_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1416494656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1422466688))))[name = string("model_model_layers_18_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_37_cast_fp16 = conv(dilations = b_37_dilations_0, groups = b_37_groups_0, pad = b_37_pad_0, pad_type = b_37_pad_type_0, strides = b_37_strides_0, weight = model_model_layers_18_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_375_cast_fp16)[name = string("b_37_cast_fp16")]; - string var_15706_mode_0 = const()[name = string("op_15706_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_15706_cast_fp16 = gelu(mode = var_15706_mode_0, x = x_303_cast_fp16)[name = string("op_15706_cast_fp16")]; - tensor input_377_cast_fp16 = mul(x = var_15706_cast_fp16, y = b_37_cast_fp16)[name = string("input_377_cast_fp16")]; - string e_37_pad_type_0 = const()[name = string("e_37_pad_type_0"), val = string("valid")]; - tensor e_37_strides_0 = const()[name = string("e_37_strides_0"), val = tensor([1, 1])]; - tensor e_37_pad_0 = const()[name = string("e_37_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_37_dilations_0 = const()[name = string("e_37_dilations_0"), val = tensor([1, 1])]; - int32 e_37_groups_0 = const()[name = string("e_37_groups_0"), val = int32(1)]; - tensor model_model_layers_18_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408264960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414236992))))[name = string("model_model_layers_18_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_37_cast_fp16 = conv(dilations = e_37_dilations_0, groups = e_37_groups_0, pad = e_37_pad_0, pad_type = e_37_pad_type_0, strides = e_37_strides_0, weight = model_model_layers_18_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_377_cast_fp16)[name = string("e_37_cast_fp16")]; - tensor var_15714_axes_0 = const()[name = string("op_15714_axes_0"), val = tensor([2])]; - tensor var_15714_cast_fp16 = squeeze(axes = var_15714_axes_0, x = e_37_cast_fp16)[name = string("op_15714_cast_fp16")]; - tensor var_15715 = const()[name = string("op_15715"), val = tensor([0, 2, 1])]; - int32 var_15726 = const()[name = string("op_15726"), val = int32(-1)]; - fp16 const_797_promoted_to_fp16 = const()[name = string("const_797_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_301_cast_fp16 = transpose(perm = var_15715, x = var_15714_cast_fp16)[name = string("transpose_63")]; - tensor var_15728_cast_fp16 = mul(x = hidden_states_301_cast_fp16, y = const_797_promoted_to_fp16)[name = string("op_15728_cast_fp16")]; - bool input_379_interleave_0 = const()[name = string("input_379_interleave_0"), val = bool(false)]; - tensor input_379_cast_fp16 = concat(axis = var_15726, interleave = input_379_interleave_0, values = (hidden_states_301_cast_fp16, var_15728_cast_fp16))[name = string("input_379_cast_fp16")]; - tensor normed_453_axes_0 = const()[name = string("normed_453_axes_0"), val = tensor([-1])]; - fp16 var_15723_to_fp16 = const()[name = string("op_15723_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_453_cast_fp16 = layer_norm(axes = normed_453_axes_0, epsilon = var_15723_to_fp16, x = input_379_cast_fp16)[name = string("normed_453_cast_fp16")]; - tensor normed_455_begin_0 = const()[name = string("normed_455_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_455_end_0 = const()[name = string("normed_455_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_455_end_mask_0 = const()[name = string("normed_455_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_455_cast_fp16 = slice_by_index(begin = normed_455_begin_0, end = normed_455_end_0, end_mask = normed_455_end_mask_0, x = normed_453_cast_fp16)[name = string("normed_455_cast_fp16")]; - tensor var_15742_to_fp16 = const()[name = string("op_15742_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414255488)))]; - tensor hidden_states_303_cast_fp16 = mul(x = normed_455_cast_fp16, y = var_15742_to_fp16)[name = string("hidden_states_303_cast_fp16")]; - tensor hidden_states_305_cast_fp16 = add(x = hidden_states_299_cast_fp16, y = hidden_states_303_cast_fp16)[name = string("hidden_states_305_cast_fp16")]; - int32 var_15796 = const()[name = string("op_15796"), val = int32(-1)]; - fp16 const_802_promoted_to_fp16 = const()[name = string("const_802_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_15798_cast_fp16 = mul(x = hidden_states_305_cast_fp16, y = const_802_promoted_to_fp16)[name = string("op_15798_cast_fp16")]; - bool input_381_interleave_0 = const()[name = string("input_381_interleave_0"), val = bool(false)]; - tensor input_381_cast_fp16 = concat(axis = var_15796, interleave = input_381_interleave_0, values = (hidden_states_305_cast_fp16, var_15798_cast_fp16))[name = string("input_381_cast_fp16")]; - tensor normed_457_axes_0 = const()[name = string("normed_457_axes_0"), val = tensor([-1])]; - fp16 var_15793_to_fp16 = const()[name = string("op_15793_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_457_cast_fp16 = layer_norm(axes = normed_457_axes_0, epsilon = var_15793_to_fp16, x = input_381_cast_fp16)[name = string("normed_457_cast_fp16")]; - tensor normed_459_begin_0 = const()[name = string("normed_459_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_459_end_0 = const()[name = string("normed_459_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_459_end_mask_0 = const()[name = string("normed_459_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_459_cast_fp16 = slice_by_index(begin = normed_459_begin_0, end = normed_459_end_0, end_mask = normed_459_end_mask_0, x = normed_457_cast_fp16)[name = string("normed_459_cast_fp16")]; - tensor var_15812_to_fp16 = const()[name = string("op_15812_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414257856)))]; - tensor hidden_states_307_cast_fp16 = mul(x = normed_459_cast_fp16, y = var_15812_to_fp16)[name = string("hidden_states_307_cast_fp16")]; - tensor var_15823 = const()[name = string("op_15823"), val = tensor([0, 2, 1])]; - tensor var_15826_axes_0 = const()[name = string("op_15826_axes_0"), val = tensor([2])]; - tensor var_15824_cast_fp16 = transpose(perm = var_15823, x = hidden_states_307_cast_fp16)[name = string("transpose_62")]; - tensor var_15826_cast_fp16 = expand_dims(axes = var_15826_axes_0, x = var_15824_cast_fp16)[name = string("op_15826_cast_fp16")]; - string query_states_153_pad_type_0 = const()[name = string("query_states_153_pad_type_0"), val = string("valid")]; - tensor query_states_153_strides_0 = const()[name = string("query_states_153_strides_0"), val = tensor([1, 1])]; - tensor query_states_153_pad_0 = const()[name = string("query_states_153_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_153_dilations_0 = const()[name = string("query_states_153_dilations_0"), val = tensor([1, 1])]; - int32 query_states_153_groups_0 = const()[name = string("query_states_153_groups_0"), val = int32(1)]; - tensor query_states_153 = conv(dilations = query_states_153_dilations_0, groups = query_states_153_groups_0, pad = query_states_153_pad_0, pad_type = query_states_153_pad_type_0, strides = query_states_153_strides_0, weight = model_model_layers_19_self_attn_q_proj_weight_palettized, x = var_15826_cast_fp16)[name = string("query_states_153")]; - string key_states_191_pad_type_0 = const()[name = string("key_states_191_pad_type_0"), val = string("valid")]; - tensor key_states_191_strides_0 = const()[name = string("key_states_191_strides_0"), val = tensor([1, 1])]; - tensor key_states_191_pad_0 = const()[name = string("key_states_191_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_191_dilations_0 = const()[name = string("key_states_191_dilations_0"), val = tensor([1, 1])]; - int32 key_states_191_groups_0 = const()[name = string("key_states_191_groups_0"), val = int32(1)]; - tensor key_states_191 = conv(dilations = key_states_191_dilations_0, groups = key_states_191_groups_0, pad = key_states_191_pad_0, pad_type = key_states_191_pad_type_0, strides = key_states_191_strides_0, weight = model_model_layers_19_self_attn_k_proj_weight_palettized, x = var_15826_cast_fp16)[name = string("key_states_191")]; - string value_states_153_pad_type_0 = const()[name = string("value_states_153_pad_type_0"), val = string("valid")]; - tensor value_states_153_strides_0 = const()[name = string("value_states_153_strides_0"), val = tensor([1, 1])]; - tensor value_states_153_pad_0 = const()[name = string("value_states_153_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_153_dilations_0 = const()[name = string("value_states_153_dilations_0"), val = tensor([1, 1])]; - int32 value_states_153_groups_0 = const()[name = string("value_states_153_groups_0"), val = int32(1)]; - tensor value_states_153 = conv(dilations = value_states_153_dilations_0, groups = value_states_153_groups_0, pad = value_states_153_pad_0, pad_type = value_states_153_pad_type_0, strides = value_states_153_strides_0, weight = model_model_layers_19_self_attn_v_proj_weight_palettized, x = var_15826_cast_fp16)[name = string("value_states_153")]; - tensor var_15868 = const()[name = string("op_15868"), val = tensor([1, 4, 256, 64])]; - tensor var_15869 = reshape(shape = var_15868, x = query_states_153)[name = string("op_15869")]; - tensor var_15874 = const()[name = string("op_15874"), val = tensor([0, 1, 3, 2])]; - tensor var_15879 = const()[name = string("op_15879"), val = tensor([1, 1, 256, 64])]; - tensor var_15880 = reshape(shape = var_15879, x = key_states_191)[name = string("op_15880")]; - tensor var_15885 = const()[name = string("op_15885"), val = tensor([0, 1, 3, 2])]; - tensor var_15890 = const()[name = string("op_15890"), val = tensor([1, 1, 256, 64])]; - tensor var_15891 = reshape(shape = var_15890, x = value_states_153)[name = string("op_15891")]; - tensor var_15896 = const()[name = string("op_15896"), val = tensor([0, 1, 3, 2])]; - int32 var_15907 = const()[name = string("op_15907"), val = int32(-1)]; - fp16 const_807_promoted = const()[name = string("const_807_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_309 = transpose(perm = var_15874, x = var_15869)[name = string("transpose_61")]; - tensor var_15909 = mul(x = hidden_states_309, y = const_807_promoted)[name = string("op_15909")]; - bool input_385_interleave_0 = const()[name = string("input_385_interleave_0"), val = bool(false)]; - tensor input_385 = concat(axis = var_15907, interleave = input_385_interleave_0, values = (hidden_states_309, var_15909))[name = string("input_385")]; - tensor normed_461_axes_0 = const()[name = string("normed_461_axes_0"), val = tensor([-1])]; - fp16 var_15904_to_fp16 = const()[name = string("op_15904_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_461_cast_fp16 = layer_norm(axes = normed_461_axes_0, epsilon = var_15904_to_fp16, x = input_385)[name = string("normed_461_cast_fp16")]; - tensor normed_463_begin_0 = const()[name = string("normed_463_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_463_end_0 = const()[name = string("normed_463_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_463_end_mask_0 = const()[name = string("normed_463_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_463 = slice_by_index(begin = normed_463_begin_0, end = normed_463_end_0, end_mask = normed_463_end_mask_0, x = normed_461_cast_fp16)[name = string("normed_463")]; - tensor var_15923_to_fp16 = const()[name = string("op_15923_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414260224)))]; - tensor q_39_cast_fp16 = mul(x = normed_463, y = var_15923_to_fp16)[name = string("q_39_cast_fp16")]; - int32 var_15934 = const()[name = string("op_15934"), val = int32(-1)]; - fp16 const_811_promoted = const()[name = string("const_811_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_311 = transpose(perm = var_15885, x = var_15880)[name = string("transpose_60")]; - tensor var_15936 = mul(x = hidden_states_311, y = const_811_promoted)[name = string("op_15936")]; - bool input_387_interleave_0 = const()[name = string("input_387_interleave_0"), val = bool(false)]; - tensor input_387 = concat(axis = var_15934, interleave = input_387_interleave_0, values = (hidden_states_311, var_15936))[name = string("input_387")]; - tensor normed_465_axes_0 = const()[name = string("normed_465_axes_0"), val = tensor([-1])]; - fp16 var_15931_to_fp16 = const()[name = string("op_15931_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_465_cast_fp16 = layer_norm(axes = normed_465_axes_0, epsilon = var_15931_to_fp16, x = input_387)[name = string("normed_465_cast_fp16")]; - tensor normed_467_begin_0 = const()[name = string("normed_467_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_467_end_0 = const()[name = string("normed_467_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_467_end_mask_0 = const()[name = string("normed_467_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_467 = slice_by_index(begin = normed_467_begin_0, end = normed_467_end_0, end_mask = normed_467_end_mask_0, x = normed_465_cast_fp16)[name = string("normed_467")]; - tensor var_15950_to_fp16 = const()[name = string("op_15950_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414260800)))]; - tensor k_39_cast_fp16 = mul(x = normed_467, y = var_15950_to_fp16)[name = string("k_39_cast_fp16")]; - tensor var_15964_cast_fp16 = mul(x = q_39_cast_fp16, y = cos_5)[name = string("op_15964_cast_fp16")]; - tensor x1_77_begin_0 = const()[name = string("x1_77_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_77_end_0 = const()[name = string("x1_77_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_77_end_mask_0 = const()[name = string("x1_77_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_77_cast_fp16 = slice_by_index(begin = x1_77_begin_0, end = x1_77_end_0, end_mask = x1_77_end_mask_0, x = q_39_cast_fp16)[name = string("x1_77_cast_fp16")]; - tensor x2_77_begin_0 = const()[name = string("x2_77_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_77_end_0 = const()[name = string("x2_77_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_77_end_mask_0 = const()[name = string("x2_77_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_77_cast_fp16 = slice_by_index(begin = x2_77_begin_0, end = x2_77_end_0, end_mask = x2_77_end_mask_0, x = q_39_cast_fp16)[name = string("x2_77_cast_fp16")]; - fp16 const_817_promoted_to_fp16 = const()[name = string("const_817_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_15985_cast_fp16 = mul(x = x2_77_cast_fp16, y = const_817_promoted_to_fp16)[name = string("op_15985_cast_fp16")]; - int32 var_15987 = const()[name = string("op_15987"), val = int32(-1)]; - bool var_15988_interleave_0 = const()[name = string("op_15988_interleave_0"), val = bool(false)]; - tensor var_15988_cast_fp16 = concat(axis = var_15987, interleave = var_15988_interleave_0, values = (var_15985_cast_fp16, x1_77_cast_fp16))[name = string("op_15988_cast_fp16")]; - tensor var_15989_cast_fp16 = mul(x = var_15988_cast_fp16, y = sin_5)[name = string("op_15989_cast_fp16")]; - tensor query_states_155_cast_fp16 = add(x = var_15964_cast_fp16, y = var_15989_cast_fp16)[name = string("query_states_155_cast_fp16")]; - tensor var_15992_cast_fp16 = mul(x = k_39_cast_fp16, y = cos_5)[name = string("op_15992_cast_fp16")]; - tensor x1_79_begin_0 = const()[name = string("x1_79_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_79_end_0 = const()[name = string("x1_79_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_79_end_mask_0 = const()[name = string("x1_79_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_79_cast_fp16 = slice_by_index(begin = x1_79_begin_0, end = x1_79_end_0, end_mask = x1_79_end_mask_0, x = k_39_cast_fp16)[name = string("x1_79_cast_fp16")]; - tensor x2_79_begin_0 = const()[name = string("x2_79_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_79_end_0 = const()[name = string("x2_79_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_79_end_mask_0 = const()[name = string("x2_79_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_79_cast_fp16 = slice_by_index(begin = x2_79_begin_0, end = x2_79_end_0, end_mask = x2_79_end_mask_0, x = k_39_cast_fp16)[name = string("x2_79_cast_fp16")]; - fp16 const_820_promoted_to_fp16 = const()[name = string("const_820_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_16013_cast_fp16 = mul(x = x2_79_cast_fp16, y = const_820_promoted_to_fp16)[name = string("op_16013_cast_fp16")]; - int32 var_16015 = const()[name = string("op_16015"), val = int32(-1)]; - bool var_16016_interleave_0 = const()[name = string("op_16016_interleave_0"), val = bool(false)]; - tensor var_16016_cast_fp16 = concat(axis = var_16015, interleave = var_16016_interleave_0, values = (var_16013_cast_fp16, x1_79_cast_fp16))[name = string("op_16016_cast_fp16")]; - tensor var_16017_cast_fp16 = mul(x = var_16016_cast_fp16, y = sin_5)[name = string("op_16017_cast_fp16")]; - tensor key_states_193_cast_fp16 = add(x = var_15992_cast_fp16, y = var_16017_cast_fp16)[name = string("key_states_193_cast_fp16")]; - tensor key_slice_33_begin_0 = const()[name = string("key_slice_33_begin_0"), val = tensor([16, 0, 0, 0])]; - tensor key_slice_33_end_0 = const()[name = string("key_slice_33_end_0"), val = tensor([17, 1, 512, 256])]; - tensor key_slice_33_end_mask_0 = const()[name = string("key_slice_33_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_33_cast_fp16 = slice_by_index(begin = key_slice_33_begin_0, end = key_slice_33_end_0, end_mask = key_slice_33_end_mask_0, x = coreml_update_state_89)[name = string("key_slice_33_cast_fp16")]; - tensor var_16054_begin_0 = const()[name = string("op_16054_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_16054_end_0 = const()[name = string("op_16054_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_16054_end_mask_0 = const()[name = string("op_16054_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_16054_cast_fp16 = slice_by_index(begin = var_16054_begin_0, end = var_16054_end_0, end_mask = var_16054_end_mask_0, x = key_slice_33_cast_fp16)[name = string("op_16054_cast_fp16")]; - int32 var_16081 = const()[name = string("op_16081"), val = int32(2)]; - bool shifted_key_33_interleave_0 = const()[name = string("shifted_key_33_interleave_0"), val = bool(false)]; - tensor shifted_key_33_cast_fp16 = concat(axis = var_16081, interleave = shifted_key_33_interleave_0, values = (var_16054_cast_fp16, key_states_193_cast_fp16))[name = string("shifted_key_33_cast_fp16")]; - tensor concat_278 = const()[name = string("concat_278"), val = tensor([16, 0, 0, 0])]; - tensor concat_279 = const()[name = string("concat_279"), val = tensor([17, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_33_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_33_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_33_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_33_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_33_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_33_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_33_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_33_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_33_cast_fp16 = slice_update(begin = concat_278, begin_mask = model_model_kv_cache_local_internal_tensor_assign_33_begin_mask_0, end = concat_279, end_mask = model_model_kv_cache_local_internal_tensor_assign_33_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_33_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_33_stride_0, update = shifted_key_33_cast_fp16, x = coreml_update_state_89)[name = string("model_model_kv_cache_local_internal_tensor_assign_33_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_33_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_194_write_state")]; - tensor coreml_update_state_90 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_194")]; - tensor value_slice_33_begin_0 = const()[name = string("value_slice_33_begin_0"), val = tensor([38, 0, 0, 0])]; - tensor value_slice_33_end_0 = const()[name = string("value_slice_33_end_0"), val = tensor([39, 1, 512, 256])]; - tensor value_slice_33_end_mask_0 = const()[name = string("value_slice_33_end_mask_0"), val = tensor([false, true, true, true])]; - tensor value_slice_33_cast_fp16 = slice_by_index(begin = value_slice_33_begin_0, end = value_slice_33_end_0, end_mask = value_slice_33_end_mask_0, x = coreml_update_state_90)[name = string("value_slice_33_cast_fp16")]; - tensor var_16124_begin_0 = const()[name = string("op_16124_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_16124_end_0 = const()[name = string("op_16124_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_16124_end_mask_0 = const()[name = string("op_16124_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_16124_cast_fp16 = slice_by_index(begin = var_16124_begin_0, end = var_16124_end_0, end_mask = var_16124_end_mask_0, x = value_slice_33_cast_fp16)[name = string("op_16124_cast_fp16")]; - int32 var_16151 = const()[name = string("op_16151"), val = int32(2)]; - bool shifted_value_33_interleave_0 = const()[name = string("shifted_value_33_interleave_0"), val = bool(false)]; - tensor value_states_155 = transpose(perm = var_15896, x = var_15891)[name = string("transpose_59")]; - tensor shifted_value_33_cast_fp16 = concat(axis = var_16151, interleave = shifted_value_33_interleave_0, values = (var_16124_cast_fp16, value_states_155))[name = string("shifted_value_33_cast_fp16")]; - tensor concat_280 = const()[name = string("concat_280"), val = tensor([38, 0, 0, 0])]; - tensor concat_281 = const()[name = string("concat_281"), val = tensor([39, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_34_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_34_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_34_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_34_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_34_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_34_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_34_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_34_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_34_cast_fp16 = slice_update(begin = concat_280, begin_mask = model_model_kv_cache_local_internal_tensor_assign_34_begin_mask_0, end = concat_281, end_mask = model_model_kv_cache_local_internal_tensor_assign_34_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_34_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_34_stride_0, update = shifted_value_33_cast_fp16, x = coreml_update_state_90)[name = string("model_model_kv_cache_local_internal_tensor_assign_34_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_34_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_195_write_state")]; - tensor coreml_update_state_91 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_195")]; - tensor var_16179_begin_0 = const()[name = string("op_16179_begin_0"), val = tensor([16, 0, 0, 0])]; - tensor var_16179_end_0 = const()[name = string("op_16179_end_0"), val = tensor([17, 1, 512, 256])]; - tensor var_16179_end_mask_0 = const()[name = string("op_16179_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_16179_cast_fp16 = slice_by_index(begin = var_16179_begin_0, end = var_16179_end_0, end_mask = var_16179_end_mask_0, x = coreml_update_state_91)[name = string("op_16179_cast_fp16")]; - tensor var_16186_begin_0 = const()[name = string("op_16186_begin_0"), val = tensor([38, 0, 0, 0])]; - tensor var_16186_end_0 = const()[name = string("op_16186_end_0"), val = tensor([39, 1, 512, 256])]; - tensor var_16186_end_mask_0 = const()[name = string("op_16186_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_16186_cast_fp16 = slice_by_index(begin = var_16186_begin_0, end = var_16186_end_0, end_mask = var_16186_end_mask_0, x = coreml_update_state_91)[name = string("op_16186_cast_fp16")]; - tensor var_16225 = const()[name = string("op_16225"), val = tensor([1, 4, 1, 1])]; - tensor x_309_cast_fp16 = tile(reps = var_16225, x = var_16179_cast_fp16)[name = string("x_309_cast_fp16")]; - tensor var_16245 = const()[name = string("op_16245"), val = tensor([1, 4, 1, 1])]; - tensor x_315_cast_fp16 = tile(reps = var_16245, x = var_16186_cast_fp16)[name = string("x_315_cast_fp16")]; - bool var_16272_transpose_x_0 = const()[name = string("op_16272_transpose_x_0"), val = bool(false)]; - bool var_16272_transpose_y_0 = const()[name = string("op_16272_transpose_y_0"), val = bool(true)]; - tensor var_16272 = matmul(transpose_x = var_16272_transpose_x_0, transpose_y = var_16272_transpose_y_0, x = query_states_155_cast_fp16, y = x_309_cast_fp16)[name = string("op_16272")]; - fp16 var_16273_to_fp16 = const()[name = string("op_16273_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_77_cast_fp16 = mul(x = var_16272, y = var_16273_to_fp16)[name = string("attn_weights_77_cast_fp16")]; - tensor attn_weights_79_cast_fp16 = add(x = attn_weights_77_cast_fp16, y = mask_slice_1)[name = string("attn_weights_79_cast_fp16")]; - int32 var_16308 = const()[name = string("op_16308"), val = int32(-1)]; - tensor var_16310_cast_fp16 = softmax(axis = var_16308, x = attn_weights_79_cast_fp16)[name = string("op_16310_cast_fp16")]; - tensor concat_286 = const()[name = string("concat_286"), val = tensor([4, 64, 512])]; - tensor reshape_57_cast_fp16 = reshape(shape = concat_286, x = var_16310_cast_fp16)[name = string("reshape_57_cast_fp16")]; - tensor concat_287 = const()[name = string("concat_287"), val = tensor([4, 512, 256])]; - tensor reshape_58_cast_fp16 = reshape(shape = concat_287, x = x_315_cast_fp16)[name = string("reshape_58_cast_fp16")]; - bool matmul_19_transpose_x_0 = const()[name = string("matmul_19_transpose_x_0"), val = bool(false)]; - bool matmul_19_transpose_y_0 = const()[name = string("matmul_19_transpose_y_0"), val = bool(false)]; - tensor matmul_19_cast_fp16 = matmul(transpose_x = matmul_19_transpose_x_0, transpose_y = matmul_19_transpose_y_0, x = reshape_57_cast_fp16, y = reshape_58_cast_fp16)[name = string("matmul_19_cast_fp16")]; - tensor concat_291 = const()[name = string("concat_291"), val = tensor([1, 4, 64, 256])]; - tensor reshape_59_cast_fp16 = reshape(shape = concat_291, x = matmul_19_cast_fp16)[name = string("reshape_59_cast_fp16")]; - tensor var_16322_perm_0 = const()[name = string("op_16322_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_16341 = const()[name = string("op_16341"), val = tensor([1, 64, 1024])]; - tensor var_16322_cast_fp16 = transpose(perm = var_16322_perm_0, x = reshape_59_cast_fp16)[name = string("transpose_58")]; - tensor attn_output_195_cast_fp16 = reshape(shape = var_16341, x = var_16322_cast_fp16)[name = string("attn_output_195_cast_fp16")]; - tensor var_16346 = const()[name = string("op_16346"), val = tensor([0, 2, 1])]; - string var_16362_pad_type_0 = const()[name = string("op_16362_pad_type_0"), val = string("valid")]; - int32 var_16362_groups_0 = const()[name = string("op_16362_groups_0"), val = int32(1)]; - tensor var_16362_strides_0 = const()[name = string("op_16362_strides_0"), val = tensor([1])]; - tensor var_16362_pad_0 = const()[name = string("op_16362_pad_0"), val = tensor([0, 0])]; - tensor var_16362_dilations_0 = const()[name = string("op_16362_dilations_0"), val = tensor([1])]; - tensor squeeze_19_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(414261376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415146176))))[name = string("squeeze_19_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_16347_cast_fp16 = transpose(perm = var_16346, x = attn_output_195_cast_fp16)[name = string("transpose_57")]; - tensor var_16362_cast_fp16 = conv(dilations = var_16362_dilations_0, groups = var_16362_groups_0, pad = var_16362_pad_0, pad_type = var_16362_pad_type_0, strides = var_16362_strides_0, weight = squeeze_19_cast_fp16_to_fp32_to_fp16_palettized, x = var_16347_cast_fp16)[name = string("op_16362_cast_fp16")]; - tensor var_16366 = const()[name = string("op_16366"), val = tensor([0, 2, 1])]; - int32 var_16377 = const()[name = string("op_16377"), val = int32(-1)]; - fp16 const_831_promoted_to_fp16 = const()[name = string("const_831_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_313_cast_fp16 = transpose(perm = var_16366, x = var_16362_cast_fp16)[name = string("transpose_56")]; - tensor var_16379_cast_fp16 = mul(x = hidden_states_313_cast_fp16, y = const_831_promoted_to_fp16)[name = string("op_16379_cast_fp16")]; - bool input_391_interleave_0 = const()[name = string("input_391_interleave_0"), val = bool(false)]; - tensor input_391_cast_fp16 = concat(axis = var_16377, interleave = input_391_interleave_0, values = (hidden_states_313_cast_fp16, var_16379_cast_fp16))[name = string("input_391_cast_fp16")]; - tensor normed_469_axes_0 = const()[name = string("normed_469_axes_0"), val = tensor([-1])]; - fp16 var_16374_to_fp16 = const()[name = string("op_16374_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_469_cast_fp16 = layer_norm(axes = normed_469_axes_0, epsilon = var_16374_to_fp16, x = input_391_cast_fp16)[name = string("normed_469_cast_fp16")]; - tensor normed_471_begin_0 = const()[name = string("normed_471_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_471_end_0 = const()[name = string("normed_471_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_471_end_mask_0 = const()[name = string("normed_471_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_471_cast_fp16 = slice_by_index(begin = normed_471_begin_0, end = normed_471_end_0, end_mask = normed_471_end_mask_0, x = normed_469_cast_fp16)[name = string("normed_471_cast_fp16")]; - tensor var_16393_to_fp16 = const()[name = string("op_16393_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415164672)))]; - tensor attn_output_199_cast_fp16 = mul(x = normed_471_cast_fp16, y = var_16393_to_fp16)[name = string("attn_output_199_cast_fp16")]; - tensor hidden_states_315_cast_fp16 = add(x = hidden_states_305_cast_fp16, y = attn_output_199_cast_fp16)[name = string("hidden_states_315_cast_fp16")]; - int32 var_16406 = const()[name = string("op_16406"), val = int32(-1)]; - fp16 const_835_promoted_to_fp16 = const()[name = string("const_835_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_16408_cast_fp16 = mul(x = hidden_states_315_cast_fp16, y = const_835_promoted_to_fp16)[name = string("op_16408_cast_fp16")]; - bool input_393_interleave_0 = const()[name = string("input_393_interleave_0"), val = bool(false)]; - tensor input_393_cast_fp16 = concat(axis = var_16406, interleave = input_393_interleave_0, values = (hidden_states_315_cast_fp16, var_16408_cast_fp16))[name = string("input_393_cast_fp16")]; - tensor normed_473_axes_0 = const()[name = string("normed_473_axes_0"), val = tensor([-1])]; - fp16 var_16403_to_fp16 = const()[name = string("op_16403_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_473_cast_fp16 = layer_norm(axes = normed_473_axes_0, epsilon = var_16403_to_fp16, x = input_393_cast_fp16)[name = string("normed_473_cast_fp16")]; - tensor normed_475_begin_0 = const()[name = string("normed_475_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_475_end_0 = const()[name = string("normed_475_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_475_end_mask_0 = const()[name = string("normed_475_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_475_cast_fp16 = slice_by_index(begin = normed_475_begin_0, end = normed_475_end_0, end_mask = normed_475_end_mask_0, x = normed_473_cast_fp16)[name = string("normed_475_cast_fp16")]; - tensor var_16422_to_fp16 = const()[name = string("op_16422_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415167040)))]; - tensor x_317_cast_fp16 = mul(x = normed_475_cast_fp16, y = var_16422_to_fp16)[name = string("x_317_cast_fp16")]; - tensor var_16434 = const()[name = string("op_16434"), val = tensor([0, 2, 1])]; - tensor input_395_axes_0 = const()[name = string("input_395_axes_0"), val = tensor([2])]; - tensor var_16435_cast_fp16 = transpose(perm = var_16434, x = x_317_cast_fp16)[name = string("transpose_55")]; - tensor input_395_cast_fp16 = expand_dims(axes = input_395_axes_0, x = var_16435_cast_fp16)[name = string("input_395_cast_fp16")]; - string x_319_pad_type_0 = const()[name = string("x_319_pad_type_0"), val = string("valid")]; - tensor x_319_strides_0 = const()[name = string("x_319_strides_0"), val = tensor([1, 1])]; - tensor x_319_pad_0 = const()[name = string("x_319_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_319_dilations_0 = const()[name = string("x_319_dilations_0"), val = tensor([1, 1])]; - int32 x_319_groups_0 = const()[name = string("x_319_groups_0"), val = int32(1)]; - tensor model_model_layers_19_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1422577344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1428549376))))[name = string("model_model_layers_19_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_319_cast_fp16 = conv(dilations = x_319_dilations_0, groups = x_319_groups_0, pad = x_319_pad_0, pad_type = x_319_pad_type_0, strides = x_319_strides_0, weight = model_model_layers_19_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_395_cast_fp16)[name = string("x_319_cast_fp16")]; - string b_39_pad_type_0 = const()[name = string("b_39_pad_type_0"), val = string("valid")]; - tensor b_39_strides_0 = const()[name = string("b_39_strides_0"), val = tensor([1, 1])]; - tensor b_39_pad_0 = const()[name = string("b_39_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_39_dilations_0 = const()[name = string("b_39_dilations_0"), val = tensor([1, 1])]; - int32 b_39_groups_0 = const()[name = string("b_39_groups_0"), val = int32(1)]; - tensor model_model_layers_19_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1428660032))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1434632064))))[name = string("model_model_layers_19_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_39_cast_fp16 = conv(dilations = b_39_dilations_0, groups = b_39_groups_0, pad = b_39_pad_0, pad_type = b_39_pad_type_0, strides = b_39_strides_0, weight = model_model_layers_19_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_395_cast_fp16)[name = string("b_39_cast_fp16")]; - string var_16460_mode_0 = const()[name = string("op_16460_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_16460_cast_fp16 = gelu(mode = var_16460_mode_0, x = x_319_cast_fp16)[name = string("op_16460_cast_fp16")]; - tensor input_397_cast_fp16 = mul(x = var_16460_cast_fp16, y = b_39_cast_fp16)[name = string("input_397_cast_fp16")]; - string e_39_pad_type_0 = const()[name = string("e_39_pad_type_0"), val = string("valid")]; - tensor e_39_strides_0 = const()[name = string("e_39_strides_0"), val = tensor([1, 1])]; - tensor e_39_pad_0 = const()[name = string("e_39_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_39_dilations_0 = const()[name = string("e_39_dilations_0"), val = tensor([1, 1])]; - int32 e_39_groups_0 = const()[name = string("e_39_groups_0"), val = int32(1)]; - tensor model_model_layers_19_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427334784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433306816))))[name = string("model_model_layers_19_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_39_cast_fp16 = conv(dilations = e_39_dilations_0, groups = e_39_groups_0, pad = e_39_pad_0, pad_type = e_39_pad_type_0, strides = e_39_strides_0, weight = model_model_layers_19_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_397_cast_fp16)[name = string("e_39_cast_fp16")]; - tensor var_16468_axes_0 = const()[name = string("op_16468_axes_0"), val = tensor([2])]; - tensor var_16468_cast_fp16 = squeeze(axes = var_16468_axes_0, x = e_39_cast_fp16)[name = string("op_16468_cast_fp16")]; - tensor var_16469 = const()[name = string("op_16469"), val = tensor([0, 2, 1])]; - int32 var_16480 = const()[name = string("op_16480"), val = int32(-1)]; - fp16 const_839_promoted_to_fp16 = const()[name = string("const_839_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_317_cast_fp16 = transpose(perm = var_16469, x = var_16468_cast_fp16)[name = string("transpose_54")]; - tensor var_16482_cast_fp16 = mul(x = hidden_states_317_cast_fp16, y = const_839_promoted_to_fp16)[name = string("op_16482_cast_fp16")]; - bool input_399_interleave_0 = const()[name = string("input_399_interleave_0"), val = bool(false)]; - tensor input_399_cast_fp16 = concat(axis = var_16480, interleave = input_399_interleave_0, values = (hidden_states_317_cast_fp16, var_16482_cast_fp16))[name = string("input_399_cast_fp16")]; - tensor normed_477_axes_0 = const()[name = string("normed_477_axes_0"), val = tensor([-1])]; - fp16 var_16477_to_fp16 = const()[name = string("op_16477_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_477_cast_fp16 = layer_norm(axes = normed_477_axes_0, epsilon = var_16477_to_fp16, x = input_399_cast_fp16)[name = string("normed_477_cast_fp16")]; - tensor normed_479_begin_0 = const()[name = string("normed_479_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_479_end_0 = const()[name = string("normed_479_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_479_end_mask_0 = const()[name = string("normed_479_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_479_cast_fp16 = slice_by_index(begin = normed_479_begin_0, end = normed_479_end_0, end_mask = normed_479_end_mask_0, x = normed_477_cast_fp16)[name = string("normed_479_cast_fp16")]; - tensor var_16496_to_fp16 = const()[name = string("op_16496_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433325312)))]; - tensor hidden_states_319_cast_fp16 = mul(x = normed_479_cast_fp16, y = var_16496_to_fp16)[name = string("hidden_states_319_cast_fp16")]; - tensor hidden_states_321_cast_fp16 = add(x = hidden_states_315_cast_fp16, y = hidden_states_319_cast_fp16)[name = string("hidden_states_321_cast_fp16")]; - int32 var_16550 = const()[name = string("op_16550"), val = int32(-1)]; - fp16 const_844_promoted_to_fp16 = const()[name = string("const_844_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_16552_cast_fp16 = mul(x = hidden_states_321_cast_fp16, y = const_844_promoted_to_fp16)[name = string("op_16552_cast_fp16")]; - bool input_401_interleave_0 = const()[name = string("input_401_interleave_0"), val = bool(false)]; - tensor input_401_cast_fp16 = concat(axis = var_16550, interleave = input_401_interleave_0, values = (hidden_states_321_cast_fp16, var_16552_cast_fp16))[name = string("input_401_cast_fp16")]; - tensor normed_481_axes_0 = const()[name = string("normed_481_axes_0"), val = tensor([-1])]; - fp16 var_16547_to_fp16 = const()[name = string("op_16547_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_481_cast_fp16 = layer_norm(axes = normed_481_axes_0, epsilon = var_16547_to_fp16, x = input_401_cast_fp16)[name = string("normed_481_cast_fp16")]; - tensor normed_483_begin_0 = const()[name = string("normed_483_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_483_end_0 = const()[name = string("normed_483_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_483_end_mask_0 = const()[name = string("normed_483_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_483_cast_fp16 = slice_by_index(begin = normed_483_begin_0, end = normed_483_end_0, end_mask = normed_483_end_mask_0, x = normed_481_cast_fp16)[name = string("normed_483_cast_fp16")]; - tensor var_16566_to_fp16 = const()[name = string("op_16566_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433327680)))]; - tensor hidden_states_323_cast_fp16 = mul(x = normed_483_cast_fp16, y = var_16566_to_fp16)[name = string("hidden_states_323_cast_fp16")]; - tensor var_16577 = const()[name = string("op_16577"), val = tensor([0, 2, 1])]; - tensor var_16580_axes_0 = const()[name = string("op_16580_axes_0"), val = tensor([2])]; - tensor var_16578_cast_fp16 = transpose(perm = var_16577, x = hidden_states_323_cast_fp16)[name = string("transpose_53")]; - tensor var_16580_cast_fp16 = expand_dims(axes = var_16580_axes_0, x = var_16578_cast_fp16)[name = string("op_16580_cast_fp16")]; - string query_states_161_pad_type_0 = const()[name = string("query_states_161_pad_type_0"), val = string("valid")]; - tensor query_states_161_strides_0 = const()[name = string("query_states_161_strides_0"), val = tensor([1, 1])]; - tensor query_states_161_pad_0 = const()[name = string("query_states_161_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_161_dilations_0 = const()[name = string("query_states_161_dilations_0"), val = tensor([1, 1])]; - int32 query_states_161_groups_0 = const()[name = string("query_states_161_groups_0"), val = int32(1)]; - tensor query_states_161 = conv(dilations = query_states_161_dilations_0, groups = query_states_161_groups_0, pad = query_states_161_pad_0, pad_type = query_states_161_pad_type_0, strides = query_states_161_strides_0, weight = model_model_layers_20_self_attn_q_proj_weight_palettized, x = var_16580_cast_fp16)[name = string("query_states_161")]; - string key_states_201_pad_type_0 = const()[name = string("key_states_201_pad_type_0"), val = string("valid")]; - tensor key_states_201_strides_0 = const()[name = string("key_states_201_strides_0"), val = tensor([1, 1])]; - tensor key_states_201_pad_0 = const()[name = string("key_states_201_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_201_dilations_0 = const()[name = string("key_states_201_dilations_0"), val = tensor([1, 1])]; - int32 key_states_201_groups_0 = const()[name = string("key_states_201_groups_0"), val = int32(1)]; - tensor key_states_201 = conv(dilations = key_states_201_dilations_0, groups = key_states_201_groups_0, pad = key_states_201_pad_0, pad_type = key_states_201_pad_type_0, strides = key_states_201_strides_0, weight = model_model_layers_20_self_attn_k_proj_weight_palettized, x = var_16580_cast_fp16)[name = string("key_states_201")]; - string value_states_161_pad_type_0 = const()[name = string("value_states_161_pad_type_0"), val = string("valid")]; - tensor value_states_161_strides_0 = const()[name = string("value_states_161_strides_0"), val = tensor([1, 1])]; - tensor value_states_161_pad_0 = const()[name = string("value_states_161_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_161_dilations_0 = const()[name = string("value_states_161_dilations_0"), val = tensor([1, 1])]; - int32 value_states_161_groups_0 = const()[name = string("value_states_161_groups_0"), val = int32(1)]; - tensor value_states_161 = conv(dilations = value_states_161_dilations_0, groups = value_states_161_groups_0, pad = value_states_161_pad_0, pad_type = value_states_161_pad_type_0, strides = value_states_161_strides_0, weight = model_model_layers_20_self_attn_v_proj_weight_palettized, x = var_16580_cast_fp16)[name = string("value_states_161")]; - tensor var_16622 = const()[name = string("op_16622"), val = tensor([1, 4, 256, 64])]; - tensor var_16623 = reshape(shape = var_16622, x = query_states_161)[name = string("op_16623")]; - tensor var_16628 = const()[name = string("op_16628"), val = tensor([0, 1, 3, 2])]; - tensor var_16633 = const()[name = string("op_16633"), val = tensor([1, 1, 256, 64])]; - tensor var_16634 = reshape(shape = var_16633, x = key_states_201)[name = string("op_16634")]; - tensor var_16639 = const()[name = string("op_16639"), val = tensor([0, 1, 3, 2])]; - tensor var_16644 = const()[name = string("op_16644"), val = tensor([1, 1, 256, 64])]; - tensor var_16645 = reshape(shape = var_16644, x = value_states_161)[name = string("op_16645")]; - tensor var_16650 = const()[name = string("op_16650"), val = tensor([0, 1, 3, 2])]; - int32 var_16661 = const()[name = string("op_16661"), val = int32(-1)]; - fp16 const_849_promoted = const()[name = string("const_849_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_325 = transpose(perm = var_16628, x = var_16623)[name = string("transpose_52")]; - tensor var_16663 = mul(x = hidden_states_325, y = const_849_promoted)[name = string("op_16663")]; - bool input_405_interleave_0 = const()[name = string("input_405_interleave_0"), val = bool(false)]; - tensor input_405 = concat(axis = var_16661, interleave = input_405_interleave_0, values = (hidden_states_325, var_16663))[name = string("input_405")]; - tensor normed_485_axes_0 = const()[name = string("normed_485_axes_0"), val = tensor([-1])]; - fp16 var_16658_to_fp16 = const()[name = string("op_16658_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_485_cast_fp16 = layer_norm(axes = normed_485_axes_0, epsilon = var_16658_to_fp16, x = input_405)[name = string("normed_485_cast_fp16")]; - tensor normed_487_begin_0 = const()[name = string("normed_487_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_487_end_0 = const()[name = string("normed_487_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_487_end_mask_0 = const()[name = string("normed_487_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_487 = slice_by_index(begin = normed_487_begin_0, end = normed_487_end_0, end_mask = normed_487_end_mask_0, x = normed_485_cast_fp16)[name = string("normed_487")]; - tensor var_16677_to_fp16 = const()[name = string("op_16677_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433330048)))]; - tensor q_41_cast_fp16 = mul(x = normed_487, y = var_16677_to_fp16)[name = string("q_41_cast_fp16")]; - int32 var_16688 = const()[name = string("op_16688"), val = int32(-1)]; - fp16 const_853_promoted = const()[name = string("const_853_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_327 = transpose(perm = var_16639, x = var_16634)[name = string("transpose_51")]; - tensor var_16690 = mul(x = hidden_states_327, y = const_853_promoted)[name = string("op_16690")]; - bool input_407_interleave_0 = const()[name = string("input_407_interleave_0"), val = bool(false)]; - tensor input_407 = concat(axis = var_16688, interleave = input_407_interleave_0, values = (hidden_states_327, var_16690))[name = string("input_407")]; - tensor normed_489_axes_0 = const()[name = string("normed_489_axes_0"), val = tensor([-1])]; - fp16 var_16685_to_fp16 = const()[name = string("op_16685_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_489_cast_fp16 = layer_norm(axes = normed_489_axes_0, epsilon = var_16685_to_fp16, x = input_407)[name = string("normed_489_cast_fp16")]; - tensor normed_491_begin_0 = const()[name = string("normed_491_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_491_end_0 = const()[name = string("normed_491_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_491_end_mask_0 = const()[name = string("normed_491_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_491 = slice_by_index(begin = normed_491_begin_0, end = normed_491_end_0, end_mask = normed_491_end_mask_0, x = normed_489_cast_fp16)[name = string("normed_491")]; - tensor var_16704_to_fp16 = const()[name = string("op_16704_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433330624)))]; - tensor k_41_cast_fp16 = mul(x = normed_491, y = var_16704_to_fp16)[name = string("k_41_cast_fp16")]; - tensor var_16718_cast_fp16 = mul(x = q_41_cast_fp16, y = cos_5)[name = string("op_16718_cast_fp16")]; - tensor x1_81_begin_0 = const()[name = string("x1_81_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_81_end_0 = const()[name = string("x1_81_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_81_end_mask_0 = const()[name = string("x1_81_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_81_cast_fp16 = slice_by_index(begin = x1_81_begin_0, end = x1_81_end_0, end_mask = x1_81_end_mask_0, x = q_41_cast_fp16)[name = string("x1_81_cast_fp16")]; - tensor x2_81_begin_0 = const()[name = string("x2_81_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_81_end_0 = const()[name = string("x2_81_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_81_end_mask_0 = const()[name = string("x2_81_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_81_cast_fp16 = slice_by_index(begin = x2_81_begin_0, end = x2_81_end_0, end_mask = x2_81_end_mask_0, x = q_41_cast_fp16)[name = string("x2_81_cast_fp16")]; - fp16 const_859_promoted_to_fp16 = const()[name = string("const_859_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_16739_cast_fp16 = mul(x = x2_81_cast_fp16, y = const_859_promoted_to_fp16)[name = string("op_16739_cast_fp16")]; - int32 var_16741 = const()[name = string("op_16741"), val = int32(-1)]; - bool var_16742_interleave_0 = const()[name = string("op_16742_interleave_0"), val = bool(false)]; - tensor var_16742_cast_fp16 = concat(axis = var_16741, interleave = var_16742_interleave_0, values = (var_16739_cast_fp16, x1_81_cast_fp16))[name = string("op_16742_cast_fp16")]; - tensor var_16743_cast_fp16 = mul(x = var_16742_cast_fp16, y = sin_5)[name = string("op_16743_cast_fp16")]; - tensor query_states_163_cast_fp16 = add(x = var_16718_cast_fp16, y = var_16743_cast_fp16)[name = string("query_states_163_cast_fp16")]; - tensor var_16746_cast_fp16 = mul(x = k_41_cast_fp16, y = cos_5)[name = string("op_16746_cast_fp16")]; - tensor x1_83_begin_0 = const()[name = string("x1_83_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_83_end_0 = const()[name = string("x1_83_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_83_end_mask_0 = const()[name = string("x1_83_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_83_cast_fp16 = slice_by_index(begin = x1_83_begin_0, end = x1_83_end_0, end_mask = x1_83_end_mask_0, x = k_41_cast_fp16)[name = string("x1_83_cast_fp16")]; - tensor x2_83_begin_0 = const()[name = string("x2_83_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_83_end_0 = const()[name = string("x2_83_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_83_end_mask_0 = const()[name = string("x2_83_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_83_cast_fp16 = slice_by_index(begin = x2_83_begin_0, end = x2_83_end_0, end_mask = x2_83_end_mask_0, x = k_41_cast_fp16)[name = string("x2_83_cast_fp16")]; - fp16 const_862_promoted_to_fp16 = const()[name = string("const_862_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_16767_cast_fp16 = mul(x = x2_83_cast_fp16, y = const_862_promoted_to_fp16)[name = string("op_16767_cast_fp16")]; - int32 var_16769 = const()[name = string("op_16769"), val = int32(-1)]; - bool var_16770_interleave_0 = const()[name = string("op_16770_interleave_0"), val = bool(false)]; - tensor var_16770_cast_fp16 = concat(axis = var_16769, interleave = var_16770_interleave_0, values = (var_16767_cast_fp16, x1_83_cast_fp16))[name = string("op_16770_cast_fp16")]; - tensor var_16771_cast_fp16 = mul(x = var_16770_cast_fp16, y = sin_5)[name = string("op_16771_cast_fp16")]; - tensor key_states_203_cast_fp16 = add(x = var_16746_cast_fp16, y = var_16771_cast_fp16)[name = string("key_states_203_cast_fp16")]; - tensor key_slice_35_begin_0 = const()[name = string("key_slice_35_begin_0"), val = tensor([17, 0, 0, 0])]; - tensor key_slice_35_end_0 = const()[name = string("key_slice_35_end_0"), val = tensor([18, 1, 512, 256])]; - tensor key_slice_35_end_mask_0 = const()[name = string("key_slice_35_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_35_cast_fp16 = slice_by_index(begin = key_slice_35_begin_0, end = key_slice_35_end_0, end_mask = key_slice_35_end_mask_0, x = coreml_update_state_91)[name = string("key_slice_35_cast_fp16")]; - tensor var_16808_begin_0 = const()[name = string("op_16808_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_16808_end_0 = const()[name = string("op_16808_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_16808_end_mask_0 = const()[name = string("op_16808_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_16808_cast_fp16 = slice_by_index(begin = var_16808_begin_0, end = var_16808_end_0, end_mask = var_16808_end_mask_0, x = key_slice_35_cast_fp16)[name = string("op_16808_cast_fp16")]; - int32 var_16835 = const()[name = string("op_16835"), val = int32(2)]; - bool shifted_key_35_interleave_0 = const()[name = string("shifted_key_35_interleave_0"), val = bool(false)]; - tensor shifted_key_35_cast_fp16 = concat(axis = var_16835, interleave = shifted_key_35_interleave_0, values = (var_16808_cast_fp16, key_states_203_cast_fp16))[name = string("shifted_key_35_cast_fp16")]; - tensor concat_292 = const()[name = string("concat_292"), val = tensor([17, 0, 0, 0])]; - tensor concat_293 = const()[name = string("concat_293"), val = tensor([18, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_35_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_35_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_35_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_35_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_35_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_35_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_35_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_35_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_35_cast_fp16 = slice_update(begin = concat_292, begin_mask = model_model_kv_cache_local_internal_tensor_assign_35_begin_mask_0, end = concat_293, end_mask = model_model_kv_cache_local_internal_tensor_assign_35_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_35_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_35_stride_0, update = shifted_key_35_cast_fp16, x = coreml_update_state_91)[name = string("model_model_kv_cache_local_internal_tensor_assign_35_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_35_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_196_write_state")]; - tensor coreml_update_state_92 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_196")]; - tensor value_slice_35_begin_0 = const()[name = string("value_slice_35_begin_0"), val = tensor([39, 0, 0, 0])]; - tensor value_slice_35_end_0 = const()[name = string("value_slice_35_end_0"), val = tensor([40, 1, 512, 256])]; - tensor value_slice_35_end_mask_0 = const()[name = string("value_slice_35_end_mask_0"), val = tensor([false, true, true, true])]; - tensor value_slice_35_cast_fp16 = slice_by_index(begin = value_slice_35_begin_0, end = value_slice_35_end_0, end_mask = value_slice_35_end_mask_0, x = coreml_update_state_92)[name = string("value_slice_35_cast_fp16")]; - tensor var_16878_begin_0 = const()[name = string("op_16878_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_16878_end_0 = const()[name = string("op_16878_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_16878_end_mask_0 = const()[name = string("op_16878_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_16878_cast_fp16 = slice_by_index(begin = var_16878_begin_0, end = var_16878_end_0, end_mask = var_16878_end_mask_0, x = value_slice_35_cast_fp16)[name = string("op_16878_cast_fp16")]; - int32 var_16905 = const()[name = string("op_16905"), val = int32(2)]; - bool shifted_value_35_interleave_0 = const()[name = string("shifted_value_35_interleave_0"), val = bool(false)]; - tensor value_states_163 = transpose(perm = var_16650, x = var_16645)[name = string("transpose_50")]; - tensor shifted_value_35_cast_fp16 = concat(axis = var_16905, interleave = shifted_value_35_interleave_0, values = (var_16878_cast_fp16, value_states_163))[name = string("shifted_value_35_cast_fp16")]; - tensor concat_294 = const()[name = string("concat_294"), val = tensor([39, 0, 0, 0])]; - tensor concat_295 = const()[name = string("concat_295"), val = tensor([40, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_36_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_36_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_36_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_36_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_36_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_36_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_36_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_36_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_36_cast_fp16 = slice_update(begin = concat_294, begin_mask = model_model_kv_cache_local_internal_tensor_assign_36_begin_mask_0, end = concat_295, end_mask = model_model_kv_cache_local_internal_tensor_assign_36_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_36_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_36_stride_0, update = shifted_value_35_cast_fp16, x = coreml_update_state_92)[name = string("model_model_kv_cache_local_internal_tensor_assign_36_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_36_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_197_write_state")]; - tensor coreml_update_state_93 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_197")]; - tensor var_16933_begin_0 = const()[name = string("op_16933_begin_0"), val = tensor([17, 0, 0, 0])]; - tensor var_16933_end_0 = const()[name = string("op_16933_end_0"), val = tensor([18, 1, 512, 256])]; - tensor var_16933_end_mask_0 = const()[name = string("op_16933_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_16933_cast_fp16 = slice_by_index(begin = var_16933_begin_0, end = var_16933_end_0, end_mask = var_16933_end_mask_0, x = coreml_update_state_93)[name = string("op_16933_cast_fp16")]; - tensor var_16940_begin_0 = const()[name = string("op_16940_begin_0"), val = tensor([39, 0, 0, 0])]; - tensor var_16940_end_0 = const()[name = string("op_16940_end_0"), val = tensor([40, 1, 512, 256])]; - tensor var_16940_end_mask_0 = const()[name = string("op_16940_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_16940_cast_fp16 = slice_by_index(begin = var_16940_begin_0, end = var_16940_end_0, end_mask = var_16940_end_mask_0, x = coreml_update_state_93)[name = string("op_16940_cast_fp16")]; - tensor var_16979 = const()[name = string("op_16979"), val = tensor([1, 4, 1, 1])]; - tensor x_325_cast_fp16 = tile(reps = var_16979, x = var_16933_cast_fp16)[name = string("x_325_cast_fp16")]; - tensor var_16999 = const()[name = string("op_16999"), val = tensor([1, 4, 1, 1])]; - tensor x_331_cast_fp16 = tile(reps = var_16999, x = var_16940_cast_fp16)[name = string("x_331_cast_fp16")]; - bool var_17026_transpose_x_0 = const()[name = string("op_17026_transpose_x_0"), val = bool(false)]; - bool var_17026_transpose_y_0 = const()[name = string("op_17026_transpose_y_0"), val = bool(true)]; - tensor var_17026 = matmul(transpose_x = var_17026_transpose_x_0, transpose_y = var_17026_transpose_y_0, x = query_states_163_cast_fp16, y = x_325_cast_fp16)[name = string("op_17026")]; - fp16 var_17027_to_fp16 = const()[name = string("op_17027_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_81_cast_fp16 = mul(x = var_17026, y = var_17027_to_fp16)[name = string("attn_weights_81_cast_fp16")]; - tensor attn_weights_83_cast_fp16 = add(x = attn_weights_81_cast_fp16, y = mask_slice_1)[name = string("attn_weights_83_cast_fp16")]; - int32 var_17062 = const()[name = string("op_17062"), val = int32(-1)]; - tensor var_17064_cast_fp16 = softmax(axis = var_17062, x = attn_weights_83_cast_fp16)[name = string("op_17064_cast_fp16")]; - tensor concat_300 = const()[name = string("concat_300"), val = tensor([4, 64, 512])]; - tensor reshape_60_cast_fp16 = reshape(shape = concat_300, x = var_17064_cast_fp16)[name = string("reshape_60_cast_fp16")]; - tensor concat_301 = const()[name = string("concat_301"), val = tensor([4, 512, 256])]; - tensor reshape_61_cast_fp16 = reshape(shape = concat_301, x = x_331_cast_fp16)[name = string("reshape_61_cast_fp16")]; - bool matmul_20_transpose_x_0 = const()[name = string("matmul_20_transpose_x_0"), val = bool(false)]; - bool matmul_20_transpose_y_0 = const()[name = string("matmul_20_transpose_y_0"), val = bool(false)]; - tensor matmul_20_cast_fp16 = matmul(transpose_x = matmul_20_transpose_x_0, transpose_y = matmul_20_transpose_y_0, x = reshape_60_cast_fp16, y = reshape_61_cast_fp16)[name = string("matmul_20_cast_fp16")]; - tensor concat_305 = const()[name = string("concat_305"), val = tensor([1, 4, 64, 256])]; - tensor reshape_62_cast_fp16 = reshape(shape = concat_305, x = matmul_20_cast_fp16)[name = string("reshape_62_cast_fp16")]; - tensor var_17076_perm_0 = const()[name = string("op_17076_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_17095 = const()[name = string("op_17095"), val = tensor([1, 64, 1024])]; - tensor var_17076_cast_fp16 = transpose(perm = var_17076_perm_0, x = reshape_62_cast_fp16)[name = string("transpose_49")]; - tensor attn_output_205_cast_fp16 = reshape(shape = var_17095, x = var_17076_cast_fp16)[name = string("attn_output_205_cast_fp16")]; - tensor var_17100 = const()[name = string("op_17100"), val = tensor([0, 2, 1])]; - string var_17116_pad_type_0 = const()[name = string("op_17116_pad_type_0"), val = string("valid")]; - int32 var_17116_groups_0 = const()[name = string("op_17116_groups_0"), val = int32(1)]; - tensor var_17116_strides_0 = const()[name = string("op_17116_strides_0"), val = tensor([1])]; - tensor var_17116_pad_0 = const()[name = string("op_17116_pad_0"), val = tensor([0, 0])]; - tensor var_17116_dilations_0 = const()[name = string("op_17116_dilations_0"), val = tensor([1])]; - tensor squeeze_20_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433331200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(434216000))))[name = string("squeeze_20_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_17101_cast_fp16 = transpose(perm = var_17100, x = attn_output_205_cast_fp16)[name = string("transpose_48")]; - tensor var_17116_cast_fp16 = conv(dilations = var_17116_dilations_0, groups = var_17116_groups_0, pad = var_17116_pad_0, pad_type = var_17116_pad_type_0, strides = var_17116_strides_0, weight = squeeze_20_cast_fp16_to_fp32_to_fp16_palettized, x = var_17101_cast_fp16)[name = string("op_17116_cast_fp16")]; - tensor var_17120 = const()[name = string("op_17120"), val = tensor([0, 2, 1])]; - int32 var_17131 = const()[name = string("op_17131"), val = int32(-1)]; - fp16 const_873_promoted_to_fp16 = const()[name = string("const_873_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_329_cast_fp16 = transpose(perm = var_17120, x = var_17116_cast_fp16)[name = string("transpose_47")]; - tensor var_17133_cast_fp16 = mul(x = hidden_states_329_cast_fp16, y = const_873_promoted_to_fp16)[name = string("op_17133_cast_fp16")]; - bool input_411_interleave_0 = const()[name = string("input_411_interleave_0"), val = bool(false)]; - tensor input_411_cast_fp16 = concat(axis = var_17131, interleave = input_411_interleave_0, values = (hidden_states_329_cast_fp16, var_17133_cast_fp16))[name = string("input_411_cast_fp16")]; - tensor normed_493_axes_0 = const()[name = string("normed_493_axes_0"), val = tensor([-1])]; - fp16 var_17128_to_fp16 = const()[name = string("op_17128_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_493_cast_fp16 = layer_norm(axes = normed_493_axes_0, epsilon = var_17128_to_fp16, x = input_411_cast_fp16)[name = string("normed_493_cast_fp16")]; - tensor normed_495_begin_0 = const()[name = string("normed_495_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_495_end_0 = const()[name = string("normed_495_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_495_end_mask_0 = const()[name = string("normed_495_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_495_cast_fp16 = slice_by_index(begin = normed_495_begin_0, end = normed_495_end_0, end_mask = normed_495_end_mask_0, x = normed_493_cast_fp16)[name = string("normed_495_cast_fp16")]; - tensor var_17147_to_fp16 = const()[name = string("op_17147_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(434234496)))]; - tensor attn_output_209_cast_fp16 = mul(x = normed_495_cast_fp16, y = var_17147_to_fp16)[name = string("attn_output_209_cast_fp16")]; - tensor hidden_states_331_cast_fp16 = add(x = hidden_states_321_cast_fp16, y = attn_output_209_cast_fp16)[name = string("hidden_states_331_cast_fp16")]; - int32 var_17160 = const()[name = string("op_17160"), val = int32(-1)]; - fp16 const_877_promoted_to_fp16 = const()[name = string("const_877_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_17162_cast_fp16 = mul(x = hidden_states_331_cast_fp16, y = const_877_promoted_to_fp16)[name = string("op_17162_cast_fp16")]; - bool input_413_interleave_0 = const()[name = string("input_413_interleave_0"), val = bool(false)]; - tensor input_413_cast_fp16 = concat(axis = var_17160, interleave = input_413_interleave_0, values = (hidden_states_331_cast_fp16, var_17162_cast_fp16))[name = string("input_413_cast_fp16")]; - tensor normed_497_axes_0 = const()[name = string("normed_497_axes_0"), val = tensor([-1])]; - fp16 var_17157_to_fp16 = const()[name = string("op_17157_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_497_cast_fp16 = layer_norm(axes = normed_497_axes_0, epsilon = var_17157_to_fp16, x = input_413_cast_fp16)[name = string("normed_497_cast_fp16")]; - tensor normed_499_begin_0 = const()[name = string("normed_499_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_499_end_0 = const()[name = string("normed_499_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_499_end_mask_0 = const()[name = string("normed_499_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_499_cast_fp16 = slice_by_index(begin = normed_499_begin_0, end = normed_499_end_0, end_mask = normed_499_end_mask_0, x = normed_497_cast_fp16)[name = string("normed_499_cast_fp16")]; - tensor var_17176_to_fp16 = const()[name = string("op_17176_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(434236864)))]; - tensor x_333_cast_fp16 = mul(x = normed_499_cast_fp16, y = var_17176_to_fp16)[name = string("x_333_cast_fp16")]; - tensor var_17188 = const()[name = string("op_17188"), val = tensor([0, 2, 1])]; - tensor input_415_axes_0 = const()[name = string("input_415_axes_0"), val = tensor([2])]; - tensor var_17189_cast_fp16 = transpose(perm = var_17188, x = x_333_cast_fp16)[name = string("transpose_46")]; - tensor input_415_cast_fp16 = expand_dims(axes = input_415_axes_0, x = var_17189_cast_fp16)[name = string("input_415_cast_fp16")]; - string x_335_pad_type_0 = const()[name = string("x_335_pad_type_0"), val = string("valid")]; - tensor x_335_strides_0 = const()[name = string("x_335_strides_0"), val = tensor([1, 1])]; - tensor x_335_pad_0 = const()[name = string("x_335_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_335_dilations_0 = const()[name = string("x_335_dilations_0"), val = tensor([1, 1])]; - int32 x_335_groups_0 = const()[name = string("x_335_groups_0"), val = int32(1)]; - tensor model_model_layers_20_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1434742720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1440714752))))[name = string("model_model_layers_20_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_335_cast_fp16 = conv(dilations = x_335_dilations_0, groups = x_335_groups_0, pad = x_335_pad_0, pad_type = x_335_pad_type_0, strides = x_335_strides_0, weight = model_model_layers_20_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_415_cast_fp16)[name = string("x_335_cast_fp16")]; - string b_41_pad_type_0 = const()[name = string("b_41_pad_type_0"), val = string("valid")]; - tensor b_41_strides_0 = const()[name = string("b_41_strides_0"), val = tensor([1, 1])]; - tensor b_41_pad_0 = const()[name = string("b_41_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_41_dilations_0 = const()[name = string("b_41_dilations_0"), val = tensor([1, 1])]; - int32 b_41_groups_0 = const()[name = string("b_41_groups_0"), val = int32(1)]; - tensor model_model_layers_20_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1440825408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1446797440))))[name = string("model_model_layers_20_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_41_cast_fp16 = conv(dilations = b_41_dilations_0, groups = b_41_groups_0, pad = b_41_pad_0, pad_type = b_41_pad_type_0, strides = b_41_strides_0, weight = model_model_layers_20_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_415_cast_fp16)[name = string("b_41_cast_fp16")]; - string var_17214_mode_0 = const()[name = string("op_17214_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_17214_cast_fp16 = gelu(mode = var_17214_mode_0, x = x_335_cast_fp16)[name = string("op_17214_cast_fp16")]; - tensor input_417_cast_fp16 = mul(x = var_17214_cast_fp16, y = b_41_cast_fp16)[name = string("input_417_cast_fp16")]; - string e_41_pad_type_0 = const()[name = string("e_41_pad_type_0"), val = string("valid")]; - tensor e_41_strides_0 = const()[name = string("e_41_strides_0"), val = tensor([1, 1])]; - tensor e_41_pad_0 = const()[name = string("e_41_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_41_dilations_0 = const()[name = string("e_41_dilations_0"), val = tensor([1, 1])]; - int32 e_41_groups_0 = const()[name = string("e_41_groups_0"), val = int32(1)]; - tensor model_model_layers_20_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(446404608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(452376640))))[name = string("model_model_layers_20_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_41_cast_fp16 = conv(dilations = e_41_dilations_0, groups = e_41_groups_0, pad = e_41_pad_0, pad_type = e_41_pad_type_0, strides = e_41_strides_0, weight = model_model_layers_20_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_417_cast_fp16)[name = string("e_41_cast_fp16")]; - tensor var_17222_axes_0 = const()[name = string("op_17222_axes_0"), val = tensor([2])]; - tensor var_17222_cast_fp16 = squeeze(axes = var_17222_axes_0, x = e_41_cast_fp16)[name = string("op_17222_cast_fp16")]; - tensor var_17223 = const()[name = string("op_17223"), val = tensor([0, 2, 1])]; - int32 var_17234 = const()[name = string("op_17234"), val = int32(-1)]; - fp16 const_881_promoted_to_fp16 = const()[name = string("const_881_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_333_cast_fp16 = transpose(perm = var_17223, x = var_17222_cast_fp16)[name = string("transpose_45")]; - tensor var_17236_cast_fp16 = mul(x = hidden_states_333_cast_fp16, y = const_881_promoted_to_fp16)[name = string("op_17236_cast_fp16")]; - bool input_419_interleave_0 = const()[name = string("input_419_interleave_0"), val = bool(false)]; - tensor input_419_cast_fp16 = concat(axis = var_17234, interleave = input_419_interleave_0, values = (hidden_states_333_cast_fp16, var_17236_cast_fp16))[name = string("input_419_cast_fp16")]; - tensor normed_501_axes_0 = const()[name = string("normed_501_axes_0"), val = tensor([-1])]; - fp16 var_17231_to_fp16 = const()[name = string("op_17231_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_501_cast_fp16 = layer_norm(axes = normed_501_axes_0, epsilon = var_17231_to_fp16, x = input_419_cast_fp16)[name = string("normed_501_cast_fp16")]; - tensor normed_503_begin_0 = const()[name = string("normed_503_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_503_end_0 = const()[name = string("normed_503_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_503_end_mask_0 = const()[name = string("normed_503_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_503_cast_fp16 = slice_by_index(begin = normed_503_begin_0, end = normed_503_end_0, end_mask = normed_503_end_mask_0, x = normed_501_cast_fp16)[name = string("normed_503_cast_fp16")]; - tensor var_17250_to_fp16 = const()[name = string("op_17250_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(452395136)))]; - tensor hidden_states_335_cast_fp16 = mul(x = normed_503_cast_fp16, y = var_17250_to_fp16)[name = string("hidden_states_335_cast_fp16")]; - tensor hidden_states_337_cast_fp16 = add(x = hidden_states_331_cast_fp16, y = hidden_states_335_cast_fp16)[name = string("hidden_states_337_cast_fp16")]; - int32 var_17304 = const()[name = string("op_17304"), val = int32(-1)]; - fp16 const_886_promoted_to_fp16 = const()[name = string("const_886_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_17306_cast_fp16 = mul(x = hidden_states_337_cast_fp16, y = const_886_promoted_to_fp16)[name = string("op_17306_cast_fp16")]; - bool input_421_interleave_0 = const()[name = string("input_421_interleave_0"), val = bool(false)]; - tensor input_421_cast_fp16 = concat(axis = var_17304, interleave = input_421_interleave_0, values = (hidden_states_337_cast_fp16, var_17306_cast_fp16))[name = string("input_421_cast_fp16")]; - tensor normed_505_axes_0 = const()[name = string("normed_505_axes_0"), val = tensor([-1])]; - fp16 var_17301_to_fp16 = const()[name = string("op_17301_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_505_cast_fp16 = layer_norm(axes = normed_505_axes_0, epsilon = var_17301_to_fp16, x = input_421_cast_fp16)[name = string("normed_505_cast_fp16")]; - tensor normed_507_begin_0 = const()[name = string("normed_507_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_507_end_0 = const()[name = string("normed_507_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_507_end_mask_0 = const()[name = string("normed_507_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_507_cast_fp16 = slice_by_index(begin = normed_507_begin_0, end = normed_507_end_0, end_mask = normed_507_end_mask_0, x = normed_505_cast_fp16)[name = string("normed_507_cast_fp16")]; - tensor var_17320_to_fp16 = const()[name = string("op_17320_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(452397504)))]; - tensor hidden_states_339_cast_fp16 = mul(x = normed_507_cast_fp16, y = var_17320_to_fp16)[name = string("hidden_states_339_cast_fp16")]; - tensor var_17331 = const()[name = string("op_17331"), val = tensor([0, 2, 1])]; - tensor var_17334_axes_0 = const()[name = string("op_17334_axes_0"), val = tensor([2])]; - tensor var_17332_cast_fp16 = transpose(perm = var_17331, x = hidden_states_339_cast_fp16)[name = string("transpose_44")]; - tensor var_17334_cast_fp16 = expand_dims(axes = var_17334_axes_0, x = var_17332_cast_fp16)[name = string("op_17334_cast_fp16")]; - string query_states_169_pad_type_0 = const()[name = string("query_states_169_pad_type_0"), val = string("valid")]; - tensor query_states_169_strides_0 = const()[name = string("query_states_169_strides_0"), val = tensor([1, 1])]; - tensor query_states_169_pad_0 = const()[name = string("query_states_169_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_169_dilations_0 = const()[name = string("query_states_169_dilations_0"), val = tensor([1, 1])]; - int32 query_states_169_groups_0 = const()[name = string("query_states_169_groups_0"), val = int32(1)]; - tensor query_states_169 = conv(dilations = query_states_169_dilations_0, groups = query_states_169_groups_0, pad = query_states_169_pad_0, pad_type = query_states_169_pad_type_0, strides = query_states_169_strides_0, weight = model_model_layers_21_self_attn_q_proj_weight_palettized, x = var_17334_cast_fp16)[name = string("query_states_169")]; - string key_states_211_pad_type_0 = const()[name = string("key_states_211_pad_type_0"), val = string("valid")]; - tensor key_states_211_strides_0 = const()[name = string("key_states_211_strides_0"), val = tensor([1, 1])]; - tensor key_states_211_pad_0 = const()[name = string("key_states_211_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_211_dilations_0 = const()[name = string("key_states_211_dilations_0"), val = tensor([1, 1])]; - int32 key_states_211_groups_0 = const()[name = string("key_states_211_groups_0"), val = int32(1)]; - tensor key_states_211 = conv(dilations = key_states_211_dilations_0, groups = key_states_211_groups_0, pad = key_states_211_pad_0, pad_type = key_states_211_pad_type_0, strides = key_states_211_strides_0, weight = model_model_layers_21_self_attn_k_proj_weight_palettized, x = var_17334_cast_fp16)[name = string("key_states_211")]; - string value_states_169_pad_type_0 = const()[name = string("value_states_169_pad_type_0"), val = string("valid")]; - tensor value_states_169_strides_0 = const()[name = string("value_states_169_strides_0"), val = tensor([1, 1])]; - tensor value_states_169_pad_0 = const()[name = string("value_states_169_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_169_dilations_0 = const()[name = string("value_states_169_dilations_0"), val = tensor([1, 1])]; - int32 value_states_169_groups_0 = const()[name = string("value_states_169_groups_0"), val = int32(1)]; - tensor value_states_169 = conv(dilations = value_states_169_dilations_0, groups = value_states_169_groups_0, pad = value_states_169_pad_0, pad_type = value_states_169_pad_type_0, strides = value_states_169_strides_0, weight = model_model_layers_21_self_attn_v_proj_weight_palettized, x = var_17334_cast_fp16)[name = string("value_states_169")]; - tensor var_17376 = const()[name = string("op_17376"), val = tensor([1, 4, 256, 64])]; - tensor var_17377 = reshape(shape = var_17376, x = query_states_169)[name = string("op_17377")]; - tensor var_17382 = const()[name = string("op_17382"), val = tensor([0, 1, 3, 2])]; - tensor var_17387 = const()[name = string("op_17387"), val = tensor([1, 1, 256, 64])]; - tensor var_17388 = reshape(shape = var_17387, x = key_states_211)[name = string("op_17388")]; - tensor var_17393 = const()[name = string("op_17393"), val = tensor([0, 1, 3, 2])]; - tensor var_17398 = const()[name = string("op_17398"), val = tensor([1, 1, 256, 64])]; - tensor var_17399 = reshape(shape = var_17398, x = value_states_169)[name = string("op_17399")]; - tensor var_17404 = const()[name = string("op_17404"), val = tensor([0, 1, 3, 2])]; - int32 var_17415 = const()[name = string("op_17415"), val = int32(-1)]; - fp16 const_891_promoted = const()[name = string("const_891_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_341 = transpose(perm = var_17382, x = var_17377)[name = string("transpose_43")]; - tensor var_17417 = mul(x = hidden_states_341, y = const_891_promoted)[name = string("op_17417")]; - bool input_425_interleave_0 = const()[name = string("input_425_interleave_0"), val = bool(false)]; - tensor input_425 = concat(axis = var_17415, interleave = input_425_interleave_0, values = (hidden_states_341, var_17417))[name = string("input_425")]; - tensor normed_509_axes_0 = const()[name = string("normed_509_axes_0"), val = tensor([-1])]; - fp16 var_17412_to_fp16 = const()[name = string("op_17412_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_509_cast_fp16 = layer_norm(axes = normed_509_axes_0, epsilon = var_17412_to_fp16, x = input_425)[name = string("normed_509_cast_fp16")]; - tensor normed_511_begin_0 = const()[name = string("normed_511_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_511_end_0 = const()[name = string("normed_511_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_511_end_mask_0 = const()[name = string("normed_511_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_511 = slice_by_index(begin = normed_511_begin_0, end = normed_511_end_0, end_mask = normed_511_end_mask_0, x = normed_509_cast_fp16)[name = string("normed_511")]; - tensor var_17431_to_fp16 = const()[name = string("op_17431_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(452399872)))]; - tensor q_43_cast_fp16 = mul(x = normed_511, y = var_17431_to_fp16)[name = string("q_43_cast_fp16")]; - int32 var_17442 = const()[name = string("op_17442"), val = int32(-1)]; - fp16 const_895_promoted = const()[name = string("const_895_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_343 = transpose(perm = var_17393, x = var_17388)[name = string("transpose_42")]; - tensor var_17444 = mul(x = hidden_states_343, y = const_895_promoted)[name = string("op_17444")]; - bool input_427_interleave_0 = const()[name = string("input_427_interleave_0"), val = bool(false)]; - tensor input_427 = concat(axis = var_17442, interleave = input_427_interleave_0, values = (hidden_states_343, var_17444))[name = string("input_427")]; - tensor normed_513_axes_0 = const()[name = string("normed_513_axes_0"), val = tensor([-1])]; - fp16 var_17439_to_fp16 = const()[name = string("op_17439_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_513_cast_fp16 = layer_norm(axes = normed_513_axes_0, epsilon = var_17439_to_fp16, x = input_427)[name = string("normed_513_cast_fp16")]; - tensor normed_515_begin_0 = const()[name = string("normed_515_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_515_end_0 = const()[name = string("normed_515_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_515_end_mask_0 = const()[name = string("normed_515_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_515 = slice_by_index(begin = normed_515_begin_0, end = normed_515_end_0, end_mask = normed_515_end_mask_0, x = normed_513_cast_fp16)[name = string("normed_515")]; - tensor var_17458_to_fp16 = const()[name = string("op_17458_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(452400448)))]; - tensor k_43_cast_fp16 = mul(x = normed_515, y = var_17458_to_fp16)[name = string("k_43_cast_fp16")]; - tensor var_17472_cast_fp16 = mul(x = q_43_cast_fp16, y = cos_5)[name = string("op_17472_cast_fp16")]; - tensor x1_85_begin_0 = const()[name = string("x1_85_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_85_end_0 = const()[name = string("x1_85_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_85_end_mask_0 = const()[name = string("x1_85_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_85_cast_fp16 = slice_by_index(begin = x1_85_begin_0, end = x1_85_end_0, end_mask = x1_85_end_mask_0, x = q_43_cast_fp16)[name = string("x1_85_cast_fp16")]; - tensor x2_85_begin_0 = const()[name = string("x2_85_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_85_end_0 = const()[name = string("x2_85_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_85_end_mask_0 = const()[name = string("x2_85_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_85_cast_fp16 = slice_by_index(begin = x2_85_begin_0, end = x2_85_end_0, end_mask = x2_85_end_mask_0, x = q_43_cast_fp16)[name = string("x2_85_cast_fp16")]; - fp16 const_901_promoted_to_fp16 = const()[name = string("const_901_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_17493_cast_fp16 = mul(x = x2_85_cast_fp16, y = const_901_promoted_to_fp16)[name = string("op_17493_cast_fp16")]; - int32 var_17495 = const()[name = string("op_17495"), val = int32(-1)]; - bool var_17496_interleave_0 = const()[name = string("op_17496_interleave_0"), val = bool(false)]; - tensor var_17496_cast_fp16 = concat(axis = var_17495, interleave = var_17496_interleave_0, values = (var_17493_cast_fp16, x1_85_cast_fp16))[name = string("op_17496_cast_fp16")]; - tensor var_17497_cast_fp16 = mul(x = var_17496_cast_fp16, y = sin_5)[name = string("op_17497_cast_fp16")]; - tensor query_states_171_cast_fp16 = add(x = var_17472_cast_fp16, y = var_17497_cast_fp16)[name = string("query_states_171_cast_fp16")]; - tensor var_17500_cast_fp16 = mul(x = k_43_cast_fp16, y = cos_5)[name = string("op_17500_cast_fp16")]; - tensor x1_87_begin_0 = const()[name = string("x1_87_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_87_end_0 = const()[name = string("x1_87_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_87_end_mask_0 = const()[name = string("x1_87_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_87_cast_fp16 = slice_by_index(begin = x1_87_begin_0, end = x1_87_end_0, end_mask = x1_87_end_mask_0, x = k_43_cast_fp16)[name = string("x1_87_cast_fp16")]; - tensor x2_87_begin_0 = const()[name = string("x2_87_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_87_end_0 = const()[name = string("x2_87_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_87_end_mask_0 = const()[name = string("x2_87_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_87_cast_fp16 = slice_by_index(begin = x2_87_begin_0, end = x2_87_end_0, end_mask = x2_87_end_mask_0, x = k_43_cast_fp16)[name = string("x2_87_cast_fp16")]; - fp16 const_904_promoted_to_fp16 = const()[name = string("const_904_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_17521_cast_fp16 = mul(x = x2_87_cast_fp16, y = const_904_promoted_to_fp16)[name = string("op_17521_cast_fp16")]; - int32 var_17523 = const()[name = string("op_17523"), val = int32(-1)]; - bool var_17524_interleave_0 = const()[name = string("op_17524_interleave_0"), val = bool(false)]; - tensor var_17524_cast_fp16 = concat(axis = var_17523, interleave = var_17524_interleave_0, values = (var_17521_cast_fp16, x1_87_cast_fp16))[name = string("op_17524_cast_fp16")]; - tensor var_17525_cast_fp16 = mul(x = var_17524_cast_fp16, y = sin_5)[name = string("op_17525_cast_fp16")]; - tensor key_states_213_cast_fp16 = add(x = var_17500_cast_fp16, y = var_17525_cast_fp16)[name = string("key_states_213_cast_fp16")]; - tensor key_slice_37_begin_0 = const()[name = string("key_slice_37_begin_0"), val = tensor([18, 0, 0, 0])]; - tensor key_slice_37_end_0 = const()[name = string("key_slice_37_end_0"), val = tensor([19, 1, 512, 256])]; - tensor key_slice_37_end_mask_0 = const()[name = string("key_slice_37_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_37_cast_fp16 = slice_by_index(begin = key_slice_37_begin_0, end = key_slice_37_end_0, end_mask = key_slice_37_end_mask_0, x = coreml_update_state_93)[name = string("key_slice_37_cast_fp16")]; - tensor var_17562_begin_0 = const()[name = string("op_17562_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_17562_end_0 = const()[name = string("op_17562_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_17562_end_mask_0 = const()[name = string("op_17562_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_17562_cast_fp16 = slice_by_index(begin = var_17562_begin_0, end = var_17562_end_0, end_mask = var_17562_end_mask_0, x = key_slice_37_cast_fp16)[name = string("op_17562_cast_fp16")]; - int32 var_17589 = const()[name = string("op_17589"), val = int32(2)]; - bool shifted_key_37_interleave_0 = const()[name = string("shifted_key_37_interleave_0"), val = bool(false)]; - tensor shifted_key_37_cast_fp16 = concat(axis = var_17589, interleave = shifted_key_37_interleave_0, values = (var_17562_cast_fp16, key_states_213_cast_fp16))[name = string("shifted_key_37_cast_fp16")]; - tensor concat_306 = const()[name = string("concat_306"), val = tensor([18, 0, 0, 0])]; - tensor concat_307 = const()[name = string("concat_307"), val = tensor([19, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_37_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_37_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_37_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_37_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_37_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_37_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_37_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_37_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_37_cast_fp16 = slice_update(begin = concat_306, begin_mask = model_model_kv_cache_local_internal_tensor_assign_37_begin_mask_0, end = concat_307, end_mask = model_model_kv_cache_local_internal_tensor_assign_37_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_37_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_37_stride_0, update = shifted_key_37_cast_fp16, x = coreml_update_state_93)[name = string("model_model_kv_cache_local_internal_tensor_assign_37_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_37_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_198_write_state")]; - tensor coreml_update_state_94 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_198")]; - tensor value_slice_37_begin_0 = const()[name = string("value_slice_37_begin_0"), val = tensor([40, 0, 0, 0])]; - tensor value_slice_37_end_0 = const()[name = string("value_slice_37_end_0"), val = tensor([41, 1, 512, 256])]; - tensor value_slice_37_end_mask_0 = const()[name = string("value_slice_37_end_mask_0"), val = tensor([false, true, true, true])]; - tensor value_slice_37_cast_fp16 = slice_by_index(begin = value_slice_37_begin_0, end = value_slice_37_end_0, end_mask = value_slice_37_end_mask_0, x = coreml_update_state_94)[name = string("value_slice_37_cast_fp16")]; - tensor var_17632_begin_0 = const()[name = string("op_17632_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_17632_end_0 = const()[name = string("op_17632_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_17632_end_mask_0 = const()[name = string("op_17632_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_17632_cast_fp16 = slice_by_index(begin = var_17632_begin_0, end = var_17632_end_0, end_mask = var_17632_end_mask_0, x = value_slice_37_cast_fp16)[name = string("op_17632_cast_fp16")]; - int32 var_17659 = const()[name = string("op_17659"), val = int32(2)]; - bool shifted_value_37_interleave_0 = const()[name = string("shifted_value_37_interleave_0"), val = bool(false)]; - tensor value_states_171 = transpose(perm = var_17404, x = var_17399)[name = string("transpose_41")]; - tensor shifted_value_37_cast_fp16 = concat(axis = var_17659, interleave = shifted_value_37_interleave_0, values = (var_17632_cast_fp16, value_states_171))[name = string("shifted_value_37_cast_fp16")]; - tensor concat_308 = const()[name = string("concat_308"), val = tensor([40, 0, 0, 0])]; - tensor concat_309 = const()[name = string("concat_309"), val = tensor([41, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_38_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_38_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_38_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_38_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_38_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_38_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_38_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_38_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_38_cast_fp16 = slice_update(begin = concat_308, begin_mask = model_model_kv_cache_local_internal_tensor_assign_38_begin_mask_0, end = concat_309, end_mask = model_model_kv_cache_local_internal_tensor_assign_38_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_38_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_38_stride_0, update = shifted_value_37_cast_fp16, x = coreml_update_state_94)[name = string("model_model_kv_cache_local_internal_tensor_assign_38_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_38_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_199_write_state")]; - tensor coreml_update_state_95 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_199")]; - tensor var_17687_begin_0 = const()[name = string("op_17687_begin_0"), val = tensor([18, 0, 0, 0])]; - tensor var_17687_end_0 = const()[name = string("op_17687_end_0"), val = tensor([19, 1, 512, 256])]; - tensor var_17687_end_mask_0 = const()[name = string("op_17687_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_17687_cast_fp16 = slice_by_index(begin = var_17687_begin_0, end = var_17687_end_0, end_mask = var_17687_end_mask_0, x = coreml_update_state_95)[name = string("op_17687_cast_fp16")]; - tensor var_17694_begin_0 = const()[name = string("op_17694_begin_0"), val = tensor([40, 0, 0, 0])]; - tensor var_17694_end_0 = const()[name = string("op_17694_end_0"), val = tensor([41, 1, 512, 256])]; - tensor var_17694_end_mask_0 = const()[name = string("op_17694_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_17694_cast_fp16 = slice_by_index(begin = var_17694_begin_0, end = var_17694_end_0, end_mask = var_17694_end_mask_0, x = coreml_update_state_95)[name = string("op_17694_cast_fp16")]; - tensor var_17733 = const()[name = string("op_17733"), val = tensor([1, 4, 1, 1])]; - tensor x_341_cast_fp16 = tile(reps = var_17733, x = var_17687_cast_fp16)[name = string("x_341_cast_fp16")]; - tensor var_17753 = const()[name = string("op_17753"), val = tensor([1, 4, 1, 1])]; - tensor x_347_cast_fp16 = tile(reps = var_17753, x = var_17694_cast_fp16)[name = string("x_347_cast_fp16")]; - bool var_17780_transpose_x_0 = const()[name = string("op_17780_transpose_x_0"), val = bool(false)]; - bool var_17780_transpose_y_0 = const()[name = string("op_17780_transpose_y_0"), val = bool(true)]; - tensor var_17780 = matmul(transpose_x = var_17780_transpose_x_0, transpose_y = var_17780_transpose_y_0, x = query_states_171_cast_fp16, y = x_341_cast_fp16)[name = string("op_17780")]; - fp16 var_17781_to_fp16 = const()[name = string("op_17781_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_85_cast_fp16 = mul(x = var_17780, y = var_17781_to_fp16)[name = string("attn_weights_85_cast_fp16")]; - tensor attn_weights_87_cast_fp16 = add(x = attn_weights_85_cast_fp16, y = mask_slice_1)[name = string("attn_weights_87_cast_fp16")]; - int32 var_17816 = const()[name = string("op_17816"), val = int32(-1)]; - tensor var_17818_cast_fp16 = softmax(axis = var_17816, x = attn_weights_87_cast_fp16)[name = string("op_17818_cast_fp16")]; - tensor concat_314 = const()[name = string("concat_314"), val = tensor([4, 64, 512])]; - tensor reshape_63_cast_fp16 = reshape(shape = concat_314, x = var_17818_cast_fp16)[name = string("reshape_63_cast_fp16")]; - tensor concat_315 = const()[name = string("concat_315"), val = tensor([4, 512, 256])]; - tensor reshape_64_cast_fp16 = reshape(shape = concat_315, x = x_347_cast_fp16)[name = string("reshape_64_cast_fp16")]; - bool matmul_21_transpose_x_0 = const()[name = string("matmul_21_transpose_x_0"), val = bool(false)]; - bool matmul_21_transpose_y_0 = const()[name = string("matmul_21_transpose_y_0"), val = bool(false)]; - tensor matmul_21_cast_fp16 = matmul(transpose_x = matmul_21_transpose_x_0, transpose_y = matmul_21_transpose_y_0, x = reshape_63_cast_fp16, y = reshape_64_cast_fp16)[name = string("matmul_21_cast_fp16")]; - tensor concat_319 = const()[name = string("concat_319"), val = tensor([1, 4, 64, 256])]; - tensor reshape_65_cast_fp16 = reshape(shape = concat_319, x = matmul_21_cast_fp16)[name = string("reshape_65_cast_fp16")]; - tensor var_17830_perm_0 = const()[name = string("op_17830_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_17849 = const()[name = string("op_17849"), val = tensor([1, 64, 1024])]; - tensor var_17830_cast_fp16 = transpose(perm = var_17830_perm_0, x = reshape_65_cast_fp16)[name = string("transpose_40")]; - tensor attn_output_215_cast_fp16 = reshape(shape = var_17849, x = var_17830_cast_fp16)[name = string("attn_output_215_cast_fp16")]; - tensor var_17854 = const()[name = string("op_17854"), val = tensor([0, 2, 1])]; - string var_17870_pad_type_0 = const()[name = string("op_17870_pad_type_0"), val = string("valid")]; - int32 var_17870_groups_0 = const()[name = string("op_17870_groups_0"), val = int32(1)]; - tensor var_17870_strides_0 = const()[name = string("op_17870_strides_0"), val = tensor([1])]; - tensor var_17870_pad_0 = const()[name = string("op_17870_pad_0"), val = tensor([0, 0])]; - tensor var_17870_dilations_0 = const()[name = string("op_17870_dilations_0"), val = tensor([1])]; - tensor squeeze_21_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(452401024))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453285824))))[name = string("squeeze_21_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_17855_cast_fp16 = transpose(perm = var_17854, x = attn_output_215_cast_fp16)[name = string("transpose_39")]; - tensor var_17870_cast_fp16 = conv(dilations = var_17870_dilations_0, groups = var_17870_groups_0, pad = var_17870_pad_0, pad_type = var_17870_pad_type_0, strides = var_17870_strides_0, weight = squeeze_21_cast_fp16_to_fp32_to_fp16_palettized, x = var_17855_cast_fp16)[name = string("op_17870_cast_fp16")]; - tensor var_17874 = const()[name = string("op_17874"), val = tensor([0, 2, 1])]; - int32 var_17885 = const()[name = string("op_17885"), val = int32(-1)]; - fp16 const_915_promoted_to_fp16 = const()[name = string("const_915_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_345_cast_fp16 = transpose(perm = var_17874, x = var_17870_cast_fp16)[name = string("transpose_38")]; - tensor var_17887_cast_fp16 = mul(x = hidden_states_345_cast_fp16, y = const_915_promoted_to_fp16)[name = string("op_17887_cast_fp16")]; - bool input_431_interleave_0 = const()[name = string("input_431_interleave_0"), val = bool(false)]; - tensor input_431_cast_fp16 = concat(axis = var_17885, interleave = input_431_interleave_0, values = (hidden_states_345_cast_fp16, var_17887_cast_fp16))[name = string("input_431_cast_fp16")]; - tensor normed_517_axes_0 = const()[name = string("normed_517_axes_0"), val = tensor([-1])]; - fp16 var_17882_to_fp16 = const()[name = string("op_17882_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_517_cast_fp16 = layer_norm(axes = normed_517_axes_0, epsilon = var_17882_to_fp16, x = input_431_cast_fp16)[name = string("normed_517_cast_fp16")]; - tensor normed_519_begin_0 = const()[name = string("normed_519_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_519_end_0 = const()[name = string("normed_519_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_519_end_mask_0 = const()[name = string("normed_519_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_519_cast_fp16 = slice_by_index(begin = normed_519_begin_0, end = normed_519_end_0, end_mask = normed_519_end_mask_0, x = normed_517_cast_fp16)[name = string("normed_519_cast_fp16")]; - tensor var_17901_to_fp16 = const()[name = string("op_17901_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453304320)))]; - tensor attn_output_219_cast_fp16 = mul(x = normed_519_cast_fp16, y = var_17901_to_fp16)[name = string("attn_output_219_cast_fp16")]; - tensor hidden_states_347_cast_fp16 = add(x = hidden_states_337_cast_fp16, y = attn_output_219_cast_fp16)[name = string("hidden_states_347_cast_fp16")]; - int32 var_17914 = const()[name = string("op_17914"), val = int32(-1)]; - fp16 const_919_promoted_to_fp16 = const()[name = string("const_919_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_17916_cast_fp16 = mul(x = hidden_states_347_cast_fp16, y = const_919_promoted_to_fp16)[name = string("op_17916_cast_fp16")]; - bool input_433_interleave_0 = const()[name = string("input_433_interleave_0"), val = bool(false)]; - tensor input_433_cast_fp16 = concat(axis = var_17914, interleave = input_433_interleave_0, values = (hidden_states_347_cast_fp16, var_17916_cast_fp16))[name = string("input_433_cast_fp16")]; - tensor normed_521_axes_0 = const()[name = string("normed_521_axes_0"), val = tensor([-1])]; - fp16 var_17911_to_fp16 = const()[name = string("op_17911_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_521_cast_fp16 = layer_norm(axes = normed_521_axes_0, epsilon = var_17911_to_fp16, x = input_433_cast_fp16)[name = string("normed_521_cast_fp16")]; - tensor normed_523_begin_0 = const()[name = string("normed_523_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_523_end_0 = const()[name = string("normed_523_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_523_end_mask_0 = const()[name = string("normed_523_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_523_cast_fp16 = slice_by_index(begin = normed_523_begin_0, end = normed_523_end_0, end_mask = normed_523_end_mask_0, x = normed_521_cast_fp16)[name = string("normed_523_cast_fp16")]; - tensor var_17930_to_fp16 = const()[name = string("op_17930_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453306688)))]; - tensor x_349_cast_fp16 = mul(x = normed_523_cast_fp16, y = var_17930_to_fp16)[name = string("x_349_cast_fp16")]; - tensor var_17942 = const()[name = string("op_17942"), val = tensor([0, 2, 1])]; - tensor input_435_axes_0 = const()[name = string("input_435_axes_0"), val = tensor([2])]; - tensor var_17943_cast_fp16 = transpose(perm = var_17942, x = x_349_cast_fp16)[name = string("transpose_37")]; - tensor input_435_cast_fp16 = expand_dims(axes = input_435_axes_0, x = var_17943_cast_fp16)[name = string("input_435_cast_fp16")]; - string x_351_pad_type_0 = const()[name = string("x_351_pad_type_0"), val = string("valid")]; - tensor x_351_strides_0 = const()[name = string("x_351_strides_0"), val = tensor([1, 1])]; - tensor x_351_pad_0 = const()[name = string("x_351_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_351_dilations_0 = const()[name = string("x_351_dilations_0"), val = tensor([1, 1])]; - int32 x_351_groups_0 = const()[name = string("x_351_groups_0"), val = int32(1)]; - tensor model_model_layers_21_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1446908096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1452880128))))[name = string("model_model_layers_21_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_351_cast_fp16 = conv(dilations = x_351_dilations_0, groups = x_351_groups_0, pad = x_351_pad_0, pad_type = x_351_pad_type_0, strides = x_351_strides_0, weight = model_model_layers_21_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_435_cast_fp16)[name = string("x_351_cast_fp16")]; - string b_43_pad_type_0 = const()[name = string("b_43_pad_type_0"), val = string("valid")]; - tensor b_43_strides_0 = const()[name = string("b_43_strides_0"), val = tensor([1, 1])]; - tensor b_43_pad_0 = const()[name = string("b_43_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_43_dilations_0 = const()[name = string("b_43_dilations_0"), val = tensor([1, 1])]; - int32 b_43_groups_0 = const()[name = string("b_43_groups_0"), val = int32(1)]; - tensor model_model_layers_21_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1452990784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1458962816))))[name = string("model_model_layers_21_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_43_cast_fp16 = conv(dilations = b_43_dilations_0, groups = b_43_groups_0, pad = b_43_pad_0, pad_type = b_43_pad_type_0, strides = b_43_strides_0, weight = model_model_layers_21_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_435_cast_fp16)[name = string("b_43_cast_fp16")]; - string var_17968_mode_0 = const()[name = string("op_17968_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_17968_cast_fp16 = gelu(mode = var_17968_mode_0, x = x_351_cast_fp16)[name = string("op_17968_cast_fp16")]; - tensor input_437_cast_fp16 = mul(x = var_17968_cast_fp16, y = b_43_cast_fp16)[name = string("input_437_cast_fp16")]; - string e_43_pad_type_0 = const()[name = string("e_43_pad_type_0"), val = string("valid")]; - tensor e_43_strides_0 = const()[name = string("e_43_strides_0"), val = tensor([1, 1])]; - tensor e_43_pad_0 = const()[name = string("e_43_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_43_dilations_0 = const()[name = string("e_43_dilations_0"), val = tensor([1, 1])]; - int32 e_43_groups_0 = const()[name = string("e_43_groups_0"), val = int32(1)]; - tensor model_model_layers_21_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(465474432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471446464))))[name = string("model_model_layers_21_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_43_cast_fp16 = conv(dilations = e_43_dilations_0, groups = e_43_groups_0, pad = e_43_pad_0, pad_type = e_43_pad_type_0, strides = e_43_strides_0, weight = model_model_layers_21_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_437_cast_fp16)[name = string("e_43_cast_fp16")]; - tensor var_17976_axes_0 = const()[name = string("op_17976_axes_0"), val = tensor([2])]; - tensor var_17976_cast_fp16 = squeeze(axes = var_17976_axes_0, x = e_43_cast_fp16)[name = string("op_17976_cast_fp16")]; - tensor var_17977 = const()[name = string("op_17977"), val = tensor([0, 2, 1])]; - int32 var_17988 = const()[name = string("op_17988"), val = int32(-1)]; - fp16 const_923_promoted_to_fp16 = const()[name = string("const_923_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_349_cast_fp16 = transpose(perm = var_17977, x = var_17976_cast_fp16)[name = string("transpose_36")]; - tensor var_17990_cast_fp16 = mul(x = hidden_states_349_cast_fp16, y = const_923_promoted_to_fp16)[name = string("op_17990_cast_fp16")]; - bool input_439_interleave_0 = const()[name = string("input_439_interleave_0"), val = bool(false)]; - tensor input_439_cast_fp16 = concat(axis = var_17988, interleave = input_439_interleave_0, values = (hidden_states_349_cast_fp16, var_17990_cast_fp16))[name = string("input_439_cast_fp16")]; - tensor normed_525_axes_0 = const()[name = string("normed_525_axes_0"), val = tensor([-1])]; - fp16 var_17985_to_fp16 = const()[name = string("op_17985_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_525_cast_fp16 = layer_norm(axes = normed_525_axes_0, epsilon = var_17985_to_fp16, x = input_439_cast_fp16)[name = string("normed_525_cast_fp16")]; - tensor normed_527_begin_0 = const()[name = string("normed_527_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_527_end_0 = const()[name = string("normed_527_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_527_end_mask_0 = const()[name = string("normed_527_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_527_cast_fp16 = slice_by_index(begin = normed_527_begin_0, end = normed_527_end_0, end_mask = normed_527_end_mask_0, x = normed_525_cast_fp16)[name = string("normed_527_cast_fp16")]; - tensor var_18004_to_fp16 = const()[name = string("op_18004_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471464960)))]; - tensor hidden_states_351_cast_fp16 = mul(x = normed_527_cast_fp16, y = var_18004_to_fp16)[name = string("hidden_states_351_cast_fp16")]; - tensor hidden_states_353_cast_fp16 = add(x = hidden_states_347_cast_fp16, y = hidden_states_351_cast_fp16)[name = string("hidden_states_353_cast_fp16")]; - int32 var_18058 = const()[name = string("op_18058"), val = int32(-1)]; - fp16 const_928_promoted_to_fp16 = const()[name = string("const_928_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_18060_cast_fp16 = mul(x = hidden_states_353_cast_fp16, y = const_928_promoted_to_fp16)[name = string("op_18060_cast_fp16")]; - bool input_441_interleave_0 = const()[name = string("input_441_interleave_0"), val = bool(false)]; - tensor input_441_cast_fp16 = concat(axis = var_18058, interleave = input_441_interleave_0, values = (hidden_states_353_cast_fp16, var_18060_cast_fp16))[name = string("input_441_cast_fp16")]; - tensor normed_529_axes_0 = const()[name = string("normed_529_axes_0"), val = tensor([-1])]; - fp16 var_18055_to_fp16 = const()[name = string("op_18055_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_529_cast_fp16 = layer_norm(axes = normed_529_axes_0, epsilon = var_18055_to_fp16, x = input_441_cast_fp16)[name = string("normed_529_cast_fp16")]; - tensor normed_531_begin_0 = const()[name = string("normed_531_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_531_end_0 = const()[name = string("normed_531_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_531_end_mask_0 = const()[name = string("normed_531_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_531_cast_fp16 = slice_by_index(begin = normed_531_begin_0, end = normed_531_end_0, end_mask = normed_531_end_mask_0, x = normed_529_cast_fp16)[name = string("normed_531_cast_fp16")]; - tensor var_18074_to_fp16 = const()[name = string("op_18074_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471467328)))]; - tensor hidden_states_355_cast_fp16 = mul(x = normed_531_cast_fp16, y = var_18074_to_fp16)[name = string("hidden_states_355_cast_fp16")]; - tensor var_18085 = const()[name = string("op_18085"), val = tensor([0, 2, 1])]; - tensor var_18088_axes_0 = const()[name = string("op_18088_axes_0"), val = tensor([2])]; - tensor var_18086_cast_fp16 = transpose(perm = var_18085, x = hidden_states_355_cast_fp16)[name = string("transpose_35")]; - tensor var_18088_cast_fp16 = expand_dims(axes = var_18088_axes_0, x = var_18086_cast_fp16)[name = string("op_18088_cast_fp16")]; - string query_states_177_pad_type_0 = const()[name = string("query_states_177_pad_type_0"), val = string("valid")]; - tensor query_states_177_strides_0 = const()[name = string("query_states_177_strides_0"), val = tensor([1, 1])]; - tensor query_states_177_pad_0 = const()[name = string("query_states_177_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_177_dilations_0 = const()[name = string("query_states_177_dilations_0"), val = tensor([1, 1])]; - int32 query_states_177_groups_0 = const()[name = string("query_states_177_groups_0"), val = int32(1)]; - tensor query_states_177 = conv(dilations = query_states_177_dilations_0, groups = query_states_177_groups_0, pad = query_states_177_pad_0, pad_type = query_states_177_pad_type_0, strides = query_states_177_strides_0, weight = model_model_layers_22_self_attn_q_proj_weight_palettized, x = var_18088_cast_fp16)[name = string("query_states_177")]; - string key_states_221_pad_type_0 = const()[name = string("key_states_221_pad_type_0"), val = string("valid")]; - tensor key_states_221_strides_0 = const()[name = string("key_states_221_strides_0"), val = tensor([1, 1])]; - tensor key_states_221_pad_0 = const()[name = string("key_states_221_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_221_dilations_0 = const()[name = string("key_states_221_dilations_0"), val = tensor([1, 1])]; - int32 key_states_221_groups_0 = const()[name = string("key_states_221_groups_0"), val = int32(1)]; - tensor key_states_221 = conv(dilations = key_states_221_dilations_0, groups = key_states_221_groups_0, pad = key_states_221_pad_0, pad_type = key_states_221_pad_type_0, strides = key_states_221_strides_0, weight = model_model_layers_22_self_attn_k_proj_weight_palettized, x = var_18088_cast_fp16)[name = string("key_states_221")]; - string value_states_177_pad_type_0 = const()[name = string("value_states_177_pad_type_0"), val = string("valid")]; - tensor value_states_177_strides_0 = const()[name = string("value_states_177_strides_0"), val = tensor([1, 1])]; - tensor value_states_177_pad_0 = const()[name = string("value_states_177_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_177_dilations_0 = const()[name = string("value_states_177_dilations_0"), val = tensor([1, 1])]; - int32 value_states_177_groups_0 = const()[name = string("value_states_177_groups_0"), val = int32(1)]; - tensor value_states_177 = conv(dilations = value_states_177_dilations_0, groups = value_states_177_groups_0, pad = value_states_177_pad_0, pad_type = value_states_177_pad_type_0, strides = value_states_177_strides_0, weight = model_model_layers_22_self_attn_v_proj_weight_palettized, x = var_18088_cast_fp16)[name = string("value_states_177")]; - tensor var_18130 = const()[name = string("op_18130"), val = tensor([1, 4, 256, 64])]; - tensor var_18131 = reshape(shape = var_18130, x = query_states_177)[name = string("op_18131")]; - tensor var_18136 = const()[name = string("op_18136"), val = tensor([0, 1, 3, 2])]; - tensor var_18141 = const()[name = string("op_18141"), val = tensor([1, 1, 256, 64])]; - tensor var_18142 = reshape(shape = var_18141, x = key_states_221)[name = string("op_18142")]; - tensor var_18147 = const()[name = string("op_18147"), val = tensor([0, 1, 3, 2])]; - tensor var_18152 = const()[name = string("op_18152"), val = tensor([1, 1, 256, 64])]; - tensor var_18153 = reshape(shape = var_18152, x = value_states_177)[name = string("op_18153")]; - tensor var_18158 = const()[name = string("op_18158"), val = tensor([0, 1, 3, 2])]; - int32 var_18169 = const()[name = string("op_18169"), val = int32(-1)]; - fp16 const_933_promoted = const()[name = string("const_933_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_357 = transpose(perm = var_18136, x = var_18131)[name = string("transpose_34")]; - tensor var_18171 = mul(x = hidden_states_357, y = const_933_promoted)[name = string("op_18171")]; - bool input_445_interleave_0 = const()[name = string("input_445_interleave_0"), val = bool(false)]; - tensor input_445 = concat(axis = var_18169, interleave = input_445_interleave_0, values = (hidden_states_357, var_18171))[name = string("input_445")]; - tensor normed_533_axes_0 = const()[name = string("normed_533_axes_0"), val = tensor([-1])]; - fp16 var_18166_to_fp16 = const()[name = string("op_18166_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_533_cast_fp16 = layer_norm(axes = normed_533_axes_0, epsilon = var_18166_to_fp16, x = input_445)[name = string("normed_533_cast_fp16")]; - tensor normed_535_begin_0 = const()[name = string("normed_535_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_535_end_0 = const()[name = string("normed_535_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_535_end_mask_0 = const()[name = string("normed_535_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_535 = slice_by_index(begin = normed_535_begin_0, end = normed_535_end_0, end_mask = normed_535_end_mask_0, x = normed_533_cast_fp16)[name = string("normed_535")]; - tensor var_18185_to_fp16 = const()[name = string("op_18185_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471469696)))]; - tensor q_45_cast_fp16 = mul(x = normed_535, y = var_18185_to_fp16)[name = string("q_45_cast_fp16")]; - int32 var_18196 = const()[name = string("op_18196"), val = int32(-1)]; - fp16 const_937_promoted = const()[name = string("const_937_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_359 = transpose(perm = var_18147, x = var_18142)[name = string("transpose_33")]; - tensor var_18198 = mul(x = hidden_states_359, y = const_937_promoted)[name = string("op_18198")]; - bool input_447_interleave_0 = const()[name = string("input_447_interleave_0"), val = bool(false)]; - tensor input_447 = concat(axis = var_18196, interleave = input_447_interleave_0, values = (hidden_states_359, var_18198))[name = string("input_447")]; - tensor normed_537_axes_0 = const()[name = string("normed_537_axes_0"), val = tensor([-1])]; - fp16 var_18193_to_fp16 = const()[name = string("op_18193_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_537_cast_fp16 = layer_norm(axes = normed_537_axes_0, epsilon = var_18193_to_fp16, x = input_447)[name = string("normed_537_cast_fp16")]; - tensor normed_539_begin_0 = const()[name = string("normed_539_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_539_end_0 = const()[name = string("normed_539_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_539_end_mask_0 = const()[name = string("normed_539_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_539 = slice_by_index(begin = normed_539_begin_0, end = normed_539_end_0, end_mask = normed_539_end_mask_0, x = normed_537_cast_fp16)[name = string("normed_539")]; - tensor var_18212_to_fp16 = const()[name = string("op_18212_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471470272)))]; - tensor k_45_cast_fp16 = mul(x = normed_539, y = var_18212_to_fp16)[name = string("k_45_cast_fp16")]; - tensor var_18226_cast_fp16 = mul(x = q_45_cast_fp16, y = cos_5)[name = string("op_18226_cast_fp16")]; - tensor x1_89_begin_0 = const()[name = string("x1_89_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_89_end_0 = const()[name = string("x1_89_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_89_end_mask_0 = const()[name = string("x1_89_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_89_cast_fp16 = slice_by_index(begin = x1_89_begin_0, end = x1_89_end_0, end_mask = x1_89_end_mask_0, x = q_45_cast_fp16)[name = string("x1_89_cast_fp16")]; - tensor x2_89_begin_0 = const()[name = string("x2_89_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_89_end_0 = const()[name = string("x2_89_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_89_end_mask_0 = const()[name = string("x2_89_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_89_cast_fp16 = slice_by_index(begin = x2_89_begin_0, end = x2_89_end_0, end_mask = x2_89_end_mask_0, x = q_45_cast_fp16)[name = string("x2_89_cast_fp16")]; - fp16 const_943_promoted_to_fp16 = const()[name = string("const_943_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_18247_cast_fp16 = mul(x = x2_89_cast_fp16, y = const_943_promoted_to_fp16)[name = string("op_18247_cast_fp16")]; - int32 var_18249 = const()[name = string("op_18249"), val = int32(-1)]; - bool var_18250_interleave_0 = const()[name = string("op_18250_interleave_0"), val = bool(false)]; - tensor var_18250_cast_fp16 = concat(axis = var_18249, interleave = var_18250_interleave_0, values = (var_18247_cast_fp16, x1_89_cast_fp16))[name = string("op_18250_cast_fp16")]; - tensor var_18251_cast_fp16 = mul(x = var_18250_cast_fp16, y = sin_5)[name = string("op_18251_cast_fp16")]; - tensor query_states_179_cast_fp16 = add(x = var_18226_cast_fp16, y = var_18251_cast_fp16)[name = string("query_states_179_cast_fp16")]; - tensor var_18254_cast_fp16 = mul(x = k_45_cast_fp16, y = cos_5)[name = string("op_18254_cast_fp16")]; - tensor x1_91_begin_0 = const()[name = string("x1_91_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_91_end_0 = const()[name = string("x1_91_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_91_end_mask_0 = const()[name = string("x1_91_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_91_cast_fp16 = slice_by_index(begin = x1_91_begin_0, end = x1_91_end_0, end_mask = x1_91_end_mask_0, x = k_45_cast_fp16)[name = string("x1_91_cast_fp16")]; - tensor x2_91_begin_0 = const()[name = string("x2_91_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_91_end_0 = const()[name = string("x2_91_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_91_end_mask_0 = const()[name = string("x2_91_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_91_cast_fp16 = slice_by_index(begin = x2_91_begin_0, end = x2_91_end_0, end_mask = x2_91_end_mask_0, x = k_45_cast_fp16)[name = string("x2_91_cast_fp16")]; - fp16 const_946_promoted_to_fp16 = const()[name = string("const_946_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_18275_cast_fp16 = mul(x = x2_91_cast_fp16, y = const_946_promoted_to_fp16)[name = string("op_18275_cast_fp16")]; - int32 var_18277 = const()[name = string("op_18277"), val = int32(-1)]; - bool var_18278_interleave_0 = const()[name = string("op_18278_interleave_0"), val = bool(false)]; - tensor var_18278_cast_fp16 = concat(axis = var_18277, interleave = var_18278_interleave_0, values = (var_18275_cast_fp16, x1_91_cast_fp16))[name = string("op_18278_cast_fp16")]; - tensor var_18279_cast_fp16 = mul(x = var_18278_cast_fp16, y = sin_5)[name = string("op_18279_cast_fp16")]; - tensor key_states_223_cast_fp16 = add(x = var_18254_cast_fp16, y = var_18279_cast_fp16)[name = string("key_states_223_cast_fp16")]; - tensor key_slice_39_begin_0 = const()[name = string("key_slice_39_begin_0"), val = tensor([19, 0, 0, 0])]; - tensor key_slice_39_end_0 = const()[name = string("key_slice_39_end_0"), val = tensor([20, 1, 512, 256])]; - tensor key_slice_39_end_mask_0 = const()[name = string("key_slice_39_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_39_cast_fp16 = slice_by_index(begin = key_slice_39_begin_0, end = key_slice_39_end_0, end_mask = key_slice_39_end_mask_0, x = coreml_update_state_95)[name = string("key_slice_39_cast_fp16")]; - tensor var_18316_begin_0 = const()[name = string("op_18316_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_18316_end_0 = const()[name = string("op_18316_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_18316_end_mask_0 = const()[name = string("op_18316_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_18316_cast_fp16 = slice_by_index(begin = var_18316_begin_0, end = var_18316_end_0, end_mask = var_18316_end_mask_0, x = key_slice_39_cast_fp16)[name = string("op_18316_cast_fp16")]; - int32 var_18343 = const()[name = string("op_18343"), val = int32(2)]; - bool shifted_key_39_interleave_0 = const()[name = string("shifted_key_39_interleave_0"), val = bool(false)]; - tensor shifted_key_39_cast_fp16 = concat(axis = var_18343, interleave = shifted_key_39_interleave_0, values = (var_18316_cast_fp16, key_states_223_cast_fp16))[name = string("shifted_key_39_cast_fp16")]; - tensor concat_320 = const()[name = string("concat_320"), val = tensor([19, 0, 0, 0])]; - tensor concat_321 = const()[name = string("concat_321"), val = tensor([20, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_39_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_39_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_39_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_39_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_39_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_39_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_39_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_39_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_39_cast_fp16 = slice_update(begin = concat_320, begin_mask = model_model_kv_cache_local_internal_tensor_assign_39_begin_mask_0, end = concat_321, end_mask = model_model_kv_cache_local_internal_tensor_assign_39_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_39_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_39_stride_0, update = shifted_key_39_cast_fp16, x = coreml_update_state_95)[name = string("model_model_kv_cache_local_internal_tensor_assign_39_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_39_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_200_write_state")]; - tensor coreml_update_state_96 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_200")]; - tensor value_slice_39_begin_0 = const()[name = string("value_slice_39_begin_0"), val = tensor([41, 0, 0, 0])]; - tensor value_slice_39_end_0 = const()[name = string("value_slice_39_end_0"), val = tensor([42, 1, 512, 256])]; - tensor value_slice_39_end_mask_0 = const()[name = string("value_slice_39_end_mask_0"), val = tensor([false, true, true, true])]; - tensor value_slice_39_cast_fp16 = slice_by_index(begin = value_slice_39_begin_0, end = value_slice_39_end_0, end_mask = value_slice_39_end_mask_0, x = coreml_update_state_96)[name = string("value_slice_39_cast_fp16")]; - tensor var_18386_begin_0 = const()[name = string("op_18386_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_18386_end_0 = const()[name = string("op_18386_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_18386_end_mask_0 = const()[name = string("op_18386_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_18386_cast_fp16 = slice_by_index(begin = var_18386_begin_0, end = var_18386_end_0, end_mask = var_18386_end_mask_0, x = value_slice_39_cast_fp16)[name = string("op_18386_cast_fp16")]; - int32 var_18413 = const()[name = string("op_18413"), val = int32(2)]; - bool shifted_value_39_interleave_0 = const()[name = string("shifted_value_39_interleave_0"), val = bool(false)]; - tensor value_states_179 = transpose(perm = var_18158, x = var_18153)[name = string("transpose_32")]; - tensor shifted_value_39_cast_fp16 = concat(axis = var_18413, interleave = shifted_value_39_interleave_0, values = (var_18386_cast_fp16, value_states_179))[name = string("shifted_value_39_cast_fp16")]; - tensor concat_322 = const()[name = string("concat_322"), val = tensor([41, 0, 0, 0])]; - tensor concat_323 = const()[name = string("concat_323"), val = tensor([42, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_40_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_40_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_40_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_40_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_40_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_40_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_40_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_40_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_40_cast_fp16 = slice_update(begin = concat_322, begin_mask = model_model_kv_cache_local_internal_tensor_assign_40_begin_mask_0, end = concat_323, end_mask = model_model_kv_cache_local_internal_tensor_assign_40_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_40_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_40_stride_0, update = shifted_value_39_cast_fp16, x = coreml_update_state_96)[name = string("model_model_kv_cache_local_internal_tensor_assign_40_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_40_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_201_write_state")]; - tensor coreml_update_state_97 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_201")]; - tensor var_18441_begin_0 = const()[name = string("op_18441_begin_0"), val = tensor([19, 0, 0, 0])]; - tensor var_18441_end_0 = const()[name = string("op_18441_end_0"), val = tensor([20, 1, 512, 256])]; - tensor var_18441_end_mask_0 = const()[name = string("op_18441_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_18441_cast_fp16 = slice_by_index(begin = var_18441_begin_0, end = var_18441_end_0, end_mask = var_18441_end_mask_0, x = coreml_update_state_97)[name = string("op_18441_cast_fp16")]; - tensor var_18448_begin_0 = const()[name = string("op_18448_begin_0"), val = tensor([41, 0, 0, 0])]; - tensor var_18448_end_0 = const()[name = string("op_18448_end_0"), val = tensor([42, 1, 512, 256])]; - tensor var_18448_end_mask_0 = const()[name = string("op_18448_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_18448_cast_fp16 = slice_by_index(begin = var_18448_begin_0, end = var_18448_end_0, end_mask = var_18448_end_mask_0, x = coreml_update_state_97)[name = string("op_18448_cast_fp16")]; - tensor var_18487 = const()[name = string("op_18487"), val = tensor([1, 4, 1, 1])]; - tensor x_357_cast_fp16 = tile(reps = var_18487, x = var_18441_cast_fp16)[name = string("x_357_cast_fp16")]; - tensor var_18507 = const()[name = string("op_18507"), val = tensor([1, 4, 1, 1])]; - tensor x_363_cast_fp16 = tile(reps = var_18507, x = var_18448_cast_fp16)[name = string("x_363_cast_fp16")]; - bool var_18534_transpose_x_0 = const()[name = string("op_18534_transpose_x_0"), val = bool(false)]; - bool var_18534_transpose_y_0 = const()[name = string("op_18534_transpose_y_0"), val = bool(true)]; - tensor var_18534 = matmul(transpose_x = var_18534_transpose_x_0, transpose_y = var_18534_transpose_y_0, x = query_states_179_cast_fp16, y = x_357_cast_fp16)[name = string("op_18534")]; - fp16 var_18535_to_fp16 = const()[name = string("op_18535_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_89_cast_fp16 = mul(x = var_18534, y = var_18535_to_fp16)[name = string("attn_weights_89_cast_fp16")]; - tensor attn_weights_91_cast_fp16 = add(x = attn_weights_89_cast_fp16, y = mask_slice_1)[name = string("attn_weights_91_cast_fp16")]; - int32 var_18570 = const()[name = string("op_18570"), val = int32(-1)]; - tensor var_18572_cast_fp16 = softmax(axis = var_18570, x = attn_weights_91_cast_fp16)[name = string("op_18572_cast_fp16")]; - tensor concat_328 = const()[name = string("concat_328"), val = tensor([4, 64, 512])]; - tensor reshape_66_cast_fp16 = reshape(shape = concat_328, x = var_18572_cast_fp16)[name = string("reshape_66_cast_fp16")]; - tensor concat_329 = const()[name = string("concat_329"), val = tensor([4, 512, 256])]; - tensor reshape_67_cast_fp16 = reshape(shape = concat_329, x = x_363_cast_fp16)[name = string("reshape_67_cast_fp16")]; - bool matmul_22_transpose_x_0 = const()[name = string("matmul_22_transpose_x_0"), val = bool(false)]; - bool matmul_22_transpose_y_0 = const()[name = string("matmul_22_transpose_y_0"), val = bool(false)]; - tensor matmul_22_cast_fp16 = matmul(transpose_x = matmul_22_transpose_x_0, transpose_y = matmul_22_transpose_y_0, x = reshape_66_cast_fp16, y = reshape_67_cast_fp16)[name = string("matmul_22_cast_fp16")]; - tensor concat_333 = const()[name = string("concat_333"), val = tensor([1, 4, 64, 256])]; - tensor reshape_68_cast_fp16 = reshape(shape = concat_333, x = matmul_22_cast_fp16)[name = string("reshape_68_cast_fp16")]; - tensor var_18584_perm_0 = const()[name = string("op_18584_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_18603 = const()[name = string("op_18603"), val = tensor([1, 64, 1024])]; - tensor var_18584_cast_fp16 = transpose(perm = var_18584_perm_0, x = reshape_68_cast_fp16)[name = string("transpose_31")]; - tensor attn_output_225_cast_fp16 = reshape(shape = var_18603, x = var_18584_cast_fp16)[name = string("attn_output_225_cast_fp16")]; - tensor var_18608 = const()[name = string("op_18608"), val = tensor([0, 2, 1])]; - string var_18624_pad_type_0 = const()[name = string("op_18624_pad_type_0"), val = string("valid")]; - int32 var_18624_groups_0 = const()[name = string("op_18624_groups_0"), val = int32(1)]; - tensor var_18624_strides_0 = const()[name = string("op_18624_strides_0"), val = tensor([1])]; - tensor var_18624_pad_0 = const()[name = string("op_18624_pad_0"), val = tensor([0, 0])]; - tensor var_18624_dilations_0 = const()[name = string("op_18624_dilations_0"), val = tensor([1])]; - tensor squeeze_22_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471470848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(472355648))))[name = string("squeeze_22_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_18609_cast_fp16 = transpose(perm = var_18608, x = attn_output_225_cast_fp16)[name = string("transpose_30")]; - tensor var_18624_cast_fp16 = conv(dilations = var_18624_dilations_0, groups = var_18624_groups_0, pad = var_18624_pad_0, pad_type = var_18624_pad_type_0, strides = var_18624_strides_0, weight = squeeze_22_cast_fp16_to_fp32_to_fp16_palettized, x = var_18609_cast_fp16)[name = string("op_18624_cast_fp16")]; - tensor var_18628 = const()[name = string("op_18628"), val = tensor([0, 2, 1])]; - int32 var_18639 = const()[name = string("op_18639"), val = int32(-1)]; - fp16 const_957_promoted_to_fp16 = const()[name = string("const_957_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_361_cast_fp16 = transpose(perm = var_18628, x = var_18624_cast_fp16)[name = string("transpose_29")]; - tensor var_18641_cast_fp16 = mul(x = hidden_states_361_cast_fp16, y = const_957_promoted_to_fp16)[name = string("op_18641_cast_fp16")]; - bool input_451_interleave_0 = const()[name = string("input_451_interleave_0"), val = bool(false)]; - tensor input_451_cast_fp16 = concat(axis = var_18639, interleave = input_451_interleave_0, values = (hidden_states_361_cast_fp16, var_18641_cast_fp16))[name = string("input_451_cast_fp16")]; - tensor normed_541_axes_0 = const()[name = string("normed_541_axes_0"), val = tensor([-1])]; - fp16 var_18636_to_fp16 = const()[name = string("op_18636_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_541_cast_fp16 = layer_norm(axes = normed_541_axes_0, epsilon = var_18636_to_fp16, x = input_451_cast_fp16)[name = string("normed_541_cast_fp16")]; - tensor normed_543_begin_0 = const()[name = string("normed_543_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_543_end_0 = const()[name = string("normed_543_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_543_end_mask_0 = const()[name = string("normed_543_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_543_cast_fp16 = slice_by_index(begin = normed_543_begin_0, end = normed_543_end_0, end_mask = normed_543_end_mask_0, x = normed_541_cast_fp16)[name = string("normed_543_cast_fp16")]; - tensor var_18655_to_fp16 = const()[name = string("op_18655_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(472374144)))]; - tensor attn_output_229_cast_fp16 = mul(x = normed_543_cast_fp16, y = var_18655_to_fp16)[name = string("attn_output_229_cast_fp16")]; - tensor hidden_states_363_cast_fp16 = add(x = hidden_states_353_cast_fp16, y = attn_output_229_cast_fp16)[name = string("hidden_states_363_cast_fp16")]; - int32 var_18668 = const()[name = string("op_18668"), val = int32(-1)]; - fp16 const_961_promoted_to_fp16 = const()[name = string("const_961_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_18670_cast_fp16 = mul(x = hidden_states_363_cast_fp16, y = const_961_promoted_to_fp16)[name = string("op_18670_cast_fp16")]; - bool input_453_interleave_0 = const()[name = string("input_453_interleave_0"), val = bool(false)]; - tensor input_453_cast_fp16 = concat(axis = var_18668, interleave = input_453_interleave_0, values = (hidden_states_363_cast_fp16, var_18670_cast_fp16))[name = string("input_453_cast_fp16")]; - tensor normed_545_axes_0 = const()[name = string("normed_545_axes_0"), val = tensor([-1])]; - fp16 var_18665_to_fp16 = const()[name = string("op_18665_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_545_cast_fp16 = layer_norm(axes = normed_545_axes_0, epsilon = var_18665_to_fp16, x = input_453_cast_fp16)[name = string("normed_545_cast_fp16")]; - tensor normed_547_begin_0 = const()[name = string("normed_547_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_547_end_0 = const()[name = string("normed_547_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_547_end_mask_0 = const()[name = string("normed_547_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_547_cast_fp16 = slice_by_index(begin = normed_547_begin_0, end = normed_547_end_0, end_mask = normed_547_end_mask_0, x = normed_545_cast_fp16)[name = string("normed_547_cast_fp16")]; - tensor var_18684_to_fp16 = const()[name = string("op_18684_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(472376512)))]; - tensor x_365_cast_fp16 = mul(x = normed_547_cast_fp16, y = var_18684_to_fp16)[name = string("x_365_cast_fp16")]; - tensor var_18696 = const()[name = string("op_18696"), val = tensor([0, 2, 1])]; - tensor input_455_axes_0 = const()[name = string("input_455_axes_0"), val = tensor([2])]; - tensor var_18697_cast_fp16 = transpose(perm = var_18696, x = x_365_cast_fp16)[name = string("transpose_28")]; - tensor input_455_cast_fp16 = expand_dims(axes = input_455_axes_0, x = var_18697_cast_fp16)[name = string("input_455_cast_fp16")]; - string x_367_pad_type_0 = const()[name = string("x_367_pad_type_0"), val = string("valid")]; - tensor x_367_strides_0 = const()[name = string("x_367_strides_0"), val = tensor([1, 1])]; - tensor x_367_pad_0 = const()[name = string("x_367_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_367_dilations_0 = const()[name = string("x_367_dilations_0"), val = tensor([1, 1])]; - int32 x_367_groups_0 = const()[name = string("x_367_groups_0"), val = int32(1)]; - tensor model_model_layers_22_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1459073472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1465045504))))[name = string("model_model_layers_22_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_367_cast_fp16 = conv(dilations = x_367_dilations_0, groups = x_367_groups_0, pad = x_367_pad_0, pad_type = x_367_pad_type_0, strides = x_367_strides_0, weight = model_model_layers_22_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_455_cast_fp16)[name = string("x_367_cast_fp16")]; - string b_45_pad_type_0 = const()[name = string("b_45_pad_type_0"), val = string("valid")]; - tensor b_45_strides_0 = const()[name = string("b_45_strides_0"), val = tensor([1, 1])]; - tensor b_45_pad_0 = const()[name = string("b_45_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_45_dilations_0 = const()[name = string("b_45_dilations_0"), val = tensor([1, 1])]; - int32 b_45_groups_0 = const()[name = string("b_45_groups_0"), val = int32(1)]; - tensor model_model_layers_22_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1465156160))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1471128192))))[name = string("model_model_layers_22_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_45_cast_fp16 = conv(dilations = b_45_dilations_0, groups = b_45_groups_0, pad = b_45_pad_0, pad_type = b_45_pad_type_0, strides = b_45_strides_0, weight = model_model_layers_22_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_455_cast_fp16)[name = string("b_45_cast_fp16")]; - string var_18722_mode_0 = const()[name = string("op_18722_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_18722_cast_fp16 = gelu(mode = var_18722_mode_0, x = x_367_cast_fp16)[name = string("op_18722_cast_fp16")]; - tensor input_457_cast_fp16 = mul(x = var_18722_cast_fp16, y = b_45_cast_fp16)[name = string("input_457_cast_fp16")]; - string e_45_pad_type_0 = const()[name = string("e_45_pad_type_0"), val = string("valid")]; - tensor e_45_strides_0 = const()[name = string("e_45_strides_0"), val = tensor([1, 1])]; - tensor e_45_pad_0 = const()[name = string("e_45_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_45_dilations_0 = const()[name = string("e_45_dilations_0"), val = tensor([1, 1])]; - int32 e_45_groups_0 = const()[name = string("e_45_groups_0"), val = int32(1)]; - tensor model_model_layers_22_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(484544256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490516288))))[name = string("model_model_layers_22_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_45_cast_fp16 = conv(dilations = e_45_dilations_0, groups = e_45_groups_0, pad = e_45_pad_0, pad_type = e_45_pad_type_0, strides = e_45_strides_0, weight = model_model_layers_22_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_457_cast_fp16)[name = string("e_45_cast_fp16")]; - tensor var_18730_axes_0 = const()[name = string("op_18730_axes_0"), val = tensor([2])]; - tensor var_18730_cast_fp16 = squeeze(axes = var_18730_axes_0, x = e_45_cast_fp16)[name = string("op_18730_cast_fp16")]; - tensor var_18731 = const()[name = string("op_18731"), val = tensor([0, 2, 1])]; - int32 var_18742 = const()[name = string("op_18742"), val = int32(-1)]; - fp16 const_965_promoted_to_fp16 = const()[name = string("const_965_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_365_cast_fp16 = transpose(perm = var_18731, x = var_18730_cast_fp16)[name = string("transpose_27")]; - tensor var_18744_cast_fp16 = mul(x = hidden_states_365_cast_fp16, y = const_965_promoted_to_fp16)[name = string("op_18744_cast_fp16")]; - bool input_459_interleave_0 = const()[name = string("input_459_interleave_0"), val = bool(false)]; - tensor input_459_cast_fp16 = concat(axis = var_18742, interleave = input_459_interleave_0, values = (hidden_states_365_cast_fp16, var_18744_cast_fp16))[name = string("input_459_cast_fp16")]; - tensor normed_549_axes_0 = const()[name = string("normed_549_axes_0"), val = tensor([-1])]; - fp16 var_18739_to_fp16 = const()[name = string("op_18739_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_549_cast_fp16 = layer_norm(axes = normed_549_axes_0, epsilon = var_18739_to_fp16, x = input_459_cast_fp16)[name = string("normed_549_cast_fp16")]; - tensor normed_551_begin_0 = const()[name = string("normed_551_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_551_end_0 = const()[name = string("normed_551_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_551_end_mask_0 = const()[name = string("normed_551_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_551_cast_fp16 = slice_by_index(begin = normed_551_begin_0, end = normed_551_end_0, end_mask = normed_551_end_mask_0, x = normed_549_cast_fp16)[name = string("normed_551_cast_fp16")]; - tensor var_18758_to_fp16 = const()[name = string("op_18758_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490534784)))]; - tensor hidden_states_367_cast_fp16 = mul(x = normed_551_cast_fp16, y = var_18758_to_fp16)[name = string("hidden_states_367_cast_fp16")]; - tensor hidden_states_369_cast_fp16 = add(x = hidden_states_363_cast_fp16, y = hidden_states_367_cast_fp16)[name = string("hidden_states_369_cast_fp16")]; - int32 var_18812 = const()[name = string("op_18812"), val = int32(-1)]; - fp16 const_970_promoted_to_fp16 = const()[name = string("const_970_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_18814_cast_fp16 = mul(x = hidden_states_369_cast_fp16, y = const_970_promoted_to_fp16)[name = string("op_18814_cast_fp16")]; - bool input_461_interleave_0 = const()[name = string("input_461_interleave_0"), val = bool(false)]; - tensor input_461_cast_fp16 = concat(axis = var_18812, interleave = input_461_interleave_0, values = (hidden_states_369_cast_fp16, var_18814_cast_fp16))[name = string("input_461_cast_fp16")]; - tensor normed_553_axes_0 = const()[name = string("normed_553_axes_0"), val = tensor([-1])]; - fp16 var_18809_to_fp16 = const()[name = string("op_18809_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_553_cast_fp16 = layer_norm(axes = normed_553_axes_0, epsilon = var_18809_to_fp16, x = input_461_cast_fp16)[name = string("normed_553_cast_fp16")]; - tensor normed_555_begin_0 = const()[name = string("normed_555_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_555_end_0 = const()[name = string("normed_555_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_555_end_mask_0 = const()[name = string("normed_555_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_555_cast_fp16 = slice_by_index(begin = normed_555_begin_0, end = normed_555_end_0, end_mask = normed_555_end_mask_0, x = normed_553_cast_fp16)[name = string("normed_555_cast_fp16")]; - tensor var_18828_to_fp16 = const()[name = string("op_18828_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490537152)))]; - tensor hidden_states_371_cast_fp16 = mul(x = normed_555_cast_fp16, y = var_18828_to_fp16)[name = string("hidden_states_371_cast_fp16")]; - tensor var_18839 = const()[name = string("op_18839"), val = tensor([0, 2, 1])]; - tensor var_18842_axes_0 = const()[name = string("op_18842_axes_0"), val = tensor([2])]; - tensor var_18840_cast_fp16 = transpose(perm = var_18839, x = hidden_states_371_cast_fp16)[name = string("transpose_26")]; - tensor var_18842_cast_fp16 = expand_dims(axes = var_18842_axes_0, x = var_18840_cast_fp16)[name = string("op_18842_cast_fp16")]; - string query_states_185_pad_type_0 = const()[name = string("query_states_185_pad_type_0"), val = string("valid")]; - tensor query_states_185_strides_0 = const()[name = string("query_states_185_strides_0"), val = tensor([1, 1])]; - tensor query_states_185_pad_0 = const()[name = string("query_states_185_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_185_dilations_0 = const()[name = string("query_states_185_dilations_0"), val = tensor([1, 1])]; - int32 query_states_185_groups_0 = const()[name = string("query_states_185_groups_0"), val = int32(1)]; - tensor query_states_185 = conv(dilations = query_states_185_dilations_0, groups = query_states_185_groups_0, pad = query_states_185_pad_0, pad_type = query_states_185_pad_type_0, strides = query_states_185_strides_0, weight = model_model_layers_23_self_attn_q_proj_weight_palettized, x = var_18842_cast_fp16)[name = string("query_states_185")]; - string key_states_231_pad_type_0 = const()[name = string("key_states_231_pad_type_0"), val = string("valid")]; - tensor key_states_231_strides_0 = const()[name = string("key_states_231_strides_0"), val = tensor([1, 1])]; - tensor key_states_231_pad_0 = const()[name = string("key_states_231_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_231_dilations_0 = const()[name = string("key_states_231_dilations_0"), val = tensor([1, 1])]; - int32 key_states_231_groups_0 = const()[name = string("key_states_231_groups_0"), val = int32(1)]; - tensor key_states_231 = conv(dilations = key_states_231_dilations_0, groups = key_states_231_groups_0, pad = key_states_231_pad_0, pad_type = key_states_231_pad_type_0, strides = key_states_231_strides_0, weight = model_model_layers_23_self_attn_k_proj_weight_palettized, x = var_18842_cast_fp16)[name = string("key_states_231")]; - string value_states_185_pad_type_0 = const()[name = string("value_states_185_pad_type_0"), val = string("valid")]; - tensor value_states_185_strides_0 = const()[name = string("value_states_185_strides_0"), val = tensor([1, 1])]; - tensor value_states_185_pad_0 = const()[name = string("value_states_185_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_185_dilations_0 = const()[name = string("value_states_185_dilations_0"), val = tensor([1, 1])]; - int32 value_states_185_groups_0 = const()[name = string("value_states_185_groups_0"), val = int32(1)]; - tensor value_states_185 = conv(dilations = value_states_185_dilations_0, groups = value_states_185_groups_0, pad = value_states_185_pad_0, pad_type = value_states_185_pad_type_0, strides = value_states_185_strides_0, weight = model_model_layers_23_self_attn_v_proj_weight_palettized, x = var_18842_cast_fp16)[name = string("value_states_185")]; - tensor var_18884 = const()[name = string("op_18884"), val = tensor([1, 4, 256, 64])]; - tensor var_18885 = reshape(shape = var_18884, x = query_states_185)[name = string("op_18885")]; - tensor var_18890 = const()[name = string("op_18890"), val = tensor([0, 1, 3, 2])]; - tensor var_18895 = const()[name = string("op_18895"), val = tensor([1, 1, 256, 64])]; - tensor var_18896 = reshape(shape = var_18895, x = key_states_231)[name = string("op_18896")]; - tensor var_18901 = const()[name = string("op_18901"), val = tensor([0, 1, 3, 2])]; - tensor var_18906 = const()[name = string("op_18906"), val = tensor([1, 1, 256, 64])]; - tensor var_18907 = reshape(shape = var_18906, x = value_states_185)[name = string("op_18907")]; - tensor var_18912 = const()[name = string("op_18912"), val = tensor([0, 1, 3, 2])]; - int32 var_18923 = const()[name = string("op_18923"), val = int32(-1)]; - fp16 const_975_promoted = const()[name = string("const_975_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_373 = transpose(perm = var_18890, x = var_18885)[name = string("transpose_25")]; - tensor var_18925 = mul(x = hidden_states_373, y = const_975_promoted)[name = string("op_18925")]; - bool input_465_interleave_0 = const()[name = string("input_465_interleave_0"), val = bool(false)]; - tensor input_465 = concat(axis = var_18923, interleave = input_465_interleave_0, values = (hidden_states_373, var_18925))[name = string("input_465")]; - tensor normed_557_axes_0 = const()[name = string("normed_557_axes_0"), val = tensor([-1])]; - fp16 var_18920_to_fp16 = const()[name = string("op_18920_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_557_cast_fp16 = layer_norm(axes = normed_557_axes_0, epsilon = var_18920_to_fp16, x = input_465)[name = string("normed_557_cast_fp16")]; - tensor normed_559_begin_0 = const()[name = string("normed_559_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_559_end_0 = const()[name = string("normed_559_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_559_end_mask_0 = const()[name = string("normed_559_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_559 = slice_by_index(begin = normed_559_begin_0, end = normed_559_end_0, end_mask = normed_559_end_mask_0, x = normed_557_cast_fp16)[name = string("normed_559")]; - tensor var_18939_to_fp16 = const()[name = string("op_18939_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490539520)))]; - tensor q_47_cast_fp16 = mul(x = normed_559, y = var_18939_to_fp16)[name = string("q_47_cast_fp16")]; - int32 var_18950 = const()[name = string("op_18950"), val = int32(-1)]; - fp16 const_979_promoted = const()[name = string("const_979_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_375 = transpose(perm = var_18901, x = var_18896)[name = string("transpose_24")]; - tensor var_18952 = mul(x = hidden_states_375, y = const_979_promoted)[name = string("op_18952")]; - bool input_467_interleave_0 = const()[name = string("input_467_interleave_0"), val = bool(false)]; - tensor input_467 = concat(axis = var_18950, interleave = input_467_interleave_0, values = (hidden_states_375, var_18952))[name = string("input_467")]; - tensor normed_561_axes_0 = const()[name = string("normed_561_axes_0"), val = tensor([-1])]; - fp16 var_18947_to_fp16 = const()[name = string("op_18947_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_561_cast_fp16 = layer_norm(axes = normed_561_axes_0, epsilon = var_18947_to_fp16, x = input_467)[name = string("normed_561_cast_fp16")]; - tensor normed_563_begin_0 = const()[name = string("normed_563_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_563_end_0 = const()[name = string("normed_563_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_563_end_mask_0 = const()[name = string("normed_563_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_563 = slice_by_index(begin = normed_563_begin_0, end = normed_563_end_0, end_mask = normed_563_end_mask_0, x = normed_561_cast_fp16)[name = string("normed_563")]; - tensor var_18966_to_fp16 = const()[name = string("op_18966_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490540096)))]; - tensor k_47_cast_fp16 = mul(x = normed_563, y = var_18966_to_fp16)[name = string("k_47_cast_fp16")]; - tensor var_18980_cast_fp16 = mul(x = q_47_cast_fp16, y = cos_35)[name = string("op_18980_cast_fp16")]; - tensor x1_93_begin_0 = const()[name = string("x1_93_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_93_end_0 = const()[name = string("x1_93_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_93_end_mask_0 = const()[name = string("x1_93_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_93_cast_fp16 = slice_by_index(begin = x1_93_begin_0, end = x1_93_end_0, end_mask = x1_93_end_mask_0, x = q_47_cast_fp16)[name = string("x1_93_cast_fp16")]; - tensor x2_93_begin_0 = const()[name = string("x2_93_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_93_end_0 = const()[name = string("x2_93_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_93_end_mask_0 = const()[name = string("x2_93_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_93_cast_fp16 = slice_by_index(begin = x2_93_begin_0, end = x2_93_end_0, end_mask = x2_93_end_mask_0, x = q_47_cast_fp16)[name = string("x2_93_cast_fp16")]; - fp16 const_985_promoted_to_fp16 = const()[name = string("const_985_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_19001_cast_fp16 = mul(x = x2_93_cast_fp16, y = const_985_promoted_to_fp16)[name = string("op_19001_cast_fp16")]; - int32 var_19003 = const()[name = string("op_19003"), val = int32(-1)]; - bool var_19004_interleave_0 = const()[name = string("op_19004_interleave_0"), val = bool(false)]; - tensor var_19004_cast_fp16 = concat(axis = var_19003, interleave = var_19004_interleave_0, values = (var_19001_cast_fp16, x1_93_cast_fp16))[name = string("op_19004_cast_fp16")]; - tensor var_19005_cast_fp16 = mul(x = var_19004_cast_fp16, y = sin_35)[name = string("op_19005_cast_fp16")]; - tensor query_states_187_cast_fp16 = add(x = var_18980_cast_fp16, y = var_19005_cast_fp16)[name = string("query_states_187_cast_fp16")]; - tensor var_19008_cast_fp16 = mul(x = k_47_cast_fp16, y = cos_35)[name = string("op_19008_cast_fp16")]; - tensor x1_95_begin_0 = const()[name = string("x1_95_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_95_end_0 = const()[name = string("x1_95_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_95_end_mask_0 = const()[name = string("x1_95_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_95_cast_fp16 = slice_by_index(begin = x1_95_begin_0, end = x1_95_end_0, end_mask = x1_95_end_mask_0, x = k_47_cast_fp16)[name = string("x1_95_cast_fp16")]; - tensor x2_95_begin_0 = const()[name = string("x2_95_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_95_end_0 = const()[name = string("x2_95_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_95_end_mask_0 = const()[name = string("x2_95_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_95_cast_fp16 = slice_by_index(begin = x2_95_begin_0, end = x2_95_end_0, end_mask = x2_95_end_mask_0, x = k_47_cast_fp16)[name = string("x2_95_cast_fp16")]; - fp16 const_988_promoted_to_fp16 = const()[name = string("const_988_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_19029_cast_fp16 = mul(x = x2_95_cast_fp16, y = const_988_promoted_to_fp16)[name = string("op_19029_cast_fp16")]; - int32 var_19031 = const()[name = string("op_19031"), val = int32(-1)]; - bool var_19032_interleave_0 = const()[name = string("op_19032_interleave_0"), val = bool(false)]; - tensor var_19032_cast_fp16 = concat(axis = var_19031, interleave = var_19032_interleave_0, values = (var_19029_cast_fp16, x1_95_cast_fp16))[name = string("op_19032_cast_fp16")]; - tensor var_19033_cast_fp16 = mul(x = var_19032_cast_fp16, y = sin_35)[name = string("op_19033_cast_fp16")]; - tensor key_states_233_cast_fp16 = add(x = var_19008_cast_fp16, y = var_19033_cast_fp16)[name = string("key_states_233_cast_fp16")]; - tensor expand_dims_236 = const()[name = string("expand_dims_236"), val = tensor([3])]; - tensor expand_dims_237 = const()[name = string("expand_dims_237"), val = tensor([0])]; - tensor expand_dims_239 = const()[name = string("expand_dims_239"), val = tensor([0])]; - tensor expand_dims_240 = const()[name = string("expand_dims_240"), val = tensor([4])]; - int32 concat_336_axis_0 = const()[name = string("concat_336_axis_0"), val = int32(0)]; - bool concat_336_interleave_0 = const()[name = string("concat_336_interleave_0"), val = bool(false)]; - tensor concat_336 = concat(axis = concat_336_axis_0, interleave = concat_336_interleave_0, values = (expand_dims_236, expand_dims_237, current_pos, expand_dims_239))[name = string("concat_336")]; - tensor concat_337_values1_0 = const()[name = string("concat_337_values1_0"), val = tensor([0])]; - tensor concat_337_values3_0 = const()[name = string("concat_337_values3_0"), val = tensor([0])]; - int32 concat_337_axis_0 = const()[name = string("concat_337_axis_0"), val = int32(0)]; - bool concat_337_interleave_0 = const()[name = string("concat_337_interleave_0"), val = bool(false)]; - tensor concat_337 = concat(axis = concat_337_axis_0, interleave = concat_337_interleave_0, values = (expand_dims_240, concat_337_values1_0, end_pos_1, concat_337_values3_0))[name = string("concat_337")]; - tensor model_model_kv_cache_global_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_global_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_global_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_336, begin_mask = model_model_kv_cache_global_internal_tensor_assign_7_begin_mask_0, end = concat_337, end_mask = model_model_kv_cache_global_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_7_stride_0, update = key_states_233_cast_fp16, x = coreml_update_state_87)[name = string("model_model_kv_cache_global_internal_tensor_assign_7_cast_fp16")]; - write_state(data = model_model_kv_cache_global_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_202_write_state")]; - tensor coreml_update_state_98 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_202")]; - tensor expand_dims_242 = const()[name = string("expand_dims_242"), val = tensor([7])]; - tensor expand_dims_243 = const()[name = string("expand_dims_243"), val = tensor([0])]; - tensor expand_dims_245 = const()[name = string("expand_dims_245"), val = tensor([0])]; - tensor expand_dims_246 = const()[name = string("expand_dims_246"), val = tensor([8])]; - int32 concat_340_axis_0 = const()[name = string("concat_340_axis_0"), val = int32(0)]; - bool concat_340_interleave_0 = const()[name = string("concat_340_interleave_0"), val = bool(false)]; - tensor concat_340 = concat(axis = concat_340_axis_0, interleave = concat_340_interleave_0, values = (expand_dims_242, expand_dims_243, current_pos, expand_dims_245))[name = string("concat_340")]; - tensor concat_341_values1_0 = const()[name = string("concat_341_values1_0"), val = tensor([0])]; - tensor concat_341_values3_0 = const()[name = string("concat_341_values3_0"), val = tensor([0])]; - int32 concat_341_axis_0 = const()[name = string("concat_341_axis_0"), val = int32(0)]; - bool concat_341_interleave_0 = const()[name = string("concat_341_interleave_0"), val = bool(false)]; - tensor concat_341 = concat(axis = concat_341_axis_0, interleave = concat_341_interleave_0, values = (expand_dims_246, concat_341_values1_0, end_pos_1, concat_341_values3_0))[name = string("concat_341")]; - tensor model_model_kv_cache_global_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_global_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_global_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; - tensor model_model_kv_cache_global_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_global_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor value_states_187 = transpose(perm = var_18912, x = var_18907)[name = string("transpose_23")]; - tensor model_model_kv_cache_global_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_340, begin_mask = model_model_kv_cache_global_internal_tensor_assign_8_begin_mask_0, end = concat_341, end_mask = model_model_kv_cache_global_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_global_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_global_internal_tensor_assign_8_stride_0, update = value_states_187, x = coreml_update_state_98)[name = string("model_model_kv_cache_global_internal_tensor_assign_8_cast_fp16")]; - write_state(data = model_model_kv_cache_global_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_global)[name = string("coreml_update_state_203_write_state")]; - tensor coreml_update_state_99 = read_state(input = model_model_kv_cache_global)[name = string("coreml_update_state_203")]; - tensor var_19132_begin_0 = const()[name = string("op_19132_begin_0"), val = tensor([3, 0, 0, 0])]; - tensor var_19132_end_0 = const()[name = string("op_19132_end_0"), val = tensor([4, 1, 4096, 256])]; - tensor var_19132_end_mask_0 = const()[name = string("op_19132_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_19132_cast_fp16 = slice_by_index(begin = var_19132_begin_0, end = var_19132_end_0, end_mask = var_19132_end_mask_0, x = coreml_update_state_99)[name = string("op_19132_cast_fp16")]; - tensor var_19139_begin_0 = const()[name = string("op_19139_begin_0"), val = tensor([7, 0, 0, 0])]; - tensor var_19139_end_0 = const()[name = string("op_19139_end_0"), val = tensor([1, 1, 4096, 256])]; - tensor var_19139_end_mask_0 = const()[name = string("op_19139_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_19139_cast_fp16 = slice_by_index(begin = var_19139_begin_0, end = var_19139_end_0, end_mask = var_19139_end_mask_0, x = coreml_update_state_99)[name = string("op_19139_cast_fp16")]; - tensor var_19178 = const()[name = string("op_19178"), val = tensor([1, 4, 1, 1])]; - tensor x_373_cast_fp16 = tile(reps = var_19178, x = var_19132_cast_fp16)[name = string("x_373_cast_fp16")]; - tensor var_19198 = const()[name = string("op_19198"), val = tensor([1, 4, 1, 1])]; - tensor x_379_cast_fp16 = tile(reps = var_19198, x = var_19139_cast_fp16)[name = string("x_379_cast_fp16")]; - bool var_19225_transpose_x_0 = const()[name = string("op_19225_transpose_x_0"), val = bool(false)]; - bool var_19225_transpose_y_0 = const()[name = string("op_19225_transpose_y_0"), val = bool(true)]; - tensor var_19225 = matmul(transpose_x = var_19225_transpose_x_0, transpose_y = var_19225_transpose_y_0, x = query_states_187_cast_fp16, y = x_373_cast_fp16)[name = string("op_19225")]; - fp16 var_19226_to_fp16 = const()[name = string("op_19226_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_93_cast_fp16 = mul(x = var_19225, y = var_19226_to_fp16)[name = string("attn_weights_93_cast_fp16")]; - tensor attn_weights_95_cast_fp16 = add(x = attn_weights_93_cast_fp16, y = causal_mask)[name = string("attn_weights_95_cast_fp16")]; - int32 var_19261 = const()[name = string("op_19261"), val = int32(-1)]; - tensor var_19263_cast_fp16 = softmax(axis = var_19261, x = attn_weights_95_cast_fp16)[name = string("op_19263_cast_fp16")]; - tensor concat_346 = const()[name = string("concat_346"), val = tensor([4, 64, 4096])]; - tensor reshape_69_cast_fp16 = reshape(shape = concat_346, x = var_19263_cast_fp16)[name = string("reshape_69_cast_fp16")]; - tensor concat_347 = const()[name = string("concat_347"), val = tensor([4, 4096, 256])]; - tensor reshape_70_cast_fp16 = reshape(shape = concat_347, x = x_379_cast_fp16)[name = string("reshape_70_cast_fp16")]; - bool matmul_23_transpose_x_0 = const()[name = string("matmul_23_transpose_x_0"), val = bool(false)]; - bool matmul_23_transpose_y_0 = const()[name = string("matmul_23_transpose_y_0"), val = bool(false)]; - tensor matmul_23_cast_fp16 = matmul(transpose_x = matmul_23_transpose_x_0, transpose_y = matmul_23_transpose_y_0, x = reshape_69_cast_fp16, y = reshape_70_cast_fp16)[name = string("matmul_23_cast_fp16")]; - tensor concat_351 = const()[name = string("concat_351"), val = tensor([1, 4, 64, 256])]; - tensor reshape_71_cast_fp16 = reshape(shape = concat_351, x = matmul_23_cast_fp16)[name = string("reshape_71_cast_fp16")]; - tensor var_19275_perm_0 = const()[name = string("op_19275_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_19294 = const()[name = string("op_19294"), val = tensor([1, 64, 1024])]; - tensor var_19275_cast_fp16 = transpose(perm = var_19275_perm_0, x = reshape_71_cast_fp16)[name = string("transpose_22")]; - tensor attn_output_235_cast_fp16 = reshape(shape = var_19294, x = var_19275_cast_fp16)[name = string("attn_output_235_cast_fp16")]; - tensor var_19299 = const()[name = string("op_19299"), val = tensor([0, 2, 1])]; - string var_19315_pad_type_0 = const()[name = string("op_19315_pad_type_0"), val = string("valid")]; - int32 var_19315_groups_0 = const()[name = string("op_19315_groups_0"), val = int32(1)]; - tensor var_19315_strides_0 = const()[name = string("op_19315_strides_0"), val = tensor([1])]; - tensor var_19315_pad_0 = const()[name = string("op_19315_pad_0"), val = tensor([0, 0])]; - tensor var_19315_dilations_0 = const()[name = string("op_19315_dilations_0"), val = tensor([1])]; - tensor squeeze_23_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490540672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491425472))))[name = string("squeeze_23_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_19300_cast_fp16 = transpose(perm = var_19299, x = attn_output_235_cast_fp16)[name = string("transpose_21")]; - tensor var_19315_cast_fp16 = conv(dilations = var_19315_dilations_0, groups = var_19315_groups_0, pad = var_19315_pad_0, pad_type = var_19315_pad_type_0, strides = var_19315_strides_0, weight = squeeze_23_cast_fp16_to_fp32_to_fp16_palettized, x = var_19300_cast_fp16)[name = string("op_19315_cast_fp16")]; - tensor var_19319 = const()[name = string("op_19319"), val = tensor([0, 2, 1])]; - int32 var_19330 = const()[name = string("op_19330"), val = int32(-1)]; - fp16 const_1000_promoted_to_fp16 = const()[name = string("const_1000_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_377_cast_fp16 = transpose(perm = var_19319, x = var_19315_cast_fp16)[name = string("transpose_20")]; - tensor var_19332_cast_fp16 = mul(x = hidden_states_377_cast_fp16, y = const_1000_promoted_to_fp16)[name = string("op_19332_cast_fp16")]; - bool input_471_interleave_0 = const()[name = string("input_471_interleave_0"), val = bool(false)]; - tensor input_471_cast_fp16 = concat(axis = var_19330, interleave = input_471_interleave_0, values = (hidden_states_377_cast_fp16, var_19332_cast_fp16))[name = string("input_471_cast_fp16")]; - tensor normed_565_axes_0 = const()[name = string("normed_565_axes_0"), val = tensor([-1])]; - fp16 var_19327_to_fp16 = const()[name = string("op_19327_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_565_cast_fp16 = layer_norm(axes = normed_565_axes_0, epsilon = var_19327_to_fp16, x = input_471_cast_fp16)[name = string("normed_565_cast_fp16")]; - tensor normed_567_begin_0 = const()[name = string("normed_567_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_567_end_0 = const()[name = string("normed_567_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_567_end_mask_0 = const()[name = string("normed_567_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_567_cast_fp16 = slice_by_index(begin = normed_567_begin_0, end = normed_567_end_0, end_mask = normed_567_end_mask_0, x = normed_565_cast_fp16)[name = string("normed_567_cast_fp16")]; - tensor var_19346_to_fp16 = const()[name = string("op_19346_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491443968)))]; - tensor attn_output_239_cast_fp16 = mul(x = normed_567_cast_fp16, y = var_19346_to_fp16)[name = string("attn_output_239_cast_fp16")]; - tensor hidden_states_379_cast_fp16 = add(x = hidden_states_369_cast_fp16, y = attn_output_239_cast_fp16)[name = string("hidden_states_379_cast_fp16")]; - int32 var_19359 = const()[name = string("op_19359"), val = int32(-1)]; - fp16 const_1004_promoted_to_fp16 = const()[name = string("const_1004_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_19361_cast_fp16 = mul(x = hidden_states_379_cast_fp16, y = const_1004_promoted_to_fp16)[name = string("op_19361_cast_fp16")]; - bool input_473_interleave_0 = const()[name = string("input_473_interleave_0"), val = bool(false)]; - tensor input_473_cast_fp16 = concat(axis = var_19359, interleave = input_473_interleave_0, values = (hidden_states_379_cast_fp16, var_19361_cast_fp16))[name = string("input_473_cast_fp16")]; - tensor normed_569_axes_0 = const()[name = string("normed_569_axes_0"), val = tensor([-1])]; - fp16 var_19356_to_fp16 = const()[name = string("op_19356_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_569_cast_fp16 = layer_norm(axes = normed_569_axes_0, epsilon = var_19356_to_fp16, x = input_473_cast_fp16)[name = string("normed_569_cast_fp16")]; - tensor normed_571_begin_0 = const()[name = string("normed_571_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_571_end_0 = const()[name = string("normed_571_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_571_end_mask_0 = const()[name = string("normed_571_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_571_cast_fp16 = slice_by_index(begin = normed_571_begin_0, end = normed_571_end_0, end_mask = normed_571_end_mask_0, x = normed_569_cast_fp16)[name = string("normed_571_cast_fp16")]; - tensor var_19375_to_fp16 = const()[name = string("op_19375_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491446336)))]; - tensor x_381_cast_fp16 = mul(x = normed_571_cast_fp16, y = var_19375_to_fp16)[name = string("x_381_cast_fp16")]; - tensor var_19387 = const()[name = string("op_19387"), val = tensor([0, 2, 1])]; - tensor input_475_axes_0 = const()[name = string("input_475_axes_0"), val = tensor([2])]; - tensor var_19388_cast_fp16 = transpose(perm = var_19387, x = x_381_cast_fp16)[name = string("transpose_19")]; - tensor input_475_cast_fp16 = expand_dims(axes = input_475_axes_0, x = var_19388_cast_fp16)[name = string("input_475_cast_fp16")]; - string x_383_pad_type_0 = const()[name = string("x_383_pad_type_0"), val = string("valid")]; - tensor x_383_strides_0 = const()[name = string("x_383_strides_0"), val = tensor([1, 1])]; - tensor x_383_pad_0 = const()[name = string("x_383_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_383_dilations_0 = const()[name = string("x_383_dilations_0"), val = tensor([1, 1])]; - int32 x_383_groups_0 = const()[name = string("x_383_groups_0"), val = int32(1)]; - tensor model_model_layers_23_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1471238848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1477210880))))[name = string("model_model_layers_23_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_383_cast_fp16 = conv(dilations = x_383_dilations_0, groups = x_383_groups_0, pad = x_383_pad_0, pad_type = x_383_pad_type_0, strides = x_383_strides_0, weight = model_model_layers_23_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_475_cast_fp16)[name = string("x_383_cast_fp16")]; - string b_47_pad_type_0 = const()[name = string("b_47_pad_type_0"), val = string("valid")]; - tensor b_47_strides_0 = const()[name = string("b_47_strides_0"), val = tensor([1, 1])]; - tensor b_47_pad_0 = const()[name = string("b_47_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_47_dilations_0 = const()[name = string("b_47_dilations_0"), val = tensor([1, 1])]; - int32 b_47_groups_0 = const()[name = string("b_47_groups_0"), val = int32(1)]; - tensor model_model_layers_23_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1477321536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1483293568))))[name = string("model_model_layers_23_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_47_cast_fp16 = conv(dilations = b_47_dilations_0, groups = b_47_groups_0, pad = b_47_pad_0, pad_type = b_47_pad_type_0, strides = b_47_strides_0, weight = model_model_layers_23_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_475_cast_fp16)[name = string("b_47_cast_fp16")]; - string var_19413_mode_0 = const()[name = string("op_19413_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_19413_cast_fp16 = gelu(mode = var_19413_mode_0, x = x_383_cast_fp16)[name = string("op_19413_cast_fp16")]; - tensor input_477_cast_fp16 = mul(x = var_19413_cast_fp16, y = b_47_cast_fp16)[name = string("input_477_cast_fp16")]; - string e_47_pad_type_0 = const()[name = string("e_47_pad_type_0"), val = string("valid")]; - tensor e_47_strides_0 = const()[name = string("e_47_strides_0"), val = tensor([1, 1])]; - tensor e_47_pad_0 = const()[name = string("e_47_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_47_dilations_0 = const()[name = string("e_47_dilations_0"), val = tensor([1, 1])]; - int32 e_47_groups_0 = const()[name = string("e_47_groups_0"), val = int32(1)]; - tensor model_model_layers_23_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(503614080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(509586112))))[name = string("model_model_layers_23_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_47_cast_fp16 = conv(dilations = e_47_dilations_0, groups = e_47_groups_0, pad = e_47_pad_0, pad_type = e_47_pad_type_0, strides = e_47_strides_0, weight = model_model_layers_23_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_477_cast_fp16)[name = string("e_47_cast_fp16")]; - tensor var_19421_axes_0 = const()[name = string("op_19421_axes_0"), val = tensor([2])]; - tensor var_19421_cast_fp16 = squeeze(axes = var_19421_axes_0, x = e_47_cast_fp16)[name = string("op_19421_cast_fp16")]; - tensor var_19422 = const()[name = string("op_19422"), val = tensor([0, 2, 1])]; - int32 var_19433 = const()[name = string("op_19433"), val = int32(-1)]; - fp16 const_1008_promoted_to_fp16 = const()[name = string("const_1008_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_381_cast_fp16 = transpose(perm = var_19422, x = var_19421_cast_fp16)[name = string("transpose_18")]; - tensor var_19435_cast_fp16 = mul(x = hidden_states_381_cast_fp16, y = const_1008_promoted_to_fp16)[name = string("op_19435_cast_fp16")]; - bool input_479_interleave_0 = const()[name = string("input_479_interleave_0"), val = bool(false)]; - tensor input_479_cast_fp16 = concat(axis = var_19433, interleave = input_479_interleave_0, values = (hidden_states_381_cast_fp16, var_19435_cast_fp16))[name = string("input_479_cast_fp16")]; - tensor normed_573_axes_0 = const()[name = string("normed_573_axes_0"), val = tensor([-1])]; - fp16 var_19430_to_fp16 = const()[name = string("op_19430_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_573_cast_fp16 = layer_norm(axes = normed_573_axes_0, epsilon = var_19430_to_fp16, x = input_479_cast_fp16)[name = string("normed_573_cast_fp16")]; - tensor normed_575_begin_0 = const()[name = string("normed_575_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_575_end_0 = const()[name = string("normed_575_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_575_end_mask_0 = const()[name = string("normed_575_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_575_cast_fp16 = slice_by_index(begin = normed_575_begin_0, end = normed_575_end_0, end_mask = normed_575_end_mask_0, x = normed_573_cast_fp16)[name = string("normed_575_cast_fp16")]; - tensor var_19449_to_fp16 = const()[name = string("op_19449_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(509604608)))]; - tensor hidden_states_383_cast_fp16 = mul(x = normed_575_cast_fp16, y = var_19449_to_fp16)[name = string("hidden_states_383_cast_fp16")]; - tensor hidden_states_385_cast_fp16 = add(x = hidden_states_379_cast_fp16, y = hidden_states_383_cast_fp16)[name = string("hidden_states_385_cast_fp16")]; - int32 var_19503 = const()[name = string("op_19503"), val = int32(-1)]; - fp16 const_1013_promoted_to_fp16 = const()[name = string("const_1013_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_19505_cast_fp16 = mul(x = hidden_states_385_cast_fp16, y = const_1013_promoted_to_fp16)[name = string("op_19505_cast_fp16")]; - bool input_481_interleave_0 = const()[name = string("input_481_interleave_0"), val = bool(false)]; - tensor input_481_cast_fp16 = concat(axis = var_19503, interleave = input_481_interleave_0, values = (hidden_states_385_cast_fp16, var_19505_cast_fp16))[name = string("input_481_cast_fp16")]; - tensor normed_577_axes_0 = const()[name = string("normed_577_axes_0"), val = tensor([-1])]; - fp16 var_19500_to_fp16 = const()[name = string("op_19500_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_577_cast_fp16 = layer_norm(axes = normed_577_axes_0, epsilon = var_19500_to_fp16, x = input_481_cast_fp16)[name = string("normed_577_cast_fp16")]; - tensor normed_579_begin_0 = const()[name = string("normed_579_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_579_end_0 = const()[name = string("normed_579_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_579_end_mask_0 = const()[name = string("normed_579_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_579_cast_fp16 = slice_by_index(begin = normed_579_begin_0, end = normed_579_end_0, end_mask = normed_579_end_mask_0, x = normed_577_cast_fp16)[name = string("normed_579_cast_fp16")]; - tensor var_19519_to_fp16 = const()[name = string("op_19519_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(509606976)))]; - tensor hidden_states_387_cast_fp16 = mul(x = normed_579_cast_fp16, y = var_19519_to_fp16)[name = string("hidden_states_387_cast_fp16")]; - tensor var_19530 = const()[name = string("op_19530"), val = tensor([0, 2, 1])]; - tensor var_19533_axes_0 = const()[name = string("op_19533_axes_0"), val = tensor([2])]; - tensor var_19531_cast_fp16 = transpose(perm = var_19530, x = hidden_states_387_cast_fp16)[name = string("transpose_17")]; - tensor var_19533_cast_fp16 = expand_dims(axes = var_19533_axes_0, x = var_19531_cast_fp16)[name = string("op_19533_cast_fp16")]; - string query_states_193_pad_type_0 = const()[name = string("query_states_193_pad_type_0"), val = string("valid")]; - tensor query_states_193_strides_0 = const()[name = string("query_states_193_strides_0"), val = tensor([1, 1])]; - tensor query_states_193_pad_0 = const()[name = string("query_states_193_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_193_dilations_0 = const()[name = string("query_states_193_dilations_0"), val = tensor([1, 1])]; - int32 query_states_193_groups_0 = const()[name = string("query_states_193_groups_0"), val = int32(1)]; - tensor query_states_193 = conv(dilations = query_states_193_dilations_0, groups = query_states_193_groups_0, pad = query_states_193_pad_0, pad_type = query_states_193_pad_type_0, strides = query_states_193_strides_0, weight = model_model_layers_24_self_attn_q_proj_weight_palettized, x = var_19533_cast_fp16)[name = string("query_states_193")]; - string key_states_241_pad_type_0 = const()[name = string("key_states_241_pad_type_0"), val = string("valid")]; - tensor key_states_241_strides_0 = const()[name = string("key_states_241_strides_0"), val = tensor([1, 1])]; - tensor key_states_241_pad_0 = const()[name = string("key_states_241_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_241_dilations_0 = const()[name = string("key_states_241_dilations_0"), val = tensor([1, 1])]; - int32 key_states_241_groups_0 = const()[name = string("key_states_241_groups_0"), val = int32(1)]; - tensor key_states_241 = conv(dilations = key_states_241_dilations_0, groups = key_states_241_groups_0, pad = key_states_241_pad_0, pad_type = key_states_241_pad_type_0, strides = key_states_241_strides_0, weight = model_model_layers_24_self_attn_k_proj_weight_palettized, x = var_19533_cast_fp16)[name = string("key_states_241")]; - string value_states_193_pad_type_0 = const()[name = string("value_states_193_pad_type_0"), val = string("valid")]; - tensor value_states_193_strides_0 = const()[name = string("value_states_193_strides_0"), val = tensor([1, 1])]; - tensor value_states_193_pad_0 = const()[name = string("value_states_193_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_193_dilations_0 = const()[name = string("value_states_193_dilations_0"), val = tensor([1, 1])]; - int32 value_states_193_groups_0 = const()[name = string("value_states_193_groups_0"), val = int32(1)]; - tensor value_states_193 = conv(dilations = value_states_193_dilations_0, groups = value_states_193_groups_0, pad = value_states_193_pad_0, pad_type = value_states_193_pad_type_0, strides = value_states_193_strides_0, weight = model_model_layers_24_self_attn_v_proj_weight_palettized, x = var_19533_cast_fp16)[name = string("value_states_193")]; - tensor var_19575 = const()[name = string("op_19575"), val = tensor([1, 4, 256, 64])]; - tensor var_19576 = reshape(shape = var_19575, x = query_states_193)[name = string("op_19576")]; - tensor var_19581 = const()[name = string("op_19581"), val = tensor([0, 1, 3, 2])]; - tensor var_19586 = const()[name = string("op_19586"), val = tensor([1, 1, 256, 64])]; - tensor var_19587 = reshape(shape = var_19586, x = key_states_241)[name = string("op_19587")]; - tensor var_19592 = const()[name = string("op_19592"), val = tensor([0, 1, 3, 2])]; - tensor var_19597 = const()[name = string("op_19597"), val = tensor([1, 1, 256, 64])]; - tensor var_19598 = reshape(shape = var_19597, x = value_states_193)[name = string("op_19598")]; - tensor var_19603 = const()[name = string("op_19603"), val = tensor([0, 1, 3, 2])]; - int32 var_19614 = const()[name = string("op_19614"), val = int32(-1)]; - fp16 const_1018_promoted = const()[name = string("const_1018_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_389 = transpose(perm = var_19581, x = var_19576)[name = string("transpose_16")]; - tensor var_19616 = mul(x = hidden_states_389, y = const_1018_promoted)[name = string("op_19616")]; - bool input_485_interleave_0 = const()[name = string("input_485_interleave_0"), val = bool(false)]; - tensor input_485 = concat(axis = var_19614, interleave = input_485_interleave_0, values = (hidden_states_389, var_19616))[name = string("input_485")]; - tensor normed_581_axes_0 = const()[name = string("normed_581_axes_0"), val = tensor([-1])]; - fp16 var_19611_to_fp16 = const()[name = string("op_19611_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_581_cast_fp16 = layer_norm(axes = normed_581_axes_0, epsilon = var_19611_to_fp16, x = input_485)[name = string("normed_581_cast_fp16")]; - tensor normed_583_begin_0 = const()[name = string("normed_583_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_583_end_0 = const()[name = string("normed_583_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_583_end_mask_0 = const()[name = string("normed_583_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_583 = slice_by_index(begin = normed_583_begin_0, end = normed_583_end_0, end_mask = normed_583_end_mask_0, x = normed_581_cast_fp16)[name = string("normed_583")]; - tensor var_19630_to_fp16 = const()[name = string("op_19630_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(509609344)))]; - tensor q_49_cast_fp16 = mul(x = normed_583, y = var_19630_to_fp16)[name = string("q_49_cast_fp16")]; - int32 var_19641 = const()[name = string("op_19641"), val = int32(-1)]; - fp16 const_1022_promoted = const()[name = string("const_1022_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_391 = transpose(perm = var_19592, x = var_19587)[name = string("transpose_15")]; - tensor var_19643 = mul(x = hidden_states_391, y = const_1022_promoted)[name = string("op_19643")]; - bool input_487_interleave_0 = const()[name = string("input_487_interleave_0"), val = bool(false)]; - tensor input_487 = concat(axis = var_19641, interleave = input_487_interleave_0, values = (hidden_states_391, var_19643))[name = string("input_487")]; - tensor normed_585_axes_0 = const()[name = string("normed_585_axes_0"), val = tensor([-1])]; - fp16 var_19638_to_fp16 = const()[name = string("op_19638_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_585_cast_fp16 = layer_norm(axes = normed_585_axes_0, epsilon = var_19638_to_fp16, x = input_487)[name = string("normed_585_cast_fp16")]; - tensor normed_587_begin_0 = const()[name = string("normed_587_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_587_end_0 = const()[name = string("normed_587_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_587_end_mask_0 = const()[name = string("normed_587_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_587 = slice_by_index(begin = normed_587_begin_0, end = normed_587_end_0, end_mask = normed_587_end_mask_0, x = normed_585_cast_fp16)[name = string("normed_587")]; - tensor var_19657_to_fp16 = const()[name = string("op_19657_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(509609920)))]; - tensor k_49_cast_fp16 = mul(x = normed_587, y = var_19657_to_fp16)[name = string("k_49_cast_fp16")]; - tensor var_19671_cast_fp16 = mul(x = q_49_cast_fp16, y = cos_5)[name = string("op_19671_cast_fp16")]; - tensor x1_97_begin_0 = const()[name = string("x1_97_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_97_end_0 = const()[name = string("x1_97_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_97_end_mask_0 = const()[name = string("x1_97_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_97_cast_fp16 = slice_by_index(begin = x1_97_begin_0, end = x1_97_end_0, end_mask = x1_97_end_mask_0, x = q_49_cast_fp16)[name = string("x1_97_cast_fp16")]; - tensor x2_97_begin_0 = const()[name = string("x2_97_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_97_end_0 = const()[name = string("x2_97_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_97_end_mask_0 = const()[name = string("x2_97_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_97_cast_fp16 = slice_by_index(begin = x2_97_begin_0, end = x2_97_end_0, end_mask = x2_97_end_mask_0, x = q_49_cast_fp16)[name = string("x2_97_cast_fp16")]; - fp16 const_1028_promoted_to_fp16 = const()[name = string("const_1028_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_19692_cast_fp16 = mul(x = x2_97_cast_fp16, y = const_1028_promoted_to_fp16)[name = string("op_19692_cast_fp16")]; - int32 var_19694 = const()[name = string("op_19694"), val = int32(-1)]; - bool var_19695_interleave_0 = const()[name = string("op_19695_interleave_0"), val = bool(false)]; - tensor var_19695_cast_fp16 = concat(axis = var_19694, interleave = var_19695_interleave_0, values = (var_19692_cast_fp16, x1_97_cast_fp16))[name = string("op_19695_cast_fp16")]; - tensor var_19696_cast_fp16 = mul(x = var_19695_cast_fp16, y = sin_5)[name = string("op_19696_cast_fp16")]; - tensor query_states_195_cast_fp16 = add(x = var_19671_cast_fp16, y = var_19696_cast_fp16)[name = string("query_states_195_cast_fp16")]; - tensor var_19699_cast_fp16 = mul(x = k_49_cast_fp16, y = cos_5)[name = string("op_19699_cast_fp16")]; - tensor x1_99_begin_0 = const()[name = string("x1_99_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_99_end_0 = const()[name = string("x1_99_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_99_end_mask_0 = const()[name = string("x1_99_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_99_cast_fp16 = slice_by_index(begin = x1_99_begin_0, end = x1_99_end_0, end_mask = x1_99_end_mask_0, x = k_49_cast_fp16)[name = string("x1_99_cast_fp16")]; - tensor x2_99_begin_0 = const()[name = string("x2_99_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_99_end_0 = const()[name = string("x2_99_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_99_end_mask_0 = const()[name = string("x2_99_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_99_cast_fp16 = slice_by_index(begin = x2_99_begin_0, end = x2_99_end_0, end_mask = x2_99_end_mask_0, x = k_49_cast_fp16)[name = string("x2_99_cast_fp16")]; - fp16 const_1031_promoted_to_fp16 = const()[name = string("const_1031_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_19720_cast_fp16 = mul(x = x2_99_cast_fp16, y = const_1031_promoted_to_fp16)[name = string("op_19720_cast_fp16")]; - int32 var_19722 = const()[name = string("op_19722"), val = int32(-1)]; - bool var_19723_interleave_0 = const()[name = string("op_19723_interleave_0"), val = bool(false)]; - tensor var_19723_cast_fp16 = concat(axis = var_19722, interleave = var_19723_interleave_0, values = (var_19720_cast_fp16, x1_99_cast_fp16))[name = string("op_19723_cast_fp16")]; - tensor var_19724_cast_fp16 = mul(x = var_19723_cast_fp16, y = sin_5)[name = string("op_19724_cast_fp16")]; - tensor key_states_243_cast_fp16 = add(x = var_19699_cast_fp16, y = var_19724_cast_fp16)[name = string("key_states_243_cast_fp16")]; - tensor key_slice_41_begin_0 = const()[name = string("key_slice_41_begin_0"), val = tensor([20, 0, 0, 0])]; - tensor key_slice_41_end_0 = const()[name = string("key_slice_41_end_0"), val = tensor([21, 1, 512, 256])]; - tensor key_slice_41_end_mask_0 = const()[name = string("key_slice_41_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_41_cast_fp16 = slice_by_index(begin = key_slice_41_begin_0, end = key_slice_41_end_0, end_mask = key_slice_41_end_mask_0, x = coreml_update_state_97)[name = string("key_slice_41_cast_fp16")]; - tensor var_19761_begin_0 = const()[name = string("op_19761_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_19761_end_0 = const()[name = string("op_19761_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_19761_end_mask_0 = const()[name = string("op_19761_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_19761_cast_fp16 = slice_by_index(begin = var_19761_begin_0, end = var_19761_end_0, end_mask = var_19761_end_mask_0, x = key_slice_41_cast_fp16)[name = string("op_19761_cast_fp16")]; - int32 var_19788 = const()[name = string("op_19788"), val = int32(2)]; - bool shifted_key_41_interleave_0 = const()[name = string("shifted_key_41_interleave_0"), val = bool(false)]; - tensor shifted_key_41_cast_fp16 = concat(axis = var_19788, interleave = shifted_key_41_interleave_0, values = (var_19761_cast_fp16, key_states_243_cast_fp16))[name = string("shifted_key_41_cast_fp16")]; - tensor concat_352 = const()[name = string("concat_352"), val = tensor([20, 0, 0, 0])]; - tensor concat_353 = const()[name = string("concat_353"), val = tensor([21, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_41_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_41_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_41_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_41_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_41_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_41_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_41_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_41_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_41_cast_fp16 = slice_update(begin = concat_352, begin_mask = model_model_kv_cache_local_internal_tensor_assign_41_begin_mask_0, end = concat_353, end_mask = model_model_kv_cache_local_internal_tensor_assign_41_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_41_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_41_stride_0, update = shifted_key_41_cast_fp16, x = coreml_update_state_97)[name = string("model_model_kv_cache_local_internal_tensor_assign_41_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_41_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_204_write_state")]; - tensor coreml_update_state_100 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_204")]; - tensor value_slice_41_begin_0 = const()[name = string("value_slice_41_begin_0"), val = tensor([42, 0, 0, 0])]; - tensor value_slice_41_end_0 = const()[name = string("value_slice_41_end_0"), val = tensor([43, 1, 512, 256])]; - tensor value_slice_41_end_mask_0 = const()[name = string("value_slice_41_end_mask_0"), val = tensor([false, true, true, true])]; - tensor value_slice_41_cast_fp16 = slice_by_index(begin = value_slice_41_begin_0, end = value_slice_41_end_0, end_mask = value_slice_41_end_mask_0, x = coreml_update_state_100)[name = string("value_slice_41_cast_fp16")]; - tensor var_19831_begin_0 = const()[name = string("op_19831_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_19831_end_0 = const()[name = string("op_19831_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_19831_end_mask_0 = const()[name = string("op_19831_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_19831_cast_fp16 = slice_by_index(begin = var_19831_begin_0, end = var_19831_end_0, end_mask = var_19831_end_mask_0, x = value_slice_41_cast_fp16)[name = string("op_19831_cast_fp16")]; - int32 var_19858 = const()[name = string("op_19858"), val = int32(2)]; - bool shifted_value_41_interleave_0 = const()[name = string("shifted_value_41_interleave_0"), val = bool(false)]; - tensor value_states_195 = transpose(perm = var_19603, x = var_19598)[name = string("transpose_14")]; - tensor shifted_value_41_cast_fp16 = concat(axis = var_19858, interleave = shifted_value_41_interleave_0, values = (var_19831_cast_fp16, value_states_195))[name = string("shifted_value_41_cast_fp16")]; - tensor concat_354 = const()[name = string("concat_354"), val = tensor([42, 0, 0, 0])]; - tensor concat_355 = const()[name = string("concat_355"), val = tensor([43, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_42_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_42_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_42_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_42_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_42_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_42_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_42_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_42_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_42_cast_fp16 = slice_update(begin = concat_354, begin_mask = model_model_kv_cache_local_internal_tensor_assign_42_begin_mask_0, end = concat_355, end_mask = model_model_kv_cache_local_internal_tensor_assign_42_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_42_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_42_stride_0, update = shifted_value_41_cast_fp16, x = coreml_update_state_100)[name = string("model_model_kv_cache_local_internal_tensor_assign_42_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_42_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_205_write_state")]; - tensor coreml_update_state_101 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_205")]; - tensor var_19886_begin_0 = const()[name = string("op_19886_begin_0"), val = tensor([20, 0, 0, 0])]; - tensor var_19886_end_0 = const()[name = string("op_19886_end_0"), val = tensor([21, 1, 512, 256])]; - tensor var_19886_end_mask_0 = const()[name = string("op_19886_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_19886_cast_fp16 = slice_by_index(begin = var_19886_begin_0, end = var_19886_end_0, end_mask = var_19886_end_mask_0, x = coreml_update_state_101)[name = string("op_19886_cast_fp16")]; - tensor var_19893_begin_0 = const()[name = string("op_19893_begin_0"), val = tensor([42, 0, 0, 0])]; - tensor var_19893_end_0 = const()[name = string("op_19893_end_0"), val = tensor([43, 1, 512, 256])]; - tensor var_19893_end_mask_0 = const()[name = string("op_19893_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_19893_cast_fp16 = slice_by_index(begin = var_19893_begin_0, end = var_19893_end_0, end_mask = var_19893_end_mask_0, x = coreml_update_state_101)[name = string("op_19893_cast_fp16")]; - tensor var_19932 = const()[name = string("op_19932"), val = tensor([1, 4, 1, 1])]; - tensor x_389_cast_fp16 = tile(reps = var_19932, x = var_19886_cast_fp16)[name = string("x_389_cast_fp16")]; - tensor var_19952 = const()[name = string("op_19952"), val = tensor([1, 4, 1, 1])]; - tensor x_395_cast_fp16 = tile(reps = var_19952, x = var_19893_cast_fp16)[name = string("x_395_cast_fp16")]; - bool var_19979_transpose_x_0 = const()[name = string("op_19979_transpose_x_0"), val = bool(false)]; - bool var_19979_transpose_y_0 = const()[name = string("op_19979_transpose_y_0"), val = bool(true)]; - tensor var_19979 = matmul(transpose_x = var_19979_transpose_x_0, transpose_y = var_19979_transpose_y_0, x = query_states_195_cast_fp16, y = x_389_cast_fp16)[name = string("op_19979")]; - fp16 var_19980_to_fp16 = const()[name = string("op_19980_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_97_cast_fp16 = mul(x = var_19979, y = var_19980_to_fp16)[name = string("attn_weights_97_cast_fp16")]; - tensor attn_weights_99_cast_fp16 = add(x = attn_weights_97_cast_fp16, y = mask_slice_1)[name = string("attn_weights_99_cast_fp16")]; - int32 var_20015 = const()[name = string("op_20015"), val = int32(-1)]; - tensor var_20017_cast_fp16 = softmax(axis = var_20015, x = attn_weights_99_cast_fp16)[name = string("op_20017_cast_fp16")]; - tensor concat_360 = const()[name = string("concat_360"), val = tensor([4, 64, 512])]; - tensor reshape_72_cast_fp16 = reshape(shape = concat_360, x = var_20017_cast_fp16)[name = string("reshape_72_cast_fp16")]; - tensor concat_361 = const()[name = string("concat_361"), val = tensor([4, 512, 256])]; - tensor reshape_73_cast_fp16 = reshape(shape = concat_361, x = x_395_cast_fp16)[name = string("reshape_73_cast_fp16")]; - bool matmul_24_transpose_x_0 = const()[name = string("matmul_24_transpose_x_0"), val = bool(false)]; - bool matmul_24_transpose_y_0 = const()[name = string("matmul_24_transpose_y_0"), val = bool(false)]; - tensor matmul_24_cast_fp16 = matmul(transpose_x = matmul_24_transpose_x_0, transpose_y = matmul_24_transpose_y_0, x = reshape_72_cast_fp16, y = reshape_73_cast_fp16)[name = string("matmul_24_cast_fp16")]; - tensor concat_365 = const()[name = string("concat_365"), val = tensor([1, 4, 64, 256])]; - tensor reshape_74_cast_fp16 = reshape(shape = concat_365, x = matmul_24_cast_fp16)[name = string("reshape_74_cast_fp16")]; - tensor var_20029_perm_0 = const()[name = string("op_20029_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_20048 = const()[name = string("op_20048"), val = tensor([1, 64, 1024])]; - tensor var_20029_cast_fp16 = transpose(perm = var_20029_perm_0, x = reshape_74_cast_fp16)[name = string("transpose_13")]; - tensor attn_output_245_cast_fp16 = reshape(shape = var_20048, x = var_20029_cast_fp16)[name = string("attn_output_245_cast_fp16")]; - tensor var_20053 = const()[name = string("op_20053"), val = tensor([0, 2, 1])]; - string var_20069_pad_type_0 = const()[name = string("op_20069_pad_type_0"), val = string("valid")]; - int32 var_20069_groups_0 = const()[name = string("op_20069_groups_0"), val = int32(1)]; - tensor var_20069_strides_0 = const()[name = string("op_20069_strides_0"), val = tensor([1])]; - tensor var_20069_pad_0 = const()[name = string("op_20069_pad_0"), val = tensor([0, 0])]; - tensor var_20069_dilations_0 = const()[name = string("op_20069_dilations_0"), val = tensor([1])]; - tensor squeeze_24_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(509610496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(510495296))))[name = string("squeeze_24_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_20054_cast_fp16 = transpose(perm = var_20053, x = attn_output_245_cast_fp16)[name = string("transpose_12")]; - tensor var_20069_cast_fp16 = conv(dilations = var_20069_dilations_0, groups = var_20069_groups_0, pad = var_20069_pad_0, pad_type = var_20069_pad_type_0, strides = var_20069_strides_0, weight = squeeze_24_cast_fp16_to_fp32_to_fp16_palettized, x = var_20054_cast_fp16)[name = string("op_20069_cast_fp16")]; - tensor var_20073 = const()[name = string("op_20073"), val = tensor([0, 2, 1])]; - int32 var_20084 = const()[name = string("op_20084"), val = int32(-1)]; - fp16 const_1042_promoted_to_fp16 = const()[name = string("const_1042_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_393_cast_fp16 = transpose(perm = var_20073, x = var_20069_cast_fp16)[name = string("transpose_11")]; - tensor var_20086_cast_fp16 = mul(x = hidden_states_393_cast_fp16, y = const_1042_promoted_to_fp16)[name = string("op_20086_cast_fp16")]; - bool input_491_interleave_0 = const()[name = string("input_491_interleave_0"), val = bool(false)]; - tensor input_491_cast_fp16 = concat(axis = var_20084, interleave = input_491_interleave_0, values = (hidden_states_393_cast_fp16, var_20086_cast_fp16))[name = string("input_491_cast_fp16")]; - tensor normed_589_axes_0 = const()[name = string("normed_589_axes_0"), val = tensor([-1])]; - fp16 var_20081_to_fp16 = const()[name = string("op_20081_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_589_cast_fp16 = layer_norm(axes = normed_589_axes_0, epsilon = var_20081_to_fp16, x = input_491_cast_fp16)[name = string("normed_589_cast_fp16")]; - tensor normed_591_begin_0 = const()[name = string("normed_591_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_591_end_0 = const()[name = string("normed_591_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_591_end_mask_0 = const()[name = string("normed_591_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_591_cast_fp16 = slice_by_index(begin = normed_591_begin_0, end = normed_591_end_0, end_mask = normed_591_end_mask_0, x = normed_589_cast_fp16)[name = string("normed_591_cast_fp16")]; - tensor var_20100_to_fp16 = const()[name = string("op_20100_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(510513792)))]; - tensor attn_output_249_cast_fp16 = mul(x = normed_591_cast_fp16, y = var_20100_to_fp16)[name = string("attn_output_249_cast_fp16")]; - tensor hidden_states_395_cast_fp16 = add(x = hidden_states_385_cast_fp16, y = attn_output_249_cast_fp16)[name = string("hidden_states_395_cast_fp16")]; - int32 var_20113 = const()[name = string("op_20113"), val = int32(-1)]; - fp16 const_1046_promoted_to_fp16 = const()[name = string("const_1046_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_20115_cast_fp16 = mul(x = hidden_states_395_cast_fp16, y = const_1046_promoted_to_fp16)[name = string("op_20115_cast_fp16")]; - bool input_493_interleave_0 = const()[name = string("input_493_interleave_0"), val = bool(false)]; - tensor input_493_cast_fp16 = concat(axis = var_20113, interleave = input_493_interleave_0, values = (hidden_states_395_cast_fp16, var_20115_cast_fp16))[name = string("input_493_cast_fp16")]; - tensor normed_593_axes_0 = const()[name = string("normed_593_axes_0"), val = tensor([-1])]; - fp16 var_20110_to_fp16 = const()[name = string("op_20110_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_593_cast_fp16 = layer_norm(axes = normed_593_axes_0, epsilon = var_20110_to_fp16, x = input_493_cast_fp16)[name = string("normed_593_cast_fp16")]; - tensor normed_595_begin_0 = const()[name = string("normed_595_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_595_end_0 = const()[name = string("normed_595_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_595_end_mask_0 = const()[name = string("normed_595_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_595_cast_fp16 = slice_by_index(begin = normed_595_begin_0, end = normed_595_end_0, end_mask = normed_595_end_mask_0, x = normed_593_cast_fp16)[name = string("normed_595_cast_fp16")]; - tensor var_20129_to_fp16 = const()[name = string("op_20129_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(510516160)))]; - tensor x_397_cast_fp16 = mul(x = normed_595_cast_fp16, y = var_20129_to_fp16)[name = string("x_397_cast_fp16")]; - tensor var_20141 = const()[name = string("op_20141"), val = tensor([0, 2, 1])]; - tensor input_495_axes_0 = const()[name = string("input_495_axes_0"), val = tensor([2])]; - tensor var_20142_cast_fp16 = transpose(perm = var_20141, x = x_397_cast_fp16)[name = string("transpose_10")]; - tensor input_495_cast_fp16 = expand_dims(axes = input_495_axes_0, x = var_20142_cast_fp16)[name = string("input_495_cast_fp16")]; - string x_399_pad_type_0 = const()[name = string("x_399_pad_type_0"), val = string("valid")]; - tensor x_399_strides_0 = const()[name = string("x_399_strides_0"), val = tensor([1, 1])]; - tensor x_399_pad_0 = const()[name = string("x_399_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_399_dilations_0 = const()[name = string("x_399_dilations_0"), val = tensor([1, 1])]; - int32 x_399_groups_0 = const()[name = string("x_399_groups_0"), val = int32(1)]; - tensor model_model_layers_24_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1483404224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1489376256))))[name = string("model_model_layers_24_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_399_cast_fp16 = conv(dilations = x_399_dilations_0, groups = x_399_groups_0, pad = x_399_pad_0, pad_type = x_399_pad_type_0, strides = x_399_strides_0, weight = model_model_layers_24_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_495_cast_fp16)[name = string("x_399_cast_fp16")]; - string b_49_pad_type_0 = const()[name = string("b_49_pad_type_0"), val = string("valid")]; - tensor b_49_strides_0 = const()[name = string("b_49_strides_0"), val = tensor([1, 1])]; - tensor b_49_pad_0 = const()[name = string("b_49_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_49_dilations_0 = const()[name = string("b_49_dilations_0"), val = tensor([1, 1])]; - int32 b_49_groups_0 = const()[name = string("b_49_groups_0"), val = int32(1)]; - tensor model_model_layers_24_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1489486912))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1495458944))))[name = string("model_model_layers_24_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_49_cast_fp16 = conv(dilations = b_49_dilations_0, groups = b_49_groups_0, pad = b_49_pad_0, pad_type = b_49_pad_type_0, strides = b_49_strides_0, weight = model_model_layers_24_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_495_cast_fp16)[name = string("b_49_cast_fp16")]; - string var_20167_mode_0 = const()[name = string("op_20167_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_20167_cast_fp16 = gelu(mode = var_20167_mode_0, x = x_399_cast_fp16)[name = string("op_20167_cast_fp16")]; - tensor input_497_cast_fp16 = mul(x = var_20167_cast_fp16, y = b_49_cast_fp16)[name = string("input_497_cast_fp16")]; - string e_49_pad_type_0 = const()[name = string("e_49_pad_type_0"), val = string("valid")]; - tensor e_49_strides_0 = const()[name = string("e_49_strides_0"), val = tensor([1, 1])]; - tensor e_49_pad_0 = const()[name = string("e_49_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_49_dilations_0 = const()[name = string("e_49_dilations_0"), val = tensor([1, 1])]; - int32 e_49_groups_0 = const()[name = string("e_49_groups_0"), val = int32(1)]; - tensor model_model_layers_24_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(522683904))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528655936))))[name = string("model_model_layers_24_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_49_cast_fp16 = conv(dilations = e_49_dilations_0, groups = e_49_groups_0, pad = e_49_pad_0, pad_type = e_49_pad_type_0, strides = e_49_strides_0, weight = model_model_layers_24_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_497_cast_fp16)[name = string("e_49_cast_fp16")]; - tensor var_20175_axes_0 = const()[name = string("op_20175_axes_0"), val = tensor([2])]; - tensor var_20175_cast_fp16 = squeeze(axes = var_20175_axes_0, x = e_49_cast_fp16)[name = string("op_20175_cast_fp16")]; - tensor var_20176 = const()[name = string("op_20176"), val = tensor([0, 2, 1])]; - int32 var_20187 = const()[name = string("op_20187"), val = int32(-1)]; - fp16 const_1050_promoted_to_fp16 = const()[name = string("const_1050_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_397_cast_fp16 = transpose(perm = var_20176, x = var_20175_cast_fp16)[name = string("transpose_9")]; - tensor var_20189_cast_fp16 = mul(x = hidden_states_397_cast_fp16, y = const_1050_promoted_to_fp16)[name = string("op_20189_cast_fp16")]; - bool input_499_interleave_0 = const()[name = string("input_499_interleave_0"), val = bool(false)]; - tensor input_499_cast_fp16 = concat(axis = var_20187, interleave = input_499_interleave_0, values = (hidden_states_397_cast_fp16, var_20189_cast_fp16))[name = string("input_499_cast_fp16")]; - tensor normed_597_axes_0 = const()[name = string("normed_597_axes_0"), val = tensor([-1])]; - fp16 var_20184_to_fp16 = const()[name = string("op_20184_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_597_cast_fp16 = layer_norm(axes = normed_597_axes_0, epsilon = var_20184_to_fp16, x = input_499_cast_fp16)[name = string("normed_597_cast_fp16")]; - tensor normed_599_begin_0 = const()[name = string("normed_599_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_599_end_0 = const()[name = string("normed_599_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_599_end_mask_0 = const()[name = string("normed_599_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_599_cast_fp16 = slice_by_index(begin = normed_599_begin_0, end = normed_599_end_0, end_mask = normed_599_end_mask_0, x = normed_597_cast_fp16)[name = string("normed_599_cast_fp16")]; - tensor var_20203_to_fp16 = const()[name = string("op_20203_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528674432)))]; - tensor hidden_states_399_cast_fp16 = mul(x = normed_599_cast_fp16, y = var_20203_to_fp16)[name = string("hidden_states_399_cast_fp16")]; - tensor hidden_states_401_cast_fp16 = add(x = hidden_states_395_cast_fp16, y = hidden_states_399_cast_fp16)[name = string("hidden_states_401_cast_fp16")]; - int32 var_20257 = const()[name = string("op_20257"), val = int32(-1)]; - fp16 const_1055_promoted_to_fp16 = const()[name = string("const_1055_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_20259_cast_fp16 = mul(x = hidden_states_401_cast_fp16, y = const_1055_promoted_to_fp16)[name = string("op_20259_cast_fp16")]; - bool input_501_interleave_0 = const()[name = string("input_501_interleave_0"), val = bool(false)]; - tensor input_501_cast_fp16 = concat(axis = var_20257, interleave = input_501_interleave_0, values = (hidden_states_401_cast_fp16, var_20259_cast_fp16))[name = string("input_501_cast_fp16")]; - tensor normed_601_axes_0 = const()[name = string("normed_601_axes_0"), val = tensor([-1])]; - fp16 var_20254_to_fp16 = const()[name = string("op_20254_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_601_cast_fp16 = layer_norm(axes = normed_601_axes_0, epsilon = var_20254_to_fp16, x = input_501_cast_fp16)[name = string("normed_601_cast_fp16")]; - tensor normed_603_begin_0 = const()[name = string("normed_603_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_603_end_0 = const()[name = string("normed_603_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_603_end_mask_0 = const()[name = string("normed_603_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_603_cast_fp16 = slice_by_index(begin = normed_603_begin_0, end = normed_603_end_0, end_mask = normed_603_end_mask_0, x = normed_601_cast_fp16)[name = string("normed_603_cast_fp16")]; - tensor var_20273_to_fp16 = const()[name = string("op_20273_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528676800)))]; - tensor hidden_states_403_cast_fp16 = mul(x = normed_603_cast_fp16, y = var_20273_to_fp16)[name = string("hidden_states_403_cast_fp16")]; - tensor var_20284 = const()[name = string("op_20284"), val = tensor([0, 2, 1])]; - tensor var_20287_axes_0 = const()[name = string("op_20287_axes_0"), val = tensor([2])]; - tensor var_20285_cast_fp16 = transpose(perm = var_20284, x = hidden_states_403_cast_fp16)[name = string("transpose_8")]; - tensor var_20287_cast_fp16 = expand_dims(axes = var_20287_axes_0, x = var_20285_cast_fp16)[name = string("op_20287_cast_fp16")]; - string query_states_201_pad_type_0 = const()[name = string("query_states_201_pad_type_0"), val = string("valid")]; - tensor query_states_201_strides_0 = const()[name = string("query_states_201_strides_0"), val = tensor([1, 1])]; - tensor query_states_201_pad_0 = const()[name = string("query_states_201_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor query_states_201_dilations_0 = const()[name = string("query_states_201_dilations_0"), val = tensor([1, 1])]; - int32 query_states_201_groups_0 = const()[name = string("query_states_201_groups_0"), val = int32(1)]; - tensor query_states_201 = conv(dilations = query_states_201_dilations_0, groups = query_states_201_groups_0, pad = query_states_201_pad_0, pad_type = query_states_201_pad_type_0, strides = query_states_201_strides_0, weight = model_model_layers_25_self_attn_q_proj_weight_palettized, x = var_20287_cast_fp16)[name = string("query_states_201")]; - string key_states_251_pad_type_0 = const()[name = string("key_states_251_pad_type_0"), val = string("valid")]; - tensor key_states_251_strides_0 = const()[name = string("key_states_251_strides_0"), val = tensor([1, 1])]; - tensor key_states_251_pad_0 = const()[name = string("key_states_251_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor key_states_251_dilations_0 = const()[name = string("key_states_251_dilations_0"), val = tensor([1, 1])]; - int32 key_states_251_groups_0 = const()[name = string("key_states_251_groups_0"), val = int32(1)]; - tensor key_states_251 = conv(dilations = key_states_251_dilations_0, groups = key_states_251_groups_0, pad = key_states_251_pad_0, pad_type = key_states_251_pad_type_0, strides = key_states_251_strides_0, weight = model_model_layers_25_self_attn_k_proj_weight_palettized, x = var_20287_cast_fp16)[name = string("key_states_251")]; - string value_states_201_pad_type_0 = const()[name = string("value_states_201_pad_type_0"), val = string("valid")]; - tensor value_states_201_strides_0 = const()[name = string("value_states_201_strides_0"), val = tensor([1, 1])]; - tensor value_states_201_pad_0 = const()[name = string("value_states_201_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor value_states_201_dilations_0 = const()[name = string("value_states_201_dilations_0"), val = tensor([1, 1])]; - int32 value_states_201_groups_0 = const()[name = string("value_states_201_groups_0"), val = int32(1)]; - tensor value_states_201 = conv(dilations = value_states_201_dilations_0, groups = value_states_201_groups_0, pad = value_states_201_pad_0, pad_type = value_states_201_pad_type_0, strides = value_states_201_strides_0, weight = model_model_layers_25_self_attn_v_proj_weight_palettized, x = var_20287_cast_fp16)[name = string("value_states_201")]; - tensor var_20329 = const()[name = string("op_20329"), val = tensor([1, 4, 256, 64])]; - tensor var_20330 = reshape(shape = var_20329, x = query_states_201)[name = string("op_20330")]; - tensor var_20335 = const()[name = string("op_20335"), val = tensor([0, 1, 3, 2])]; - tensor var_20340 = const()[name = string("op_20340"), val = tensor([1, 1, 256, 64])]; - tensor var_20341 = reshape(shape = var_20340, x = key_states_251)[name = string("op_20341")]; - tensor var_20346 = const()[name = string("op_20346"), val = tensor([0, 1, 3, 2])]; - tensor var_20351 = const()[name = string("op_20351"), val = tensor([1, 1, 256, 64])]; - tensor var_20352 = reshape(shape = var_20351, x = value_states_201)[name = string("op_20352")]; - tensor var_20357 = const()[name = string("op_20357"), val = tensor([0, 1, 3, 2])]; - int32 var_20368 = const()[name = string("op_20368"), val = int32(-1)]; - fp16 const_1060_promoted = const()[name = string("const_1060_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_405 = transpose(perm = var_20335, x = var_20330)[name = string("transpose_7")]; - tensor var_20370 = mul(x = hidden_states_405, y = const_1060_promoted)[name = string("op_20370")]; - bool input_505_interleave_0 = const()[name = string("input_505_interleave_0"), val = bool(false)]; - tensor input_505 = concat(axis = var_20368, interleave = input_505_interleave_0, values = (hidden_states_405, var_20370))[name = string("input_505")]; - tensor normed_605_axes_0 = const()[name = string("normed_605_axes_0"), val = tensor([-1])]; - fp16 var_20365_to_fp16 = const()[name = string("op_20365_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_605_cast_fp16 = layer_norm(axes = normed_605_axes_0, epsilon = var_20365_to_fp16, x = input_505)[name = string("normed_605_cast_fp16")]; - tensor normed_607_begin_0 = const()[name = string("normed_607_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_607_end_0 = const()[name = string("normed_607_end_0"), val = tensor([1, 4, 64, 256])]; - tensor normed_607_end_mask_0 = const()[name = string("normed_607_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_607 = slice_by_index(begin = normed_607_begin_0, end = normed_607_end_0, end_mask = normed_607_end_mask_0, x = normed_605_cast_fp16)[name = string("normed_607")]; - tensor var_20384_to_fp16 = const()[name = string("op_20384_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528679168)))]; - tensor q_cast_fp16 = mul(x = normed_607, y = var_20384_to_fp16)[name = string("q_cast_fp16")]; - int32 var_20395 = const()[name = string("op_20395"), val = int32(-1)]; - fp16 const_1064_promoted = const()[name = string("const_1064_promoted"), val = fp16(-0x1p+0)]; - tensor hidden_states_407 = transpose(perm = var_20346, x = var_20341)[name = string("transpose_6")]; - tensor var_20397 = mul(x = hidden_states_407, y = const_1064_promoted)[name = string("op_20397")]; - bool input_507_interleave_0 = const()[name = string("input_507_interleave_0"), val = bool(false)]; - tensor input_507 = concat(axis = var_20395, interleave = input_507_interleave_0, values = (hidden_states_407, var_20397))[name = string("input_507")]; - tensor normed_609_axes_0 = const()[name = string("normed_609_axes_0"), val = tensor([-1])]; - fp16 var_20392_to_fp16 = const()[name = string("op_20392_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_609_cast_fp16 = layer_norm(axes = normed_609_axes_0, epsilon = var_20392_to_fp16, x = input_507)[name = string("normed_609_cast_fp16")]; - tensor normed_611_begin_0 = const()[name = string("normed_611_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor normed_611_end_0 = const()[name = string("normed_611_end_0"), val = tensor([1, 1, 64, 256])]; - tensor normed_611_end_mask_0 = const()[name = string("normed_611_end_mask_0"), val = tensor([true, true, true, false])]; - tensor normed_611 = slice_by_index(begin = normed_611_begin_0, end = normed_611_end_0, end_mask = normed_611_end_mask_0, x = normed_609_cast_fp16)[name = string("normed_611")]; - tensor var_20411_to_fp16 = const()[name = string("op_20411_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528679744)))]; - tensor k_cast_fp16 = mul(x = normed_611, y = var_20411_to_fp16)[name = string("k_cast_fp16")]; - tensor var_20425_cast_fp16 = mul(x = q_cast_fp16, y = cos_5)[name = string("op_20425_cast_fp16")]; - tensor x1_101_begin_0 = const()[name = string("x1_101_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_101_end_0 = const()[name = string("x1_101_end_0"), val = tensor([1, 4, 64, 128])]; - tensor x1_101_end_mask_0 = const()[name = string("x1_101_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_101_cast_fp16 = slice_by_index(begin = x1_101_begin_0, end = x1_101_end_0, end_mask = x1_101_end_mask_0, x = q_cast_fp16)[name = string("x1_101_cast_fp16")]; - tensor x2_101_begin_0 = const()[name = string("x2_101_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_101_end_0 = const()[name = string("x2_101_end_0"), val = tensor([1, 4, 64, 256])]; - tensor x2_101_end_mask_0 = const()[name = string("x2_101_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_101_cast_fp16 = slice_by_index(begin = x2_101_begin_0, end = x2_101_end_0, end_mask = x2_101_end_mask_0, x = q_cast_fp16)[name = string("x2_101_cast_fp16")]; - fp16 const_1070_promoted_to_fp16 = const()[name = string("const_1070_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_20446_cast_fp16 = mul(x = x2_101_cast_fp16, y = const_1070_promoted_to_fp16)[name = string("op_20446_cast_fp16")]; - int32 var_20448 = const()[name = string("op_20448"), val = int32(-1)]; - bool var_20449_interleave_0 = const()[name = string("op_20449_interleave_0"), val = bool(false)]; - tensor var_20449_cast_fp16 = concat(axis = var_20448, interleave = var_20449_interleave_0, values = (var_20446_cast_fp16, x1_101_cast_fp16))[name = string("op_20449_cast_fp16")]; - tensor var_20450_cast_fp16 = mul(x = var_20449_cast_fp16, y = sin_5)[name = string("op_20450_cast_fp16")]; - tensor query_states_203_cast_fp16 = add(x = var_20425_cast_fp16, y = var_20450_cast_fp16)[name = string("query_states_203_cast_fp16")]; - tensor var_20453_cast_fp16 = mul(x = k_cast_fp16, y = cos_5)[name = string("op_20453_cast_fp16")]; - tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 1, 64, 128])]; - tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_cast_fp16 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = k_cast_fp16)[name = string("x1_cast_fp16")]; - tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 128])]; - tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 1, 64, 256])]; - tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_cast_fp16 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = k_cast_fp16)[name = string("x2_cast_fp16")]; - fp16 const_1073_promoted_to_fp16 = const()[name = string("const_1073_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_20474_cast_fp16 = mul(x = x2_cast_fp16, y = const_1073_promoted_to_fp16)[name = string("op_20474_cast_fp16")]; - int32 var_20476 = const()[name = string("op_20476"), val = int32(-1)]; - bool var_20477_interleave_0 = const()[name = string("op_20477_interleave_0"), val = bool(false)]; - tensor var_20477_cast_fp16 = concat(axis = var_20476, interleave = var_20477_interleave_0, values = (var_20474_cast_fp16, x1_cast_fp16))[name = string("op_20477_cast_fp16")]; - tensor var_20478_cast_fp16 = mul(x = var_20477_cast_fp16, y = sin_5)[name = string("op_20478_cast_fp16")]; - tensor key_states_253_cast_fp16 = add(x = var_20453_cast_fp16, y = var_20478_cast_fp16)[name = string("key_states_253_cast_fp16")]; - tensor key_slice_begin_0 = const()[name = string("key_slice_begin_0"), val = tensor([21, 0, 0, 0])]; - tensor key_slice_end_0 = const()[name = string("key_slice_end_0"), val = tensor([22, 1, 512, 256])]; - tensor key_slice_end_mask_0 = const()[name = string("key_slice_end_mask_0"), val = tensor([false, true, true, true])]; - tensor key_slice_cast_fp16 = slice_by_index(begin = key_slice_begin_0, end = key_slice_end_0, end_mask = key_slice_end_mask_0, x = coreml_update_state_101)[name = string("key_slice_cast_fp16")]; - tensor var_20515_begin_0 = const()[name = string("op_20515_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_20515_end_0 = const()[name = string("op_20515_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_20515_end_mask_0 = const()[name = string("op_20515_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_20515_cast_fp16 = slice_by_index(begin = var_20515_begin_0, end = var_20515_end_0, end_mask = var_20515_end_mask_0, x = key_slice_cast_fp16)[name = string("op_20515_cast_fp16")]; - int32 var_20542 = const()[name = string("op_20542"), val = int32(2)]; - bool shifted_key_interleave_0 = const()[name = string("shifted_key_interleave_0"), val = bool(false)]; - tensor shifted_key_cast_fp16 = concat(axis = var_20542, interleave = shifted_key_interleave_0, values = (var_20515_cast_fp16, key_states_253_cast_fp16))[name = string("shifted_key_cast_fp16")]; - tensor concat_366 = const()[name = string("concat_366"), val = tensor([21, 0, 0, 0])]; - tensor concat_367 = const()[name = string("concat_367"), val = tensor([22, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_43_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_43_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_43_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_43_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_43_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_43_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_43_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_43_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_43_cast_fp16 = slice_update(begin = concat_366, begin_mask = model_model_kv_cache_local_internal_tensor_assign_43_begin_mask_0, end = concat_367, end_mask = model_model_kv_cache_local_internal_tensor_assign_43_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_43_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_43_stride_0, update = shifted_key_cast_fp16, x = coreml_update_state_101)[name = string("model_model_kv_cache_local_internal_tensor_assign_43_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_43_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_206_write_state")]; - tensor coreml_update_state_102 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_206")]; - tensor value_slice_begin_0 = const()[name = string("value_slice_begin_0"), val = tensor([43, 0, 0, 0])]; - tensor value_slice_end_0 = const()[name = string("value_slice_end_0"), val = tensor([1, 1, 512, 256])]; - tensor value_slice_end_mask_0 = const()[name = string("value_slice_end_mask_0"), val = tensor([true, true, true, true])]; - tensor value_slice_cast_fp16 = slice_by_index(begin = value_slice_begin_0, end = value_slice_end_0, end_mask = value_slice_end_mask_0, x = coreml_update_state_102)[name = string("value_slice_cast_fp16")]; - tensor var_20585_begin_0 = const()[name = string("op_20585_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_20585_end_0 = const()[name = string("op_20585_end_0"), val = tensor([1, 1, 1, 256])]; - tensor var_20585_end_mask_0 = const()[name = string("op_20585_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_20585_cast_fp16 = slice_by_index(begin = var_20585_begin_0, end = var_20585_end_0, end_mask = var_20585_end_mask_0, x = value_slice_cast_fp16)[name = string("op_20585_cast_fp16")]; - int32 var_20612 = const()[name = string("op_20612"), val = int32(2)]; - bool shifted_value_interleave_0 = const()[name = string("shifted_value_interleave_0"), val = bool(false)]; - tensor value_states_203 = transpose(perm = var_20357, x = var_20352)[name = string("transpose_5")]; - tensor shifted_value_cast_fp16 = concat(axis = var_20612, interleave = shifted_value_interleave_0, values = (var_20585_cast_fp16, value_states_203))[name = string("shifted_value_cast_fp16")]; - tensor concat_368 = const()[name = string("concat_368"), val = tensor([43, 0, 0, 0])]; - tensor concat_369 = const()[name = string("concat_369"), val = tensor([44, 0, 0, 0])]; - tensor model_model_kv_cache_local_internal_tensor_assign_44_stride_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_44_stride_0"), val = tensor([1, 1, 1, 1])]; - tensor model_model_kv_cache_local_internal_tensor_assign_44_begin_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_44_begin_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_44_end_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_44_end_mask_0"), val = tensor([false, true, true, true])]; - tensor model_model_kv_cache_local_internal_tensor_assign_44_squeeze_mask_0 = const()[name = string("model_model_kv_cache_local_internal_tensor_assign_44_squeeze_mask_0"), val = tensor([false, false, false, false])]; - tensor model_model_kv_cache_local_internal_tensor_assign_44_cast_fp16 = slice_update(begin = concat_368, begin_mask = model_model_kv_cache_local_internal_tensor_assign_44_begin_mask_0, end = concat_369, end_mask = model_model_kv_cache_local_internal_tensor_assign_44_end_mask_0, squeeze_mask = model_model_kv_cache_local_internal_tensor_assign_44_squeeze_mask_0, stride = model_model_kv_cache_local_internal_tensor_assign_44_stride_0, update = shifted_value_cast_fp16, x = coreml_update_state_102)[name = string("model_model_kv_cache_local_internal_tensor_assign_44_cast_fp16")]; - write_state(data = model_model_kv_cache_local_internal_tensor_assign_44_cast_fp16, input = model_model_kv_cache_local)[name = string("coreml_update_state_207_write_state")]; - tensor coreml_update_state_103 = read_state(input = model_model_kv_cache_local)[name = string("coreml_update_state_207")]; - tensor var_20640_begin_0 = const()[name = string("op_20640_begin_0"), val = tensor([21, 0, 0, 0])]; - tensor var_20640_end_0 = const()[name = string("op_20640_end_0"), val = tensor([22, 1, 512, 256])]; - tensor var_20640_end_mask_0 = const()[name = string("op_20640_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_20640_cast_fp16 = slice_by_index(begin = var_20640_begin_0, end = var_20640_end_0, end_mask = var_20640_end_mask_0, x = coreml_update_state_103)[name = string("op_20640_cast_fp16")]; - tensor var_20647_begin_0 = const()[name = string("op_20647_begin_0"), val = tensor([43, 0, 0, 0])]; - tensor var_20647_end_0 = const()[name = string("op_20647_end_0"), val = tensor([1, 1, 512, 256])]; - tensor var_20647_end_mask_0 = const()[name = string("op_20647_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_20647_cast_fp16 = slice_by_index(begin = var_20647_begin_0, end = var_20647_end_0, end_mask = var_20647_end_mask_0, x = coreml_update_state_103)[name = string("op_20647_cast_fp16")]; - tensor var_20686 = const()[name = string("op_20686"), val = tensor([1, 4, 1, 1])]; - tensor x_405_cast_fp16 = tile(reps = var_20686, x = var_20640_cast_fp16)[name = string("x_405_cast_fp16")]; - tensor var_20706 = const()[name = string("op_20706"), val = tensor([1, 4, 1, 1])]; - tensor x_411_cast_fp16 = tile(reps = var_20706, x = var_20647_cast_fp16)[name = string("x_411_cast_fp16")]; - bool var_20733_transpose_x_0 = const()[name = string("op_20733_transpose_x_0"), val = bool(false)]; - bool var_20733_transpose_y_0 = const()[name = string("op_20733_transpose_y_0"), val = bool(true)]; - tensor var_20733 = matmul(transpose_x = var_20733_transpose_x_0, transpose_y = var_20733_transpose_y_0, x = query_states_203_cast_fp16, y = x_405_cast_fp16)[name = string("op_20733")]; - fp16 var_20734_to_fp16 = const()[name = string("op_20734_to_fp16"), val = fp16(0x1p-4)]; - tensor attn_weights_101_cast_fp16 = mul(x = var_20733, y = var_20734_to_fp16)[name = string("attn_weights_101_cast_fp16")]; - tensor attn_weights_cast_fp16 = add(x = attn_weights_101_cast_fp16, y = mask_slice_1)[name = string("attn_weights_cast_fp16")]; - int32 var_20769 = const()[name = string("op_20769"), val = int32(-1)]; - tensor var_20771_cast_fp16 = softmax(axis = var_20769, x = attn_weights_cast_fp16)[name = string("op_20771_cast_fp16")]; - tensor concat_374 = const()[name = string("concat_374"), val = tensor([4, 64, 512])]; - tensor reshape_75_cast_fp16 = reshape(shape = concat_374, x = var_20771_cast_fp16)[name = string("reshape_75_cast_fp16")]; - tensor concat_375 = const()[name = string("concat_375"), val = tensor([4, 512, 256])]; - tensor reshape_76_cast_fp16 = reshape(shape = concat_375, x = x_411_cast_fp16)[name = string("reshape_76_cast_fp16")]; - bool matmul_25_transpose_x_0 = const()[name = string("matmul_25_transpose_x_0"), val = bool(false)]; - bool matmul_25_transpose_y_0 = const()[name = string("matmul_25_transpose_y_0"), val = bool(false)]; - tensor matmul_25_cast_fp16 = matmul(transpose_x = matmul_25_transpose_x_0, transpose_y = matmul_25_transpose_y_0, x = reshape_75_cast_fp16, y = reshape_76_cast_fp16)[name = string("matmul_25_cast_fp16")]; - tensor concat_379 = const()[name = string("concat_379"), val = tensor([1, 4, 64, 256])]; - tensor reshape_77_cast_fp16 = reshape(shape = concat_379, x = matmul_25_cast_fp16)[name = string("reshape_77_cast_fp16")]; - tensor var_20783_perm_0 = const()[name = string("op_20783_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor var_20802 = const()[name = string("op_20802"), val = tensor([1, 64, 1024])]; - tensor var_20783_cast_fp16 = transpose(perm = var_20783_perm_0, x = reshape_77_cast_fp16)[name = string("transpose_4")]; - tensor attn_output_255_cast_fp16 = reshape(shape = var_20802, x = var_20783_cast_fp16)[name = string("attn_output_255_cast_fp16")]; - tensor var_20807 = const()[name = string("op_20807"), val = tensor([0, 2, 1])]; - string var_20823_pad_type_0 = const()[name = string("op_20823_pad_type_0"), val = string("valid")]; - int32 var_20823_groups_0 = const()[name = string("op_20823_groups_0"), val = int32(1)]; - tensor var_20823_strides_0 = const()[name = string("op_20823_strides_0"), val = tensor([1])]; - tensor var_20823_pad_0 = const()[name = string("op_20823_pad_0"), val = tensor([0, 0])]; - tensor var_20823_dilations_0 = const()[name = string("op_20823_dilations_0"), val = tensor([1])]; - tensor squeeze_25_cast_fp16_to_fp32_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528680320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(529565120))))[name = string("squeeze_25_cast_fp16_to_fp32_to_fp16_palettized")]; - tensor var_20808_cast_fp16 = transpose(perm = var_20807, x = attn_output_255_cast_fp16)[name = string("transpose_3")]; - tensor var_20823_cast_fp16 = conv(dilations = var_20823_dilations_0, groups = var_20823_groups_0, pad = var_20823_pad_0, pad_type = var_20823_pad_type_0, strides = var_20823_strides_0, weight = squeeze_25_cast_fp16_to_fp32_to_fp16_palettized, x = var_20808_cast_fp16)[name = string("op_20823_cast_fp16")]; - tensor var_20827 = const()[name = string("op_20827"), val = tensor([0, 2, 1])]; - int32 var_20838 = const()[name = string("op_20838"), val = int32(-1)]; - fp16 const_1084_promoted_to_fp16 = const()[name = string("const_1084_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_409_cast_fp16 = transpose(perm = var_20827, x = var_20823_cast_fp16)[name = string("transpose_2")]; - tensor var_20840_cast_fp16 = mul(x = hidden_states_409_cast_fp16, y = const_1084_promoted_to_fp16)[name = string("op_20840_cast_fp16")]; - bool input_511_interleave_0 = const()[name = string("input_511_interleave_0"), val = bool(false)]; - tensor input_511_cast_fp16 = concat(axis = var_20838, interleave = input_511_interleave_0, values = (hidden_states_409_cast_fp16, var_20840_cast_fp16))[name = string("input_511_cast_fp16")]; - tensor normed_613_axes_0 = const()[name = string("normed_613_axes_0"), val = tensor([-1])]; - fp16 var_20835_to_fp16 = const()[name = string("op_20835_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_613_cast_fp16 = layer_norm(axes = normed_613_axes_0, epsilon = var_20835_to_fp16, x = input_511_cast_fp16)[name = string("normed_613_cast_fp16")]; - tensor normed_615_begin_0 = const()[name = string("normed_615_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_615_end_0 = const()[name = string("normed_615_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_615_end_mask_0 = const()[name = string("normed_615_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_615_cast_fp16 = slice_by_index(begin = normed_615_begin_0, end = normed_615_end_0, end_mask = normed_615_end_mask_0, x = normed_613_cast_fp16)[name = string("normed_615_cast_fp16")]; - tensor var_20854_to_fp16 = const()[name = string("op_20854_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(529583616)))]; - tensor attn_output_cast_fp16 = mul(x = normed_615_cast_fp16, y = var_20854_to_fp16)[name = string("attn_output_cast_fp16")]; - tensor hidden_states_411_cast_fp16 = add(x = hidden_states_401_cast_fp16, y = attn_output_cast_fp16)[name = string("hidden_states_411_cast_fp16")]; - int32 var_20867 = const()[name = string("op_20867"), val = int32(-1)]; - fp16 const_1088_promoted_to_fp16 = const()[name = string("const_1088_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor var_20869_cast_fp16 = mul(x = hidden_states_411_cast_fp16, y = const_1088_promoted_to_fp16)[name = string("op_20869_cast_fp16")]; - bool input_513_interleave_0 = const()[name = string("input_513_interleave_0"), val = bool(false)]; - tensor input_513_cast_fp16 = concat(axis = var_20867, interleave = input_513_interleave_0, values = (hidden_states_411_cast_fp16, var_20869_cast_fp16))[name = string("input_513_cast_fp16")]; - tensor normed_617_axes_0 = const()[name = string("normed_617_axes_0"), val = tensor([-1])]; - fp16 var_20864_to_fp16 = const()[name = string("op_20864_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_617_cast_fp16 = layer_norm(axes = normed_617_axes_0, epsilon = var_20864_to_fp16, x = input_513_cast_fp16)[name = string("normed_617_cast_fp16")]; - tensor normed_619_begin_0 = const()[name = string("normed_619_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_619_end_0 = const()[name = string("normed_619_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_619_end_mask_0 = const()[name = string("normed_619_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_619_cast_fp16 = slice_by_index(begin = normed_619_begin_0, end = normed_619_end_0, end_mask = normed_619_end_mask_0, x = normed_617_cast_fp16)[name = string("normed_619_cast_fp16")]; - tensor var_20883_to_fp16 = const()[name = string("op_20883_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(529585984)))]; - tensor x_413_cast_fp16 = mul(x = normed_619_cast_fp16, y = var_20883_to_fp16)[name = string("x_413_cast_fp16")]; - tensor var_20895 = const()[name = string("op_20895"), val = tensor([0, 2, 1])]; - tensor input_515_axes_0 = const()[name = string("input_515_axes_0"), val = tensor([2])]; - tensor var_20896_cast_fp16 = transpose(perm = var_20895, x = x_413_cast_fp16)[name = string("transpose_1")]; - tensor input_515_cast_fp16 = expand_dims(axes = input_515_axes_0, x = var_20896_cast_fp16)[name = string("input_515_cast_fp16")]; - string x_pad_type_0 = const()[name = string("x_pad_type_0"), val = string("valid")]; - tensor x_strides_0 = const()[name = string("x_strides_0"), val = tensor([1, 1])]; - tensor x_pad_0 = const()[name = string("x_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor x_dilations_0 = const()[name = string("x_dilations_0"), val = tensor([1, 1])]; - int32 x_groups_0 = const()[name = string("x_groups_0"), val = int32(1)]; - tensor model_model_layers_25_mlp_gate_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1495569600))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1501541632))))[name = string("model_model_layers_25_mlp_gate_proj_weight_promoted_to_fp16_palettized")]; - tensor x_cast_fp16 = conv(dilations = x_dilations_0, groups = x_groups_0, pad = x_pad_0, pad_type = x_pad_type_0, strides = x_strides_0, weight = model_model_layers_25_mlp_gate_proj_weight_promoted_to_fp16_palettized, x = input_515_cast_fp16)[name = string("x_cast_fp16")]; - string b_pad_type_0 = const()[name = string("b_pad_type_0"), val = string("valid")]; - tensor b_strides_0 = const()[name = string("b_strides_0"), val = tensor([1, 1])]; - tensor b_pad_0 = const()[name = string("b_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor b_dilations_0 = const()[name = string("b_dilations_0"), val = tensor([1, 1])]; - int32 b_groups_0 = const()[name = string("b_groups_0"), val = int32(1)]; - tensor model_model_layers_25_mlp_up_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1501652288))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1507624320))))[name = string("model_model_layers_25_mlp_up_proj_weight_promoted_to_fp16_palettized")]; - tensor b_cast_fp16 = conv(dilations = b_dilations_0, groups = b_groups_0, pad = b_pad_0, pad_type = b_pad_type_0, strides = b_strides_0, weight = model_model_layers_25_mlp_up_proj_weight_promoted_to_fp16_palettized, x = input_515_cast_fp16)[name = string("b_cast_fp16")]; - string var_20921_mode_0 = const()[name = string("op_20921_mode_0"), val = string("TANH_APPROXIMATION")]; - tensor var_20921_cast_fp16 = gelu(mode = var_20921_mode_0, x = x_cast_fp16)[name = string("op_20921_cast_fp16")]; - tensor input_517_cast_fp16 = mul(x = var_20921_cast_fp16, y = b_cast_fp16)[name = string("input_517_cast_fp16")]; - string e_pad_type_0 = const()[name = string("e_pad_type_0"), val = string("valid")]; - tensor e_strides_0 = const()[name = string("e_strides_0"), val = tensor([1, 1])]; - tensor e_pad_0 = const()[name = string("e_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor e_dilations_0 = const()[name = string("e_dilations_0"), val = tensor([1, 1])]; - int32 e_groups_0 = const()[name = string("e_groups_0"), val = int32(1)]; - tensor model_model_layers_25_mlp_down_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(541753728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547725760))))[name = string("model_model_layers_25_mlp_down_proj_weight_promoted_to_fp16_palettized")]; - tensor e_cast_fp16 = conv(dilations = e_dilations_0, groups = e_groups_0, pad = e_pad_0, pad_type = e_pad_type_0, strides = e_strides_0, weight = model_model_layers_25_mlp_down_proj_weight_promoted_to_fp16_palettized, x = input_517_cast_fp16)[name = string("e_cast_fp16")]; - tensor var_20929_axes_0 = const()[name = string("op_20929_axes_0"), val = tensor([2])]; - tensor var_20929_cast_fp16 = squeeze(axes = var_20929_axes_0, x = e_cast_fp16)[name = string("op_20929_cast_fp16")]; - tensor var_20930 = const()[name = string("op_20930"), val = tensor([0, 2, 1])]; - int32 var_20941 = const()[name = string("op_20941"), val = int32(-1)]; - fp16 const_1092_promoted_to_fp16 = const()[name = string("const_1092_promoted_to_fp16"), val = fp16(-0x1p+0)]; - tensor hidden_states_413_cast_fp16 = transpose(perm = var_20930, x = var_20929_cast_fp16)[name = string("transpose_0")]; - tensor var_20943_cast_fp16 = mul(x = hidden_states_413_cast_fp16, y = const_1092_promoted_to_fp16)[name = string("op_20943_cast_fp16")]; - bool input_interleave_0 = const()[name = string("input_interleave_0"), val = bool(false)]; - tensor input_cast_fp16 = concat(axis = var_20941, interleave = input_interleave_0, values = (hidden_states_413_cast_fp16, var_20943_cast_fp16))[name = string("input_cast_fp16")]; - tensor normed_621_axes_0 = const()[name = string("normed_621_axes_0"), val = tensor([-1])]; - fp16 var_20938_to_fp16 = const()[name = string("op_20938_to_fp16"), val = fp16(0x1.1p-20)]; - tensor normed_621_cast_fp16 = layer_norm(axes = normed_621_axes_0, epsilon = var_20938_to_fp16, x = input_cast_fp16)[name = string("normed_621_cast_fp16")]; - tensor normed_begin_0 = const()[name = string("normed_begin_0"), val = tensor([0, 0, 0])]; - tensor normed_end_0 = const()[name = string("normed_end_0"), val = tensor([1, 64, 1152])]; - tensor normed_end_mask_0 = const()[name = string("normed_end_mask_0"), val = tensor([true, true, false])]; - tensor normed_cast_fp16 = slice_by_index(begin = normed_begin_0, end = normed_end_0, end_mask = normed_end_mask_0, x = normed_621_cast_fp16)[name = string("normed_cast_fp16")]; - tensor var_20957_to_fp16 = const()[name = string("op_20957_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547744256)))]; - tensor hidden_states_cast_fp16 = mul(x = normed_cast_fp16, y = var_20957_to_fp16)[name = string("hidden_states_cast_fp16")]; - tensor out_1_cast_fp16 = add(x = hidden_states_411_cast_fp16, y = hidden_states_cast_fp16)[name = string("out_1_cast_fp16")]; - tensor var_20963_begin_0 = const()[name = string("op_20963_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_20963_end_0 = const()[name = string("op_20963_end_0"), val = tensor([1, 1, 512, 256])]; - tensor var_20963_end_mask_0 = const()[name = string("op_20963_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_20963_squeeze_mask_0 = const()[name = string("op_20963_squeeze_mask_0"), val = tensor([true, false, false, false])]; - tensor var_20963_cast_fp16 = slice_by_index(begin = var_20963_begin_0, end = var_20963_end_0, end_mask = var_20963_end_mask_0, squeeze_mask = var_20963_squeeze_mask_0, x = coreml_update_state_103)[name = string("op_20963_cast_fp16")]; - tensor var_20966_begin_0 = const()[name = string("op_20966_begin_0"), val = tensor([0, 0, 0])]; - tensor var_20966_end_0 = const()[name = string("op_20966_end_0"), val = tensor([1, 512, 256])]; - tensor var_20966_end_mask_0 = const()[name = string("op_20966_end_mask_0"), val = tensor([false, true, true])]; - tensor var_20966_squeeze_mask_0 = const()[name = string("op_20966_squeeze_mask_0"), val = tensor([true, false, false])]; - tensor var_20966_cast_fp16 = slice_by_index(begin = var_20966_begin_0, end = var_20966_end_0, end_mask = var_20966_end_mask_0, squeeze_mask = var_20966_squeeze_mask_0, x = var_20963_cast_fp16)[name = string("op_20966_cast_fp16")]; - tensor var_20969_begin_0 = const()[name = string("op_20969_begin_0"), val = tensor([0, 0])]; - tensor var_20969_end_0 = const()[name = string("op_20969_end_0"), val = tensor([1, 256])]; - tensor var_20969_end_mask_0 = const()[name = string("op_20969_end_mask_0"), val = tensor([false, true])]; - tensor var_20969_squeeze_mask_0 = const()[name = string("op_20969_squeeze_mask_0"), val = tensor([true, false])]; - tensor var_20969_cast_fp16 = slice_by_index(begin = var_20969_begin_0, end = var_20969_end_0, end_mask = var_20969_end_mask_0, squeeze_mask = var_20969_squeeze_mask_0, x = var_20966_cast_fp16)[name = string("op_20969_cast_fp16")]; - tensor var_20972_begin_0 = const()[name = string("op_20972_begin_0"), val = tensor([0])]; - tensor var_20972_end_0 = const()[name = string("op_20972_end_0"), val = tensor([1])]; - tensor var_20972_end_mask_0 = const()[name = string("op_20972_end_mask_0"), val = tensor([false])]; - tensor var_20972_squeeze_mask_0 = const()[name = string("op_20972_squeeze_mask_0"), val = tensor([true])]; - fp16 var_20972_cast_fp16 = slice_by_index(begin = var_20972_begin_0, end = var_20972_end_0, end_mask = var_20972_end_mask_0, squeeze_mask = var_20972_squeeze_mask_0, x = var_20969_cast_fp16)[name = string("op_20972_cast_fp16")]; - fp16 var_20973_to_fp16 = const()[name = string("op_20973_to_fp16"), val = fp16(0x0p+0)]; - fp16 dummy_local_cast_fp16 = mul(x = var_20972_cast_fp16, y = var_20973_to_fp16)[name = string("dummy_local_cast_fp16")]; - tensor var_20977_begin_0 = const()[name = string("op_20977_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_20977_end_0 = const()[name = string("op_20977_end_0"), val = tensor([1, 1, 4096, 256])]; - tensor var_20977_end_mask_0 = const()[name = string("op_20977_end_mask_0"), val = tensor([false, true, true, true])]; - tensor var_20977_squeeze_mask_0 = const()[name = string("op_20977_squeeze_mask_0"), val = tensor([true, false, false, false])]; - tensor var_20977_cast_fp16 = slice_by_index(begin = var_20977_begin_0, end = var_20977_end_0, end_mask = var_20977_end_mask_0, squeeze_mask = var_20977_squeeze_mask_0, x = coreml_update_state_99)[name = string("op_20977_cast_fp16")]; - tensor var_20980_begin_0 = const()[name = string("op_20980_begin_0"), val = tensor([0, 0, 0])]; - tensor var_20980_end_0 = const()[name = string("op_20980_end_0"), val = tensor([1, 4096, 256])]; - tensor var_20980_end_mask_0 = const()[name = string("op_20980_end_mask_0"), val = tensor([false, true, true])]; - tensor var_20980_squeeze_mask_0 = const()[name = string("op_20980_squeeze_mask_0"), val = tensor([true, false, false])]; - tensor var_20980_cast_fp16 = slice_by_index(begin = var_20980_begin_0, end = var_20980_end_0, end_mask = var_20980_end_mask_0, squeeze_mask = var_20980_squeeze_mask_0, x = var_20977_cast_fp16)[name = string("op_20980_cast_fp16")]; - tensor var_20983_begin_0 = const()[name = string("op_20983_begin_0"), val = tensor([0, 0])]; - tensor var_20983_end_0 = const()[name = string("op_20983_end_0"), val = tensor([1, 256])]; - tensor var_20983_end_mask_0 = const()[name = string("op_20983_end_mask_0"), val = tensor([false, true])]; - tensor var_20983_squeeze_mask_0 = const()[name = string("op_20983_squeeze_mask_0"), val = tensor([true, false])]; - tensor var_20983_cast_fp16 = slice_by_index(begin = var_20983_begin_0, end = var_20983_end_0, end_mask = var_20983_end_mask_0, squeeze_mask = var_20983_squeeze_mask_0, x = var_20980_cast_fp16)[name = string("op_20983_cast_fp16")]; - tensor var_20986_begin_0 = const()[name = string("op_20986_begin_0"), val = tensor([0])]; - tensor var_20986_end_0 = const()[name = string("op_20986_end_0"), val = tensor([1])]; - tensor var_20986_end_mask_0 = const()[name = string("op_20986_end_mask_0"), val = tensor([false])]; - tensor var_20986_squeeze_mask_0 = const()[name = string("op_20986_squeeze_mask_0"), val = tensor([true])]; - fp16 var_20986_cast_fp16 = slice_by_index(begin = var_20986_begin_0, end = var_20986_end_0, end_mask = var_20986_end_mask_0, squeeze_mask = var_20986_squeeze_mask_0, x = var_20983_cast_fp16)[name = string("op_20986_cast_fp16")]; - fp16 var_20987_to_fp16 = const()[name = string("op_20987_to_fp16"), val = fp16(0x0p+0)]; - fp16 dummy_global_cast_fp16 = mul(x = var_20986_cast_fp16, y = var_20987_to_fp16)[name = string("dummy_global_cast_fp16")]; - fp16 var_20990_cast_fp16 = add(x = dummy_local_cast_fp16, y = dummy_global_cast_fp16)[name = string("op_20990_cast_fp16")]; - tensor var_20994 = const()[name = string("op_20994"), val = tensor([1, 1, 1])]; - tensor var_20995_cast_fp16 = reshape(shape = var_20994, x = var_20990_cast_fp16)[name = string("op_20995_cast_fp16")]; - tensor out_cast_fp16 = add(x = out_1_cast_fp16, y = var_20995_cast_fp16)[name = string("out_cast_fp16")]; - tensor var_21007_begin_0 = const()[name = string("op_21007_begin_0"), val = tensor([0, 0, 0])]; - tensor var_21007_end_0 = const()[name = string("op_21007_end_0"), val = tensor([1, 1, 1152])]; - tensor var_21007_end_mask_0 = const()[name = string("op_21007_end_mask_0"), val = tensor([true, false, true])]; - tensor output_hidden_states = slice_by_index(begin = var_21007_begin_0, end = var_21007_end_0, end_mask = var_21007_end_mask_0, x = out_cast_fp16)[name = string("op_21007_cast_fp16")]; - } -> (output_hidden_states); -} \ No newline at end of file