model.vision_tower.patch_embedder.input_proj.input_quantizer TensorQuantizer(disabled) model.vision_tower.patch_embedder.input_proj.output_quantizer TensorQuantizer(disabled) model.vision_tower.patch_embedder.input_proj.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.0.self_attn.q_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.0.self_attn.q_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.0.self_attn.q_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.0.self_attn.k_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.0.self_attn.k_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.0.self_attn.k_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.0.self_attn.v_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.0.self_attn.v_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.0.self_attn.v_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.0.self_attn.o_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.0.self_attn.o_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.0.self_attn.o_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.0.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.0.self_attn.k_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.0.self_attn.v_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.0.self_attn.softmax_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.0.mlp.gate_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.0.mlp.gate_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.0.mlp.gate_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.0.mlp.up_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.0.mlp.up_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.0.mlp.up_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.0.mlp.down_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.0.mlp.down_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.0.mlp.down_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.1.self_attn.q_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.1.self_attn.q_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.1.self_attn.q_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.1.self_attn.k_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.1.self_attn.k_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.1.self_attn.k_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.1.self_attn.v_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.1.self_attn.v_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.1.self_attn.v_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.1.self_attn.o_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.1.self_attn.o_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.1.self_attn.o_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.1.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.1.self_attn.k_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.1.self_attn.v_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.1.self_attn.softmax_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.1.mlp.gate_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.1.mlp.gate_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.1.mlp.gate_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.1.mlp.up_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.1.mlp.up_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.1.mlp.up_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.1.mlp.down_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.1.mlp.down_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.1.mlp.down_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.2.self_attn.q_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.2.self_attn.q_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.2.self_attn.q_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.2.self_attn.k_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.2.self_attn.k_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.2.self_attn.k_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.2.self_attn.v_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.2.self_attn.v_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.2.self_attn.v_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.2.self_attn.o_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.2.self_attn.o_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.2.self_attn.o_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.2.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.2.self_attn.k_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.2.self_attn.v_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.2.self_attn.softmax_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.2.mlp.gate_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.2.mlp.gate_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.2.mlp.gate_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.2.mlp.up_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.2.mlp.up_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.2.mlp.up_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.2.mlp.down_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.2.mlp.down_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.2.mlp.down_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.3.self_attn.q_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.3.self_attn.q_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.3.self_attn.q_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.3.self_attn.k_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.3.self_attn.k_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.3.self_attn.k_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.3.self_attn.v_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.3.self_attn.v_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.3.self_attn.v_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.3.self_attn.o_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.3.self_attn.o_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.3.self_attn.o_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.3.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.3.self_attn.k_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.3.self_attn.v_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.3.self_attn.softmax_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.3.mlp.gate_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.3.mlp.gate_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.3.mlp.gate_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.3.mlp.up_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.3.mlp.up_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.3.mlp.up_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.3.mlp.down_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.3.mlp.down_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.3.mlp.down_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.4.self_attn.q_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.4.self_attn.q_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.4.self_attn.q_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.4.self_attn.k_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.4.self_attn.k_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.4.self_attn.k_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.4.self_attn.v_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.4.self_attn.v_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.4.self_attn.v_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.4.self_attn.o_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.4.self_attn.o_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.4.self_attn.o_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.4.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.4.self_attn.k_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.4.self_attn.v_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.4.self_attn.softmax_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.4.mlp.gate_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.4.mlp.gate_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.4.mlp.gate_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.4.mlp.up_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.4.mlp.up_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.4.mlp.up_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.4.mlp.down_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.4.mlp.down_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.4.mlp.down_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.5.self_attn.q_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.5.self_attn.q_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.5.self_attn.q_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.5.self_attn.k_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.5.self_attn.k_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.5.self_attn.k_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.5.self_attn.v_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.5.self_attn.v_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.5.self_attn.v_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.5.self_attn.o_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.5.self_attn.o_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.5.self_attn.o_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.5.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.5.self_attn.k_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.5.self_attn.v_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.5.self_attn.softmax_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.5.mlp.gate_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.5.mlp.gate_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.5.mlp.gate_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.5.mlp.up_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.5.mlp.up_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.5.mlp.up_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.5.mlp.down_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.5.mlp.down_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.5.mlp.down_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.6.self_attn.q_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.6.self_attn.q_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.6.self_attn.q_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.6.self_attn.k_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.6.self_attn.k_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.6.self_attn.k_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.6.self_attn.v_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.6.self_attn.v_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.6.self_attn.v_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.6.self_attn.o_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.6.self_attn.o_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.6.self_attn.o_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.6.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.6.self_attn.k_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.6.self_attn.v_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.6.self_attn.softmax_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.6.mlp.gate_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.6.mlp.gate_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.6.mlp.gate_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.6.mlp.up_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.6.mlp.up_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.6.mlp.up_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.6.mlp.down_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.6.mlp.down_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.6.mlp.down_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.7.self_attn.q_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.7.self_attn.q_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.7.self_attn.q_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.7.self_attn.k_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.7.self_attn.k_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.7.self_attn.k_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.7.self_attn.v_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.7.self_attn.v_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.7.self_attn.v_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.7.self_attn.o_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.7.self_attn.o_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.7.self_attn.o_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.7.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.7.self_attn.k_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.7.self_attn.v_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.7.self_attn.softmax_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.7.mlp.gate_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.7.mlp.gate_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.7.mlp.gate_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.7.mlp.up_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.7.mlp.up_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.7.mlp.up_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.7.mlp.down_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.7.mlp.down_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.7.mlp.down_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.8.self_attn.q_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.8.self_attn.q_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.8.self_attn.q_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.8.self_attn.k_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.8.self_attn.k_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.8.self_attn.k_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.8.self_attn.v_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.8.self_attn.v_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.8.self_attn.v_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.8.self_attn.o_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.8.self_attn.o_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.8.self_attn.o_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.8.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.8.self_attn.k_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.8.self_attn.v_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.8.self_attn.softmax_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.8.mlp.gate_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.8.mlp.gate_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.8.mlp.gate_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.8.mlp.up_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.8.mlp.up_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.8.mlp.up_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.8.mlp.down_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.8.mlp.down_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.8.mlp.down_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.9.self_attn.q_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.9.self_attn.q_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.9.self_attn.q_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.9.self_attn.k_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.9.self_attn.k_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.9.self_attn.k_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.9.self_attn.v_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.9.self_attn.v_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.9.self_attn.v_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.9.self_attn.o_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.9.self_attn.o_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.9.self_attn.o_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.9.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.9.self_attn.k_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.9.self_attn.v_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.9.self_attn.softmax_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.9.mlp.gate_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.9.mlp.gate_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.9.mlp.gate_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.9.mlp.up_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.9.mlp.up_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.9.mlp.up_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.9.mlp.down_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.9.mlp.down_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.9.mlp.down_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.10.self_attn.q_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.10.self_attn.q_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.10.self_attn.q_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.10.self_attn.k_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.10.self_attn.k_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.10.self_attn.k_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.10.self_attn.v_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.10.self_attn.v_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.10.self_attn.v_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.10.self_attn.o_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.10.self_attn.o_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.10.self_attn.o_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.10.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.10.self_attn.k_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.10.self_attn.v_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.10.self_attn.softmax_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.10.mlp.gate_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.10.mlp.gate_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.10.mlp.gate_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.10.mlp.up_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.10.mlp.up_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.10.mlp.up_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.10.mlp.down_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.10.mlp.down_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.10.mlp.down_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.11.self_attn.q_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.11.self_attn.q_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.11.self_attn.q_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.11.self_attn.k_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.11.self_attn.k_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.11.self_attn.k_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.11.self_attn.v_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.11.self_attn.v_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.11.self_attn.v_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.11.self_attn.o_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.11.self_attn.o_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.11.self_attn.o_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.11.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.11.self_attn.k_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.11.self_attn.v_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.11.self_attn.softmax_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.11.mlp.gate_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.11.mlp.gate_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.11.mlp.gate_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.11.mlp.up_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.11.mlp.up_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.11.mlp.up_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.11.mlp.down_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.11.mlp.down_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.11.mlp.down_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.12.self_attn.q_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.12.self_attn.q_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.12.self_attn.q_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.12.self_attn.k_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.12.self_attn.k_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.12.self_attn.k_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.12.self_attn.v_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.12.self_attn.v_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.12.self_attn.v_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.12.self_attn.o_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.12.self_attn.o_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.12.self_attn.o_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.12.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.12.self_attn.k_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.12.self_attn.v_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.12.self_attn.softmax_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.12.mlp.gate_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.12.mlp.gate_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.12.mlp.gate_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.12.mlp.up_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.12.mlp.up_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.12.mlp.up_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.12.mlp.down_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.12.mlp.down_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.12.mlp.down_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.13.self_attn.q_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.13.self_attn.q_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.13.self_attn.q_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.13.self_attn.k_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.13.self_attn.k_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.13.self_attn.k_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.13.self_attn.v_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.13.self_attn.v_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.13.self_attn.v_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.13.self_attn.o_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.13.self_attn.o_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.13.self_attn.o_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.13.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.13.self_attn.k_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.13.self_attn.v_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.13.self_attn.softmax_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.13.mlp.gate_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.13.mlp.gate_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.13.mlp.gate_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.13.mlp.up_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.13.mlp.up_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.13.mlp.up_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.13.mlp.down_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.13.mlp.down_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.13.mlp.down_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.14.self_attn.q_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.14.self_attn.q_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.14.self_attn.q_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.14.self_attn.k_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.14.self_attn.k_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.14.self_attn.k_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.14.self_attn.v_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.14.self_attn.v_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.14.self_attn.v_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.14.self_attn.o_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.14.self_attn.o_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.14.self_attn.o_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.14.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.14.self_attn.k_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.14.self_attn.v_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.14.self_attn.softmax_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.14.mlp.gate_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.14.mlp.gate_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.14.mlp.gate_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.14.mlp.up_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.14.mlp.up_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.14.mlp.up_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.14.mlp.down_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.14.mlp.down_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.14.mlp.down_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.15.self_attn.q_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.15.self_attn.q_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.15.self_attn.q_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.15.self_attn.k_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.15.self_attn.k_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.15.self_attn.k_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.15.self_attn.v_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.15.self_attn.v_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.15.self_attn.v_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.15.self_attn.o_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.15.self_attn.o_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.15.self_attn.o_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.15.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.15.self_attn.k_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.15.self_attn.v_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.15.self_attn.softmax_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.15.mlp.gate_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.15.mlp.gate_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.15.mlp.gate_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.15.mlp.up_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.15.mlp.up_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.15.mlp.up_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.15.mlp.down_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.15.mlp.down_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.15.mlp.down_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.16.self_attn.q_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.16.self_attn.q_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.16.self_attn.q_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.16.self_attn.k_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.16.self_attn.k_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.16.self_attn.k_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.16.self_attn.v_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.16.self_attn.v_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.16.self_attn.v_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.16.self_attn.o_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.16.self_attn.o_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.16.self_attn.o_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.16.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.16.self_attn.k_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.16.self_attn.v_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.16.self_attn.softmax_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.16.mlp.gate_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.16.mlp.gate_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.16.mlp.gate_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.16.mlp.up_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.16.mlp.up_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.16.mlp.up_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.16.mlp.down_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.16.mlp.down_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.16.mlp.down_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.17.self_attn.q_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.17.self_attn.q_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.17.self_attn.q_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.17.self_attn.k_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.17.self_attn.k_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.17.self_attn.k_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.17.self_attn.v_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.17.self_attn.v_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.17.self_attn.v_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.17.self_attn.o_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.17.self_attn.o_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.17.self_attn.o_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.17.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.17.self_attn.k_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.17.self_attn.v_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.17.self_attn.softmax_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.17.mlp.gate_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.17.mlp.gate_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.17.mlp.gate_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.17.mlp.up_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.17.mlp.up_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.17.mlp.up_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.17.mlp.down_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.17.mlp.down_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.17.mlp.down_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.18.self_attn.q_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.18.self_attn.q_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.18.self_attn.q_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.18.self_attn.k_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.18.self_attn.k_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.18.self_attn.k_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.18.self_attn.v_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.18.self_attn.v_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.18.self_attn.v_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.18.self_attn.o_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.18.self_attn.o_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.18.self_attn.o_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.18.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.18.self_attn.k_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.18.self_attn.v_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.18.self_attn.softmax_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.18.mlp.gate_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.18.mlp.gate_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.18.mlp.gate_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.18.mlp.up_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.18.mlp.up_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.18.mlp.up_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.18.mlp.down_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.18.mlp.down_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.18.mlp.down_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.19.self_attn.q_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.19.self_attn.q_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.19.self_attn.q_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.19.self_attn.k_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.19.self_attn.k_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.19.self_attn.k_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.19.self_attn.v_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.19.self_attn.v_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.19.self_attn.v_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.19.self_attn.o_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.19.self_attn.o_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.19.self_attn.o_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.19.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.19.self_attn.k_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.19.self_attn.v_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.19.self_attn.softmax_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.19.mlp.gate_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.19.mlp.gate_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.19.mlp.gate_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.19.mlp.up_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.19.mlp.up_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.19.mlp.up_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.19.mlp.down_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.19.mlp.down_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.19.mlp.down_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.20.self_attn.q_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.20.self_attn.q_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.20.self_attn.q_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.20.self_attn.k_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.20.self_attn.k_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.20.self_attn.k_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.20.self_attn.v_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.20.self_attn.v_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.20.self_attn.v_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.20.self_attn.o_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.20.self_attn.o_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.20.self_attn.o_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.20.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.20.self_attn.k_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.20.self_attn.v_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.20.self_attn.softmax_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.20.mlp.gate_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.20.mlp.gate_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.20.mlp.gate_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.20.mlp.up_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.20.mlp.up_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.20.mlp.up_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.20.mlp.down_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.20.mlp.down_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.20.mlp.down_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.21.self_attn.q_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.21.self_attn.q_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.21.self_attn.q_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.21.self_attn.k_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.21.self_attn.k_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.21.self_attn.k_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.21.self_attn.v_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.21.self_attn.v_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.21.self_attn.v_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.21.self_attn.o_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.21.self_attn.o_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.21.self_attn.o_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.21.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.21.self_attn.k_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.21.self_attn.v_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.21.self_attn.softmax_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.21.mlp.gate_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.21.mlp.gate_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.21.mlp.gate_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.21.mlp.up_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.21.mlp.up_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.21.mlp.up_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.21.mlp.down_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.21.mlp.down_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.21.mlp.down_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.22.self_attn.q_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.22.self_attn.q_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.22.self_attn.q_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.22.self_attn.k_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.22.self_attn.k_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.22.self_attn.k_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.22.self_attn.v_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.22.self_attn.v_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.22.self_attn.v_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.22.self_attn.o_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.22.self_attn.o_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.22.self_attn.o_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.22.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.22.self_attn.k_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.22.self_attn.v_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.22.self_attn.softmax_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.22.mlp.gate_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.22.mlp.gate_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.22.mlp.gate_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.22.mlp.up_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.22.mlp.up_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.22.mlp.up_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.22.mlp.down_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.22.mlp.down_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.22.mlp.down_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.23.self_attn.q_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.23.self_attn.q_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.23.self_attn.q_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.23.self_attn.k_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.23.self_attn.k_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.23.self_attn.k_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.23.self_attn.v_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.23.self_attn.v_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.23.self_attn.v_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.23.self_attn.o_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.23.self_attn.o_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.23.self_attn.o_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.23.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.23.self_attn.k_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.23.self_attn.v_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.23.self_attn.softmax_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.23.mlp.gate_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.23.mlp.gate_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.23.mlp.gate_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.23.mlp.up_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.23.mlp.up_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.23.mlp.up_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.23.mlp.down_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.23.mlp.down_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.23.mlp.down_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.24.self_attn.q_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.24.self_attn.q_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.24.self_attn.q_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.24.self_attn.k_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.24.self_attn.k_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.24.self_attn.k_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.24.self_attn.v_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.24.self_attn.v_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.24.self_attn.v_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.24.self_attn.o_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.24.self_attn.o_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.24.self_attn.o_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.24.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.24.self_attn.k_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.24.self_attn.v_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.24.self_attn.softmax_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.24.mlp.gate_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.24.mlp.gate_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.24.mlp.gate_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.24.mlp.up_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.24.mlp.up_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.24.mlp.up_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.24.mlp.down_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.24.mlp.down_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.24.mlp.down_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.25.self_attn.q_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.25.self_attn.q_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.25.self_attn.q_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.25.self_attn.k_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.25.self_attn.k_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.25.self_attn.k_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.25.self_attn.v_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.25.self_attn.v_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.25.self_attn.v_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.25.self_attn.o_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.25.self_attn.o_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.25.self_attn.o_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.25.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.25.self_attn.k_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.25.self_attn.v_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.25.self_attn.softmax_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.25.mlp.gate_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.25.mlp.gate_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.25.mlp.gate_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.25.mlp.up_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.25.mlp.up_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.25.mlp.up_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.25.mlp.down_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.25.mlp.down_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.25.mlp.down_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.26.self_attn.q_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.26.self_attn.q_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.26.self_attn.q_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.26.self_attn.k_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.26.self_attn.k_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.26.self_attn.k_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.26.self_attn.v_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.26.self_attn.v_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.26.self_attn.v_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.26.self_attn.o_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.26.self_attn.o_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.26.self_attn.o_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.26.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.26.self_attn.k_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.26.self_attn.v_bmm_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.26.self_attn.softmax_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.26.mlp.gate_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.26.mlp.gate_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.26.mlp.gate_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.26.mlp.up_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.26.mlp.up_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.26.mlp.up_proj.linear.weight_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.26.mlp.down_proj.linear.input_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.26.mlp.down_proj.linear.output_quantizer TensorQuantizer(disabled) model.vision_tower.encoder.layers.26.mlp.down_proj.linear.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.0.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.0.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.0.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.0.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.0.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.0.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.0.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.0.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.0.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.0.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.0.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.0.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.0.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.0.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.0.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.0.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.0.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=84.0000 calibrator=MaxCalibrator quant) model.language_model.layers.0.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.0.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2617 calibrator=MaxCalibrator quant) model.language_model.layers.0.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=84.0000 calibrator=MaxCalibrator quant) model.language_model.layers.0.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.0.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2031 calibrator=MaxCalibrator quant) model.language_model.layers.0.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=876.0000 calibrator=MaxCalibrator quant) model.language_model.layers.0.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.0.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2148 calibrator=MaxCalibrator quant) model.language_model.layers.1.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.1.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.1.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.1.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.1.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.1.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.1.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.1.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.1.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.1.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.1.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.1.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.1.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.1.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.1.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.1.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.1.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=51.7500 calibrator=MaxCalibrator quant) model.language_model.layers.1.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.1.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2100 calibrator=MaxCalibrator quant) model.language_model.layers.1.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=51.7500 calibrator=MaxCalibrator quant) model.language_model.layers.1.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.1.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1992 calibrator=MaxCalibrator quant) model.language_model.layers.1.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=848.0000 calibrator=MaxCalibrator quant) model.language_model.layers.1.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.1.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2002 calibrator=MaxCalibrator quant) model.language_model.layers.2.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.2.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.2.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.2.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.2.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.2.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.2.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.2.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.2.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.2.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.2.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.2.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.2.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.2.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.2.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.2.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.2.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=44.5000 calibrator=MaxCalibrator quant) model.language_model.layers.2.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.2.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1748 calibrator=MaxCalibrator quant) model.language_model.layers.2.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=44.5000 calibrator=MaxCalibrator quant) model.language_model.layers.2.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.2.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2080 calibrator=MaxCalibrator quant) model.language_model.layers.2.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=430.0000 calibrator=MaxCalibrator quant) model.language_model.layers.2.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.2.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3633 calibrator=MaxCalibrator quant) model.language_model.layers.3.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.3.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.3.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.3.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.3.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.3.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.3.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.3.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.3.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.3.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.3.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.3.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.3.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.3.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.3.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.3.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.3.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=43.5000 calibrator=MaxCalibrator quant) model.language_model.layers.3.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.3.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2197 calibrator=MaxCalibrator quant) model.language_model.layers.3.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=43.5000 calibrator=MaxCalibrator quant) model.language_model.layers.3.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.3.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2178 calibrator=MaxCalibrator quant) model.language_model.layers.3.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=318.0000 calibrator=MaxCalibrator quant) model.language_model.layers.3.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.3.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2275 calibrator=MaxCalibrator quant) model.language_model.layers.4.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.4.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.4.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.4.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.4.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.4.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.4.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.4.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.4.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.4.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.4.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.4.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.4.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.4.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.4.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.4.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.4.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=119.0000 calibrator=MaxCalibrator quant) model.language_model.layers.4.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.4.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1807 calibrator=MaxCalibrator quant) model.language_model.layers.4.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=119.0000 calibrator=MaxCalibrator quant) model.language_model.layers.4.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.4.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1582 calibrator=MaxCalibrator quant) model.language_model.layers.4.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=488.0000 calibrator=MaxCalibrator quant) model.language_model.layers.4.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.4.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2969 calibrator=MaxCalibrator quant) model.language_model.layers.5.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.5.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.5.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.5.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.5.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.5.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.5.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.5.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.5.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.5.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.5.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.5.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.5.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.5.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=85.0000 calibrator=MaxCalibrator quant) model.language_model.layers.5.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.5.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2041 calibrator=MaxCalibrator quant) model.language_model.layers.5.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=85.0000 calibrator=MaxCalibrator quant) model.language_model.layers.5.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.5.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2168 calibrator=MaxCalibrator quant) model.language_model.layers.5.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=944.0000 calibrator=MaxCalibrator quant) model.language_model.layers.5.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.5.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2070 calibrator=MaxCalibrator quant) model.language_model.layers.6.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.6.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.6.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.6.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.6.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.6.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.6.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.6.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.6.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.6.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.6.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.6.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.6.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.6.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.6.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.6.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.6.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=29.6250 calibrator=MaxCalibrator quant) model.language_model.layers.6.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.6.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2041 calibrator=MaxCalibrator quant) model.language_model.layers.6.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=29.6250 calibrator=MaxCalibrator quant) model.language_model.layers.6.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.6.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2266 calibrator=MaxCalibrator quant) model.language_model.layers.6.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=440.0000 calibrator=MaxCalibrator quant) model.language_model.layers.6.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.6.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2676 calibrator=MaxCalibrator quant) model.language_model.layers.7.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.7.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.7.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.7.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.7.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.7.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.7.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.7.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.7.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.7.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.7.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.7.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.7.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.7.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.7.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.7.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.7.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=28.1250 calibrator=MaxCalibrator quant) model.language_model.layers.7.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.7.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2773 calibrator=MaxCalibrator quant) model.language_model.layers.7.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=28.1250 calibrator=MaxCalibrator quant) model.language_model.layers.7.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.7.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2227 calibrator=MaxCalibrator quant) model.language_model.layers.7.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=220.0000 calibrator=MaxCalibrator quant) model.language_model.layers.7.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.7.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2812 calibrator=MaxCalibrator quant) model.language_model.layers.8.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.8.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.8.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.8.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.8.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.8.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.8.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.8.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.8.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.8.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.8.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.8.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.8.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.8.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.8.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.8.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.8.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=29.2500 calibrator=MaxCalibrator quant) model.language_model.layers.8.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.8.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3184 calibrator=MaxCalibrator quant) model.language_model.layers.8.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=29.2500 calibrator=MaxCalibrator quant) model.language_model.layers.8.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.8.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2617 calibrator=MaxCalibrator quant) model.language_model.layers.8.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=161.0000 calibrator=MaxCalibrator quant) model.language_model.layers.8.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.8.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2617 calibrator=MaxCalibrator quant) model.language_model.layers.9.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.9.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.9.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.9.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.9.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.9.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.9.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.9.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.9.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.9.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.9.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.9.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.9.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.9.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.9.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.9.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.9.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=26.3750 calibrator=MaxCalibrator quant) model.language_model.layers.9.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.9.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2129 calibrator=MaxCalibrator quant) model.language_model.layers.9.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=26.3750 calibrator=MaxCalibrator quant) model.language_model.layers.9.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.9.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2451 calibrator=MaxCalibrator quant) model.language_model.layers.9.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=125.0000 calibrator=MaxCalibrator quant) model.language_model.layers.9.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.9.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3125 calibrator=MaxCalibrator quant) model.language_model.layers.10.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.10.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.10.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.10.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.10.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.10.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.10.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.10.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.10.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.10.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.10.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.10.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.10.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.10.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.10.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.10.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.10.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=66.5000 calibrator=MaxCalibrator quant) model.language_model.layers.10.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.10.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2012 calibrator=MaxCalibrator quant) model.language_model.layers.10.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=66.5000 calibrator=MaxCalibrator quant) model.language_model.layers.10.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.10.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2109 calibrator=MaxCalibrator quant) model.language_model.layers.10.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=198.0000 calibrator=MaxCalibrator quant) model.language_model.layers.10.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.10.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4004 calibrator=MaxCalibrator quant) model.language_model.layers.11.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.11.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.11.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.11.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.11.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.11.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.11.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.11.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.11.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.11.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.11.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.11.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.11.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.11.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=400.0000 calibrator=MaxCalibrator quant) model.language_model.layers.11.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.11.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1982 calibrator=MaxCalibrator quant) model.language_model.layers.11.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=400.0000 calibrator=MaxCalibrator quant) model.language_model.layers.11.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.11.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1992 calibrator=MaxCalibrator quant) model.language_model.layers.11.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=608.0000 calibrator=MaxCalibrator quant) model.language_model.layers.11.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.11.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3477 calibrator=MaxCalibrator quant) model.language_model.layers.12.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.12.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.12.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.12.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.12.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.12.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.12.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.12.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.12.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.12.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.12.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.12.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.12.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.12.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.12.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.12.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.12.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=18.1250 calibrator=MaxCalibrator quant) model.language_model.layers.12.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.12.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2334 calibrator=MaxCalibrator quant) model.language_model.layers.12.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=18.1250 calibrator=MaxCalibrator quant) model.language_model.layers.12.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.12.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2422 calibrator=MaxCalibrator quant) model.language_model.layers.12.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=205.0000 calibrator=MaxCalibrator quant) model.language_model.layers.12.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.12.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4062 calibrator=MaxCalibrator quant) model.language_model.layers.13.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.13.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.13.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.13.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.13.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.13.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.13.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.13.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.13.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.13.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.13.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.13.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.13.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.13.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.13.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.13.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.13.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=48.7500 calibrator=MaxCalibrator quant) model.language_model.layers.13.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.13.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2109 calibrator=MaxCalibrator quant) model.language_model.layers.13.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=48.7500 calibrator=MaxCalibrator quant) model.language_model.layers.13.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.13.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2383 calibrator=MaxCalibrator quant) model.language_model.layers.13.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=231.0000 calibrator=MaxCalibrator quant) model.language_model.layers.13.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.13.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4805 calibrator=MaxCalibrator quant) model.language_model.layers.14.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.14.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.14.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.14.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.14.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.14.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.14.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.14.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.14.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.14.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.14.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.14.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.14.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.14.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.14.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.14.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.14.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=38.5000 calibrator=MaxCalibrator quant) model.language_model.layers.14.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.14.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2754 calibrator=MaxCalibrator quant) model.language_model.layers.14.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=38.5000 calibrator=MaxCalibrator quant) model.language_model.layers.14.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.14.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2451 calibrator=MaxCalibrator quant) model.language_model.layers.14.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=250.0000 calibrator=MaxCalibrator quant) model.language_model.layers.14.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.14.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4922 calibrator=MaxCalibrator quant) model.language_model.layers.15.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.15.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.15.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.15.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.15.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.15.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.15.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.15.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.15.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.15.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.15.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.15.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.15.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.15.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.15.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.15.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.15.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=31.8750 calibrator=MaxCalibrator quant) model.language_model.layers.15.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.15.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2773 calibrator=MaxCalibrator quant) model.language_model.layers.15.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=31.8750 calibrator=MaxCalibrator quant) model.language_model.layers.15.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.15.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2637 calibrator=MaxCalibrator quant) model.language_model.layers.15.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=241.0000 calibrator=MaxCalibrator quant) model.language_model.layers.15.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.15.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3301 calibrator=MaxCalibrator quant) model.language_model.layers.16.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.16.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.16.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.16.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.16.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.16.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.16.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.16.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.16.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.16.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.16.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.16.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.16.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.16.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.16.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.16.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.16.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=65.5000 calibrator=MaxCalibrator quant) model.language_model.layers.16.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.16.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1943 calibrator=MaxCalibrator quant) model.language_model.layers.16.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=65.5000 calibrator=MaxCalibrator quant) model.language_model.layers.16.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.16.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2490 calibrator=MaxCalibrator quant) model.language_model.layers.16.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=462.0000 calibrator=MaxCalibrator quant) model.language_model.layers.16.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.16.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2793 calibrator=MaxCalibrator quant) model.language_model.layers.17.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.17.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.17.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.17.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.17.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.17.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.17.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.17.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.17.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.17.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.17.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.17.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.17.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.17.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=76.5000 calibrator=MaxCalibrator quant) model.language_model.layers.17.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.17.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2441 calibrator=MaxCalibrator quant) model.language_model.layers.17.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=76.5000 calibrator=MaxCalibrator quant) model.language_model.layers.17.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.17.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2324 calibrator=MaxCalibrator quant) model.language_model.layers.17.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=728.0000 calibrator=MaxCalibrator quant) model.language_model.layers.17.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.17.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3965 calibrator=MaxCalibrator quant) model.language_model.layers.18.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.18.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.18.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.18.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.18.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.18.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.18.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.18.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.18.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.18.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.18.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.18.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.18.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.18.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.18.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.18.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.18.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=76.5000 calibrator=MaxCalibrator quant) model.language_model.layers.18.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.18.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1855 calibrator=MaxCalibrator quant) model.language_model.layers.18.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=76.5000 calibrator=MaxCalibrator quant) model.language_model.layers.18.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.18.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2100 calibrator=MaxCalibrator quant) model.language_model.layers.18.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=760.0000 calibrator=MaxCalibrator quant) model.language_model.layers.18.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.18.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3301 calibrator=MaxCalibrator quant) model.language_model.layers.19.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.19.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.19.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.19.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.19.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.19.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.19.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.19.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.19.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.19.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.19.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.19.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.19.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.19.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.19.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.19.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.19.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=31.2500 calibrator=MaxCalibrator quant) model.language_model.layers.19.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.19.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2324 calibrator=MaxCalibrator quant) model.language_model.layers.19.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=31.2500 calibrator=MaxCalibrator quant) model.language_model.layers.19.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.19.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2129 calibrator=MaxCalibrator quant) model.language_model.layers.19.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=924.0000 calibrator=MaxCalibrator quant) model.language_model.layers.19.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.19.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2715 calibrator=MaxCalibrator quant) model.language_model.layers.20.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.20.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.20.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.20.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.20.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.20.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.20.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.20.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.20.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.20.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.20.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.20.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.20.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.20.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.20.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.20.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.20.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=28.7500 calibrator=MaxCalibrator quant) model.language_model.layers.20.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.20.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3145 calibrator=MaxCalibrator quant) model.language_model.layers.20.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=28.7500 calibrator=MaxCalibrator quant) model.language_model.layers.20.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.20.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2676 calibrator=MaxCalibrator quant) model.language_model.layers.20.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=160.0000 calibrator=MaxCalibrator quant) model.language_model.layers.20.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.20.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3281 calibrator=MaxCalibrator quant) model.language_model.layers.21.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.21.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.21.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.21.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.21.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.21.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.21.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.21.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.21.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.21.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.21.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.21.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.21.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.21.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.21.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.21.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.21.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=37.2500 calibrator=MaxCalibrator quant) model.language_model.layers.21.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.21.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2490 calibrator=MaxCalibrator quant) model.language_model.layers.21.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=37.2500 calibrator=MaxCalibrator quant) model.language_model.layers.21.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.21.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2500 calibrator=MaxCalibrator quant) model.language_model.layers.21.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=348.0000 calibrator=MaxCalibrator quant) model.language_model.layers.21.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.21.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2461 calibrator=MaxCalibrator quant) model.language_model.layers.22.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.22.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.22.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.22.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.22.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.22.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.22.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.22.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.22.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.22.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.22.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.22.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.22.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.22.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.22.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.22.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.22.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=31.8750 calibrator=MaxCalibrator quant) model.language_model.layers.22.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.22.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3125 calibrator=MaxCalibrator quant) model.language_model.layers.22.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=31.8750 calibrator=MaxCalibrator quant) model.language_model.layers.22.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.22.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2490 calibrator=MaxCalibrator quant) model.language_model.layers.22.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=616.0000 calibrator=MaxCalibrator quant) model.language_model.layers.22.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.22.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3340 calibrator=MaxCalibrator quant) model.language_model.layers.23.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.23.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.23.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.23.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.23.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.23.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.23.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.23.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.23.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.23.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.23.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.23.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.23.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.23.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=18.3750 calibrator=MaxCalibrator quant) model.language_model.layers.23.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.23.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3496 calibrator=MaxCalibrator quant) model.language_model.layers.23.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=18.3750 calibrator=MaxCalibrator quant) model.language_model.layers.23.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.23.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2598 calibrator=MaxCalibrator quant) model.language_model.layers.23.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=123.0000 calibrator=MaxCalibrator quant) model.language_model.layers.23.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.23.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.5742 calibrator=MaxCalibrator quant) model.language_model.layers.24.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.24.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.24.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.24.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.24.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.24.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.24.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.24.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.24.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.24.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.24.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.24.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.24.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.24.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.24.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.24.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.24.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=23.5000 calibrator=MaxCalibrator quant) model.language_model.layers.24.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.24.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3457 calibrator=MaxCalibrator quant) model.language_model.layers.24.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=23.5000 calibrator=MaxCalibrator quant) model.language_model.layers.24.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.24.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3047 calibrator=MaxCalibrator quant) model.language_model.layers.24.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=198.0000 calibrator=MaxCalibrator quant) model.language_model.layers.24.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.24.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3984 calibrator=MaxCalibrator quant) model.language_model.layers.25.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.25.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.25.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.25.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.25.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.25.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.25.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.25.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.25.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.25.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.25.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.25.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.25.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.25.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.25.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.25.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.25.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=26.6250 calibrator=MaxCalibrator quant) model.language_model.layers.25.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.25.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4355 calibrator=MaxCalibrator quant) model.language_model.layers.25.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=26.6250 calibrator=MaxCalibrator quant) model.language_model.layers.25.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.25.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2910 calibrator=MaxCalibrator quant) model.language_model.layers.25.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=125.5000 calibrator=MaxCalibrator quant) model.language_model.layers.25.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.25.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3730 calibrator=MaxCalibrator quant) model.language_model.layers.26.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.26.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.26.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.26.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.26.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.26.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.26.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.26.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.26.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.26.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.26.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.26.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.26.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.26.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.26.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.26.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.26.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=36.2500 calibrator=MaxCalibrator quant) model.language_model.layers.26.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.26.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2754 calibrator=MaxCalibrator quant) model.language_model.layers.26.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=36.2500 calibrator=MaxCalibrator quant) model.language_model.layers.26.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.26.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2930 calibrator=MaxCalibrator quant) model.language_model.layers.26.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=196.0000 calibrator=MaxCalibrator quant) model.language_model.layers.26.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.26.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3789 calibrator=MaxCalibrator quant) model.language_model.layers.27.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.27.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.27.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.27.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.27.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.27.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.27.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.27.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.27.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.27.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.27.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.27.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.27.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.27.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.27.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.27.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.27.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=21.3750 calibrator=MaxCalibrator quant) model.language_model.layers.27.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.27.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3301 calibrator=MaxCalibrator quant) model.language_model.layers.27.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=21.3750 calibrator=MaxCalibrator quant) model.language_model.layers.27.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.27.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3926 calibrator=MaxCalibrator quant) model.language_model.layers.27.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=121.0000 calibrator=MaxCalibrator quant) model.language_model.layers.27.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.27.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4004 calibrator=MaxCalibrator quant) model.language_model.layers.28.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.28.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.28.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.28.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.28.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.28.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.28.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.28.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.28.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.28.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.28.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.28.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.28.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.28.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.28.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.28.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.28.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=25.6250 calibrator=MaxCalibrator quant) model.language_model.layers.28.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.28.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2949 calibrator=MaxCalibrator quant) model.language_model.layers.28.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=25.6250 calibrator=MaxCalibrator quant) model.language_model.layers.28.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.28.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.5664 calibrator=MaxCalibrator quant) model.language_model.layers.28.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=67.0000 calibrator=MaxCalibrator quant) model.language_model.layers.28.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.28.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3066 calibrator=MaxCalibrator quant) model.language_model.layers.29.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.29.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.29.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.29.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.29.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.29.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.29.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.29.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.29.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.29.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.29.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.29.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.29.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.29.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=45.2500 calibrator=MaxCalibrator quant) model.language_model.layers.29.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.29.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4004 calibrator=MaxCalibrator quant) model.language_model.layers.29.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=45.2500 calibrator=MaxCalibrator quant) model.language_model.layers.29.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.29.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3047 calibrator=MaxCalibrator quant) model.language_model.layers.29.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=100.5000 calibrator=MaxCalibrator quant) model.language_model.layers.29.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.29.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3809 calibrator=MaxCalibrator quant) model.language_model.layers.30.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.30.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.30.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.30.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.30.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.30.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.30.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.30.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.30.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.30.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.30.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.30.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.30.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.30.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.30.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.30.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.30.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=84.0000 calibrator=MaxCalibrator quant) model.language_model.layers.30.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.30.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.5664 calibrator=MaxCalibrator quant) model.language_model.layers.30.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=84.0000 calibrator=MaxCalibrator quant) model.language_model.layers.30.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.30.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4082 calibrator=MaxCalibrator quant) model.language_model.layers.30.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=161.0000 calibrator=MaxCalibrator quant) model.language_model.layers.30.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.30.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3867 calibrator=MaxCalibrator quant) model.language_model.layers.31.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.31.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.31.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.31.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.31.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.31.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.31.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.31.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.31.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.31.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.31.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.31.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.31.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.31.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.31.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.31.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.31.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=140.0000 calibrator=MaxCalibrator quant) model.language_model.layers.31.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.31.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4004 calibrator=MaxCalibrator quant) model.language_model.layers.31.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=140.0000 calibrator=MaxCalibrator quant) model.language_model.layers.31.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.31.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4082 calibrator=MaxCalibrator quant) model.language_model.layers.31.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=426.0000 calibrator=MaxCalibrator quant) model.language_model.layers.31.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.31.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3496 calibrator=MaxCalibrator quant) model.language_model.layers.32.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.32.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.32.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.32.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.32.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.32.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.32.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.32.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.32.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.32.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.32.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.32.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.32.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.32.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.32.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.32.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.32.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=14.5625 calibrator=MaxCalibrator quant) model.language_model.layers.32.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.32.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3320 calibrator=MaxCalibrator quant) model.language_model.layers.32.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=14.5625 calibrator=MaxCalibrator quant) model.language_model.layers.32.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.32.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2871 calibrator=MaxCalibrator quant) model.language_model.layers.32.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=328.0000 calibrator=MaxCalibrator quant) model.language_model.layers.32.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.32.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3867 calibrator=MaxCalibrator quant) model.language_model.layers.33.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.33.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.33.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.33.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.33.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.33.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.33.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.33.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.33.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.33.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.33.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.33.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.33.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.33.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.33.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.33.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.33.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=198.0000 calibrator=MaxCalibrator quant) model.language_model.layers.33.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.33.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3809 calibrator=MaxCalibrator quant) model.language_model.layers.33.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=198.0000 calibrator=MaxCalibrator quant) model.language_model.layers.33.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.33.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3047 calibrator=MaxCalibrator quant) model.language_model.layers.33.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=322.0000 calibrator=MaxCalibrator quant) model.language_model.layers.33.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.33.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2715 calibrator=MaxCalibrator quant) model.language_model.layers.34.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.34.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.34.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.34.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.34.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.34.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.34.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.34.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.34.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.34.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.34.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.34.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.34.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.34.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.34.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.34.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.34.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=266.0000 calibrator=MaxCalibrator quant) model.language_model.layers.34.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.34.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3047 calibrator=MaxCalibrator quant) model.language_model.layers.34.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=266.0000 calibrator=MaxCalibrator quant) model.language_model.layers.34.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.34.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2637 calibrator=MaxCalibrator quant) model.language_model.layers.34.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=696.0000 calibrator=MaxCalibrator quant) model.language_model.layers.34.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.34.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3164 calibrator=MaxCalibrator quant) model.language_model.layers.35.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.35.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.35.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.35.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.35.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.35.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.35.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.35.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.35.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.35.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.35.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.35.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.35.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.35.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=308.0000 calibrator=MaxCalibrator quant) model.language_model.layers.35.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.35.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4004 calibrator=MaxCalibrator quant) model.language_model.layers.35.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=308.0000 calibrator=MaxCalibrator quant) model.language_model.layers.35.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.35.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3242 calibrator=MaxCalibrator quant) model.language_model.layers.35.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=322.0000 calibrator=MaxCalibrator quant) model.language_model.layers.35.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.35.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3066 calibrator=MaxCalibrator quant) model.language_model.layers.36.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.36.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.36.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.36.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.36.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.36.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.36.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.36.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.36.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.36.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.36.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.36.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.36.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.36.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.36.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.36.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.36.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=112.0000 calibrator=MaxCalibrator quant) model.language_model.layers.36.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.36.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3184 calibrator=MaxCalibrator quant) model.language_model.layers.36.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=112.0000 calibrator=MaxCalibrator quant) model.language_model.layers.36.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.36.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3027 calibrator=MaxCalibrator quant) model.language_model.layers.36.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=104.5000 calibrator=MaxCalibrator quant) model.language_model.layers.36.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.36.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3594 calibrator=MaxCalibrator quant) model.language_model.layers.37.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.37.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.37.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.37.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.37.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.37.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.37.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.37.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.37.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.37.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.37.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.37.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.37.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.37.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.37.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.37.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.37.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=102.5000 calibrator=MaxCalibrator quant) model.language_model.layers.37.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.37.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3105 calibrator=MaxCalibrator quant) model.language_model.layers.37.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=102.5000 calibrator=MaxCalibrator quant) model.language_model.layers.37.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.37.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2754 calibrator=MaxCalibrator quant) model.language_model.layers.37.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=162.0000 calibrator=MaxCalibrator quant) model.language_model.layers.37.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.37.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3223 calibrator=MaxCalibrator quant) model.language_model.layers.38.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.38.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.38.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.38.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.38.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.38.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.38.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.38.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.38.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.38.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.38.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.38.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.38.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.38.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.38.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.38.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.38.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=98.5000 calibrator=MaxCalibrator quant) model.language_model.layers.38.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.38.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.6016 calibrator=MaxCalibrator quant) model.language_model.layers.38.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=98.5000 calibrator=MaxCalibrator quant) model.language_model.layers.38.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.38.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.6992 calibrator=MaxCalibrator quant) model.language_model.layers.38.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=121.0000 calibrator=MaxCalibrator quant) model.language_model.layers.38.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.38.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3359 calibrator=MaxCalibrator quant) model.language_model.layers.39.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.39.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.39.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.39.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.39.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.39.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.39.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.39.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.39.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.39.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.39.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.39.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.39.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.39.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.39.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.39.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.39.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=52.5000 calibrator=MaxCalibrator quant) model.language_model.layers.39.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.39.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3145 calibrator=MaxCalibrator quant) model.language_model.layers.39.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=52.5000 calibrator=MaxCalibrator quant) model.language_model.layers.39.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.39.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2988 calibrator=MaxCalibrator quant) model.language_model.layers.39.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=74.5000 calibrator=MaxCalibrator quant) model.language_model.layers.39.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.39.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4180 calibrator=MaxCalibrator quant) model.language_model.layers.40.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.40.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.40.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.40.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.40.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.40.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.40.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.40.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.40.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.40.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.40.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.40.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.40.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.40.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.40.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.40.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.40.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=49.5000 calibrator=MaxCalibrator quant) model.language_model.layers.40.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.40.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3594 calibrator=MaxCalibrator quant) model.language_model.layers.40.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=49.5000 calibrator=MaxCalibrator quant) model.language_model.layers.40.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.40.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3984 calibrator=MaxCalibrator quant) model.language_model.layers.40.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=116.5000 calibrator=MaxCalibrator quant) model.language_model.layers.40.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.40.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2773 calibrator=MaxCalibrator quant) model.language_model.layers.41.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.41.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.41.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.41.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.41.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.41.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.41.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.41.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.41.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.41.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.41.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.41.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.41.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.41.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=42.7500 calibrator=MaxCalibrator quant) model.language_model.layers.41.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.41.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3730 calibrator=MaxCalibrator quant) model.language_model.layers.41.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=42.7500 calibrator=MaxCalibrator quant) model.language_model.layers.41.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.41.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2676 calibrator=MaxCalibrator quant) model.language_model.layers.41.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=100.5000 calibrator=MaxCalibrator quant) model.language_model.layers.41.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.41.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4062 calibrator=MaxCalibrator quant) model.language_model.layers.42.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.42.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.42.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.42.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.42.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.42.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.42.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.42.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.42.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.42.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.42.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.42.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.42.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.42.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.42.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.42.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.42.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=37.5000 calibrator=MaxCalibrator quant) model.language_model.layers.42.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.42.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3086 calibrator=MaxCalibrator quant) model.language_model.layers.42.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=37.5000 calibrator=MaxCalibrator quant) model.language_model.layers.42.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.42.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2598 calibrator=MaxCalibrator quant) model.language_model.layers.42.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=192.0000 calibrator=MaxCalibrator quant) model.language_model.layers.42.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.42.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2676 calibrator=MaxCalibrator quant) model.language_model.layers.43.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.43.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.43.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.43.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.43.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.43.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.43.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.43.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.43.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.43.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.43.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.43.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.43.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.43.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.43.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.43.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.43.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=32.2500 calibrator=MaxCalibrator quant) model.language_model.layers.43.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.43.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3008 calibrator=MaxCalibrator quant) model.language_model.layers.43.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=32.2500 calibrator=MaxCalibrator quant) model.language_model.layers.43.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.43.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3359 calibrator=MaxCalibrator quant) model.language_model.layers.43.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=88.5000 calibrator=MaxCalibrator quant) model.language_model.layers.43.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.43.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2422 calibrator=MaxCalibrator quant) model.language_model.layers.44.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.44.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.44.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.44.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.44.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.44.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.44.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.44.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.44.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.44.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.44.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.44.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.44.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.44.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.44.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.44.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.44.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=38.5000 calibrator=MaxCalibrator quant) model.language_model.layers.44.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.44.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2539 calibrator=MaxCalibrator quant) model.language_model.layers.44.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=38.5000 calibrator=MaxCalibrator quant) model.language_model.layers.44.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.44.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2471 calibrator=MaxCalibrator quant) model.language_model.layers.44.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=93.5000 calibrator=MaxCalibrator quant) model.language_model.layers.44.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.44.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3164 calibrator=MaxCalibrator quant) model.language_model.layers.45.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.45.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.45.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.45.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.45.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.45.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.45.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.45.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.45.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.45.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.45.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.45.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.45.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.45.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.45.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.45.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.45.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=38.0000 calibrator=MaxCalibrator quant) model.language_model.layers.45.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.45.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3789 calibrator=MaxCalibrator quant) model.language_model.layers.45.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=38.0000 calibrator=MaxCalibrator quant) model.language_model.layers.45.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.45.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2773 calibrator=MaxCalibrator quant) model.language_model.layers.45.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=128.0000 calibrator=MaxCalibrator quant) model.language_model.layers.45.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.45.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2305 calibrator=MaxCalibrator quant) model.language_model.layers.46.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.46.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.46.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.46.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.46.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.46.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.46.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.46.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.46.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.46.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.46.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.46.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.46.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.46.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.46.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.46.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.46.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=41.5000 calibrator=MaxCalibrator quant) model.language_model.layers.46.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.46.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3301 calibrator=MaxCalibrator quant) model.language_model.layers.46.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=41.5000 calibrator=MaxCalibrator quant) model.language_model.layers.46.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.46.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2432 calibrator=MaxCalibrator quant) model.language_model.layers.46.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=117.0000 calibrator=MaxCalibrator quant) model.language_model.layers.46.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.46.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2461 calibrator=MaxCalibrator quant) model.language_model.layers.47.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.47.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.47.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.47.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.47.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.47.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.47.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.47.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.47.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.47.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.47.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.47.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.47.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.47.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=26.1250 calibrator=MaxCalibrator quant) model.language_model.layers.47.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.47.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2969 calibrator=MaxCalibrator quant) model.language_model.layers.47.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=26.1250 calibrator=MaxCalibrator quant) model.language_model.layers.47.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.47.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2256 calibrator=MaxCalibrator quant) model.language_model.layers.47.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=201.0000 calibrator=MaxCalibrator quant) model.language_model.layers.47.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.47.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2051 calibrator=MaxCalibrator quant) model.language_model.layers.48.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.48.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.48.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.48.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.48.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.48.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.48.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.48.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.48.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.48.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.48.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.48.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.48.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.48.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.48.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.48.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.48.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=28.8750 calibrator=MaxCalibrator quant) model.language_model.layers.48.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.48.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2695 calibrator=MaxCalibrator quant) model.language_model.layers.48.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=28.8750 calibrator=MaxCalibrator quant) model.language_model.layers.48.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.48.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1855 calibrator=MaxCalibrator quant) model.language_model.layers.48.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=256.0000 calibrator=MaxCalibrator quant) model.language_model.layers.48.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.48.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2451 calibrator=MaxCalibrator quant) model.language_model.layers.49.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.49.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.49.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.49.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.49.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.49.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.49.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.49.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.49.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.49.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.49.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.49.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.49.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.49.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.49.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.49.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.49.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=50.5000 calibrator=MaxCalibrator quant) model.language_model.layers.49.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.49.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3672 calibrator=MaxCalibrator quant) model.language_model.layers.49.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=50.5000 calibrator=MaxCalibrator quant) model.language_model.layers.49.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.49.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2275 calibrator=MaxCalibrator quant) model.language_model.layers.49.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=104.0000 calibrator=MaxCalibrator quant) model.language_model.layers.49.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.49.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2031 calibrator=MaxCalibrator quant) model.language_model.layers.50.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.50.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.50.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.50.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.50.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.50.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.50.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.50.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.50.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.50.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.50.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.50.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.50.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.50.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.50.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.50.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.50.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=19.1250 calibrator=MaxCalibrator quant) model.language_model.layers.50.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.50.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2812 calibrator=MaxCalibrator quant) model.language_model.layers.50.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=19.1250 calibrator=MaxCalibrator quant) model.language_model.layers.50.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.50.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2295 calibrator=MaxCalibrator quant) model.language_model.layers.50.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=111.5000 calibrator=MaxCalibrator quant) model.language_model.layers.50.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.50.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3477 calibrator=MaxCalibrator quant) model.language_model.layers.51.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.51.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.51.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.51.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.51.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.51.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.51.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.51.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.51.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.51.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.51.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.51.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.51.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.51.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.51.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.51.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.51.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=32.7500 calibrator=MaxCalibrator quant) model.language_model.layers.51.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.51.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2598 calibrator=MaxCalibrator quant) model.language_model.layers.51.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=32.7500 calibrator=MaxCalibrator quant) model.language_model.layers.51.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.51.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2812 calibrator=MaxCalibrator quant) model.language_model.layers.51.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=127.0000 calibrator=MaxCalibrator quant) model.language_model.layers.51.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.51.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2266 calibrator=MaxCalibrator quant) model.language_model.layers.52.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.52.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.52.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.52.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.52.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.52.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.52.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.52.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.52.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.52.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.52.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.52.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.52.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.52.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.52.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.52.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.52.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=26.6250 calibrator=MaxCalibrator quant) model.language_model.layers.52.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.52.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3047 calibrator=MaxCalibrator quant) model.language_model.layers.52.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=26.6250 calibrator=MaxCalibrator quant) model.language_model.layers.52.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.52.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2129 calibrator=MaxCalibrator quant) model.language_model.layers.52.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=161.0000 calibrator=MaxCalibrator quant) model.language_model.layers.52.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.52.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2637 calibrator=MaxCalibrator quant) model.language_model.layers.53.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.53.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.53.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.53.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.53.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.53.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.53.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.53.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.53.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.53.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.53.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.53.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.53.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.53.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=26.0000 calibrator=MaxCalibrator quant) model.language_model.layers.53.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.53.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2793 calibrator=MaxCalibrator quant) model.language_model.layers.53.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=26.0000 calibrator=MaxCalibrator quant) model.language_model.layers.53.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.53.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2275 calibrator=MaxCalibrator quant) model.language_model.layers.53.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=322.0000 calibrator=MaxCalibrator quant) model.language_model.layers.53.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.53.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2734 calibrator=MaxCalibrator quant) model.language_model.layers.54.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.54.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.54.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.54.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.54.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.54.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.54.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.54.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.54.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.54.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.54.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.54.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.54.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.54.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.54.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.54.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.54.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=39.5000 calibrator=MaxCalibrator quant) model.language_model.layers.54.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.54.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2949 calibrator=MaxCalibrator quant) model.language_model.layers.54.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=39.5000 calibrator=MaxCalibrator quant) model.language_model.layers.54.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.54.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2520 calibrator=MaxCalibrator quant) model.language_model.layers.54.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=308.0000 calibrator=MaxCalibrator quant) model.language_model.layers.54.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.54.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2734 calibrator=MaxCalibrator quant) model.language_model.layers.55.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.55.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.55.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.55.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.55.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.55.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.55.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.55.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.55.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.55.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.55.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.55.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.55.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.55.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.55.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.55.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.55.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=44.2500 calibrator=MaxCalibrator quant) model.language_model.layers.55.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.55.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3164 calibrator=MaxCalibrator quant) model.language_model.layers.55.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=44.2500 calibrator=MaxCalibrator quant) model.language_model.layers.55.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.55.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2441 calibrator=MaxCalibrator quant) model.language_model.layers.55.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=153.0000 calibrator=MaxCalibrator quant) model.language_model.layers.55.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.55.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3223 calibrator=MaxCalibrator quant) model.language_model.layers.56.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.56.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.56.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.56.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.56.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.56.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.56.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.56.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.56.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.56.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.56.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.56.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.56.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.56.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.56.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.56.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.56.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=66.0000 calibrator=MaxCalibrator quant) model.language_model.layers.56.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.56.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2715 calibrator=MaxCalibrator quant) model.language_model.layers.56.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=66.0000 calibrator=MaxCalibrator quant) model.language_model.layers.56.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.56.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2139 calibrator=MaxCalibrator quant) model.language_model.layers.56.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=96.5000 calibrator=MaxCalibrator quant) model.language_model.layers.56.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.56.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2754 calibrator=MaxCalibrator quant) model.language_model.layers.57.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.57.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.57.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.57.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.57.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.57.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.57.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.57.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.57.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.57.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.57.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.57.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.57.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.57.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.57.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.57.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.57.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=64.5000 calibrator=MaxCalibrator quant) model.language_model.layers.57.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.57.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3066 calibrator=MaxCalibrator quant) model.language_model.layers.57.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=64.5000 calibrator=MaxCalibrator quant) model.language_model.layers.57.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.57.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2480 calibrator=MaxCalibrator quant) model.language_model.layers.57.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=129.0000 calibrator=MaxCalibrator quant) model.language_model.layers.57.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.57.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4004 calibrator=MaxCalibrator quant) model.language_model.layers.58.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.58.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.58.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.58.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.58.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.58.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.58.self_attn.v_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.58.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.58.self_attn.v_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.58.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.58.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.58.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.58.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.58.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.58.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.58.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.58.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=43.5000 calibrator=MaxCalibrator quant) model.language_model.layers.58.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.58.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2432 calibrator=MaxCalibrator quant) model.language_model.layers.58.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=43.5000 calibrator=MaxCalibrator quant) model.language_model.layers.58.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.58.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1885 calibrator=MaxCalibrator quant) model.language_model.layers.58.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=130.0000 calibrator=MaxCalibrator quant) model.language_model.layers.58.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.58.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1436 calibrator=MaxCalibrator quant) model.language_model.layers.59.self_attn.k_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.59.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.59.self_attn.k_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.59.self_attn.q_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.59.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.59.self_attn.q_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.59.self_attn.o_proj.input_quantizer TensorQuantizer(disabled) model.language_model.layers.59.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.59.self_attn.o_proj.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.59.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.59.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.59.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) model.language_model.layers.59.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.59.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=22.3750 calibrator=MaxCalibrator quant) model.language_model.layers.59.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.59.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2100 calibrator=MaxCalibrator quant) model.language_model.layers.59.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=22.3750 calibrator=MaxCalibrator quant) model.language_model.layers.59.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.59.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2383 calibrator=MaxCalibrator quant) model.language_model.layers.59.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=114.0000 calibrator=MaxCalibrator quant) model.language_model.layers.59.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.59.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4648 calibrator=MaxCalibrator quant) model.embed_vision.embedding_projection.input_quantizer TensorQuantizer(disabled) model.embed_vision.embedding_projection.output_quantizer TensorQuantizer(disabled) model.embed_vision.embedding_projection.weight_quantizer TensorQuantizer(disabled) lm_head.input_quantizer TensorQuantizer(disabled) lm_head.output_quantizer TensorQuantizer(disabled) lm_head.weight_quantizer TensorQuantizer(disabled) 2154 TensorQuantizers found in model