ggerganov HF Staff committed on
Commit
efed5ba
·
unverified ·
1 Parent(s): 52c45b9

ggml : fix vld1q_s8_x4 32-bit compat (llama/4828)

Browse files

* ggml : fix vld1q_s8_x4 32-bit compat

ggml-ci

* ggml : fix 32-bit ARM compat (cont)

ggml-ci

Files changed (1) hide show
  1. ggml-quants.c +4 -4
ggml-quants.c CHANGED
@@ -7250,9 +7250,9 @@ void ggml_vec_dot_iq2_xxs_q8_K(const int n, float * restrict s, const void * res
7250
  uint32_t aux32[4];
7251
  const uint8_t * aux8 = (const uint8_t *)aux32;
7252
 
7253
- int8x16x4_t q2u;
7254
- int8x16x4_t q2s;
7255
- int8x16x4_t q8b;
7256
 
7257
  float sumf = 0;
7258
  for (int i = 0; i < nb; ++i) {
@@ -7261,7 +7261,7 @@ void ggml_vec_dot_iq2_xxs_q8_K(const int n, float * restrict s, const void * res
7261
  const int8_t * restrict q8 = y[i].qs;
7262
  float sumf1 = 0, sumf2 = 0;
7263
  for (int ib32 = 0; ib32 < QK_K/32; ib32 += 2) {
7264
- q8b = vld1q_s8_x4(q8); q8 += 64;
7265
  memcpy(aux32, q2, 4*sizeof(uint32_t)); q2 += 8;
7266
  q2u.val[0] = vcombine_s8(vld1_s8((const void *)(iq2xxs_grid + aux8[ 0])), vld1_s8((const void *)(iq2xxs_grid + aux8[ 1])));
7267
  q2u.val[1] = vcombine_s8(vld1_s8((const void *)(iq2xxs_grid + aux8[ 2])), vld1_s8((const void *)(iq2xxs_grid + aux8[ 3])));
 
7250
  uint32_t aux32[4];
7251
  const uint8_t * aux8 = (const uint8_t *)aux32;
7252
 
7253
+ ggml_int8x16x4_t q2u;
7254
+ ggml_int8x16x4_t q2s;
7255
+ ggml_int8x16x4_t q8b;
7256
 
7257
  float sumf = 0;
7258
  for (int i = 0; i < nb; ++i) {
 
7261
  const int8_t * restrict q8 = y[i].qs;
7262
  float sumf1 = 0, sumf2 = 0;
7263
  for (int ib32 = 0; ib32 < QK_K/32; ib32 += 2) {
7264
+ q8b = ggml_vld1q_s8_x4(q8); q8 += 64;
7265
  memcpy(aux32, q2, 4*sizeof(uint32_t)); q2 += 8;
7266
  q2u.val[0] = vcombine_s8(vld1_s8((const void *)(iq2xxs_grid + aux8[ 0])), vld1_s8((const void *)(iq2xxs_grid + aux8[ 1])));
7267
  q2u.val[1] = vcombine_s8(vld1_s8((const void *)(iq2xxs_grid + aux8[ 2])), vld1_s8((const void *)(iq2xxs_grid + aux8[ 3])));