ggerganov HF Staff committed on
Commit
9d1185a
·
1 Parent(s): c0fcd7a

metal : disable fast-math for some cpy kernels (llama/14460)

Browse files

* metal : disable fast-math for some cpy kernels

ggml-ci

* cont : disable for q4_1

ggml-ci

* cont : disable for iq4_nl

ggml-ci

ggml/src/ggml-metal/ggml-metal.metal CHANGED
@@ -138,6 +138,7 @@ void quantize_q4_0(device const float * src, device block_q4_0 & dst) {
138
  }
139
 
140
  void quantize_q4_1(device const float * src, device block_q4_1 & dst) {
 
141
  float min = FLT_MAX;
142
  float max = -FLT_MAX;
143
 
@@ -203,6 +204,7 @@ void quantize_q5_0(device const float * src, device block_q5_0 & dst) {
203
  }
204
 
205
  void quantize_q5_1(device const float * src, device block_q5_1 & dst) {
 
206
  float max = src[0];
207
  float min = src[0];
208
 
@@ -239,6 +241,7 @@ void quantize_q5_1(device const float * src, device block_q5_1 & dst) {
239
  }
240
 
241
  void quantize_iq4_nl(device const float * src, device block_iq4_nl & dst) {
 
242
  float amax = 0.0f; // absolute max
243
  float max = 0.0f;
244
 
 
138
  }
139
 
140
  void quantize_q4_1(device const float * src, device block_q4_1 & dst) {
141
+ #pragma METAL fp math_mode(safe)
142
  float min = FLT_MAX;
143
  float max = -FLT_MAX;
144
 
 
204
  }
205
 
206
  void quantize_q5_1(device const float * src, device block_q5_1 & dst) {
207
+ #pragma METAL fp math_mode(safe)
208
  float max = src[0];
209
  float min = src[0];
210
 
 
241
  }
242
 
243
  void quantize_iq4_nl(device const float * src, device block_iq4_nl & dst) {
244
+ #pragma METAL fp math_mode(safe)
245
  float amax = 0.0f; // absolute max
246
  float max = 0.0f;
247