@@ -8777,8 +8777,8 @@ static void ggml_compute_forward_div_f32(
8777
8777
8778
8778
#ifdef GGML_USE_ACCELERATE
8779
8779
vDSP_vdiv(
8780
- (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01), 1,
8781
8780
(float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11), 1,
8781
+ (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01), 1,
8782
8782
(float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 ), 1,
8783
8783
ne0);
8784
8784
#else
@@ -9831,15 +9831,15 @@ static void ggml_compute_forward_rms_norm_back_f32(
9831
9831
sum_xdz += (ggml_float)(x[i00] * dz[i00]);
9832
9832
}
9833
9833
9834
- const float mean = sum_xx/ne00;
9835
- const float mean_eps = sum_xx/ne00 + eps;
9836
- const float sum_eps = sum_xx + eps*ne00;
9837
- const float mean_xdz = sum_xdz/ne00;
9834
+ const ggml_float mean = sum_xx/ne00;
9835
+ const ggml_float mean_eps = sum_xx/ne00 + eps;
9836
+ const ggml_float sum_eps = sum_xx + eps*ne00;
9837
+ const ggml_float mean_xdz = sum_xdz/ne00;
9838
9838
// we could cache rms from forward pass to improve performance.
9839
9839
// to do this implement ggml_rms and compose ggml_rms_norm using ggml_rms.
9840
- const float rms = sqrtf(mean_eps);
9841
- const float rrms = 1.0f / sqrtf(mean_eps);
9842
- const float scale = -rrms/(ne00 * mean_eps); // -1/(n*rms**3)
9840
+ const ggml_float rms = sqrtf(mean_eps);
9841
+ const ggml_float rrms = 1.0f / sqrtf(mean_eps);
9842
+ const ggml_float scale = -rrms/(ne00 * mean_eps); // -1/(n*rms**3)
9843
9843
9844
9844
{
9845
9845
// z = rms_norm(x)
0 commit comments