Skip to content

Commit

Permalink
fix avx
Browse files Browse the repository at this point in the history
  • Loading branch information
nihui committed Nov 14, 2024
1 parent a321287 commit 459cf4c
Showing 1 changed file with 22 additions and 12 deletions.
34 changes: 22 additions & 12 deletions src/layer/x86/gemm_int8.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,19 @@ static void print(__m512 x)
}
#endif

#if __AVX__
// Debug helper: writes the eight float lanes of x to stderr on a single line,
// each formatted with "%.0f " (no fractional digits), followed by a newline.
static void print(__m256 x)
{
    float lanes[8];

    // unaligned store, so the stack buffer needs no special alignment
    _mm256_storeu_ps(lanes, x);

    const float* cur = lanes;
    const float* const end = lanes + 8;
    while (cur != end)
    {
        fprintf(stderr, "%.0f ", *cur);
        ++cur;
    }

    fprintf(stderr, "\n");
}
#endif

static void pack_A_tile_int8(const Mat& A, Mat& AT, int i, int max_ii, int k, int max_kk)
{
#if NCNN_RUNTIME_CPU && NCNN_AVX512VNNI && __AVX512F__ && !__AVX512VNNI__
Expand Down Expand Up @@ -3835,13 +3848,12 @@ static void pack_A_tile_fp32_to_int8(const Mat& A, Mat& AT, int i, int max_ii, i
p0++;
}
}
}

#if !__AVX2__
if (max_ii >= 8)
{
pp = pp1;
}
pp1 = pp + max_kk * 4;
#endif
}
#endif // __AVX__
for (; ii + 3 < max_ii; ii += 4)
{
Expand Down Expand Up @@ -7139,13 +7151,12 @@ static void transpose_pack_A_tile_fp32_to_int8(const Mat& A, Mat& AT, int i, int
p0 += A_hstep;
}
}
}

#if !__AVX2__
if (max_ii >= 8)
{
pp = pp1;
}
pp1 = pp + max_kk * 4;
#endif
}
#endif // __AVX__
for (; ii + 3 < max_ii; ii += 4)
{
Expand Down Expand Up @@ -15361,13 +15372,12 @@ static void unpack_output_tile_int32_to_fp32(const Mat& topT, const Mat& C, Mat&
}
}
}
}

#if !__AVX2__
if (max_ii >= 8)
{
pp = pp1;
}
pp1 = pp + max_jj * 4;
#endif
}
#endif // __AVX__
for (; ii + 3 < max_ii; ii += 4)
{
Expand Down

0 comments on commit 459cf4c

Please sign in to comment.