/*
 * Compute a CRC32C checksum over a byte buffer using the x86 SSE4.2
 * CRC32 intrinsics.
 *
 * s   - pointer to the first byte of the input.
 * len - number of bytes to process; values <= 0 leave the result at 0.
 *
 * Returns the running CRC value, started from 0 and returned as-is
 * (no final inversion is applied here).
 *
 * The buffer is consumed in the widest units available first (8 bytes on
 * x86_64), then the remainder in 4-, 2- and 1-byte steps.
 */
static_always_inline u32
clib_crc32c (u8 * s, int len)
{
  u32 v = 0;

#if __x86_64__
  for (; len >= 8; len -= 8, s += 8)
    v = _mm_crc32_u64 (v, *((u64 *) s));
#else
  /* workaround weird GCC bug when using _mm_crc32_u32
     which happens with -O2 optimization */
#if !defined (__i686__)
  /* Compiler-only memory barrier; the statement needs the asm keyword
     to be valid. */
  __asm__ __volatile__ ("":::"memory");
#endif
#endif

  for (; len >= 4; len -= 4, s += 4)
    v = _mm_crc32_u32 (v, *((u32 *) s));

  for (; len >= 2; len -= 2, s += 2)
    v = _mm_crc32_u16 (v, *((u16 *) s));

  /* Load exactly one byte: a wider load here would read past the end of
     the buffer when the last byte is processed. */
  for (; len >= 1; len -= 1, s += 1)
    v = _mm_crc32_u8 (v, *s);

  return v;
}
在 64 位 x86 CPU 上,先以 8 字节为一组进行计算。
剩余不足 8 字节的部分,再依次以 4 字节、2 字节、1 字节为一组进行计算。
/* Short aliases for the 64-bit (__crc32cd) and 32-bit (__crc32cw)
   CRC32C intrinsics. */
#define crc32_u64 __crc32cd
#define crc32_u32 __crc32cw
/*
 * Compute a CRC32C checksum over a byte buffer using the __crc32c*
 * intrinsics.
 *
 * s   - pointer to the first byte of the input.
 * len - number of bytes to process; values <= 0 leave the result at 0.
 *
 * Returns the running CRC value, started from 0 and returned as-is.
 *
 * The input is consumed in 8-byte units first, and whatever remains is
 * handled in 4-, 2- and finally 1-byte units.
 */
static_always_inline u32
clib_crc32c (u8 * s, int len)
{
  u32 crc = 0;

  /* Bulk of the buffer: one 64-bit word per step. */
  while (len >= 8)
    {
      crc = __crc32cd (crc, *((u64 *) s));
      s += 8;
      len -= 8;
    }

  /* Remainder, from widest to narrowest unit. */
  while (len >= 4)
    {
      crc = __crc32cw (crc, *((u32 *) s));
      s += 4;
      len -= 4;
    }

  while (len >= 2)
    {
      crc = __crc32ch (crc, *((u16 *) s));
      s += 2;
      len -= 2;
    }

  while (len >= 1)
    {
      crc = __crc32cb (crc, *((u8 *) s));
      s += 1;
      len -= 1;
    }

  return crc;
}
ARM CPU 上的实现与 x86 CPU 类似,只是改用 __crc32cd / __crc32cw / __crc32ch / __crc32cb 这组内建函数。