DPDK 25.03.0-rc0
rte_xxh64_avx512.h
1/* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2020 Intel Corporation
3 */
4
5#ifndef RTE_XXH64_AVX512_H
6#define RTE_XXH64_AVX512_H
7
#include <string.h>

#include <rte_common.h>
#include <immintrin.h>
10
11#ifdef __cplusplus
12extern "C" {
13#endif
14
/*
 * 64-bit multiplier constants used by the hash rounds in this file. The
 * values are those of the PRIME64_1..PRIME64_5 constants of the reference
 * xxHash XXH64 implementation. The bit pattern of each value is spelled
 * out above it, most significant 16-bit group first.
 */

/* 1001111000110111 0111100110110001 1000010111101011 1100101010000111 */
static const uint64_t PRIME64_1 = UINT64_C(0x9E3779B185EBCA87);

/* 1100001010110010 1010111000111101 0010011111010100 1110101101001111 */
static const uint64_t PRIME64_2 = UINT64_C(0xC2B2AE3D27D4EB4F);

/* 0001011001010110 0110011110110001 1001111000110111 0111100111111001 */
static const uint64_t PRIME64_3 = UINT64_C(0x165667B19E3779F9);

/* 1000010111101011 1100101001110111 1100001010110010 1010111001100011 */
static const uint64_t PRIME64_4 = UINT64_C(0x85EBCA77C2B2AE63);

/* 0010011111010100 1110101100101111 0001011001010110 0110011111000101 */
static const uint64_t PRIME64_5 = UINT64_C(0x27D4EB2F165667C5);
25
26static __rte_always_inline __m512i
27xxh64_round_avx512(__m512i hash, __m512i input)
28{
29 hash = _mm512_madd52lo_epu64(hash,
30 input,
31 _mm512_set1_epi64(PRIME64_2));
32
33 hash = _mm512_rol_epi64(hash, 31);
34
35 return hash;
36}
37
38static __rte_always_inline __m512i
39xxh64_fmix_avx512(__m512i hash)
40{
41 hash = _mm512_xor_si512(hash, _mm512_srli_epi64(hash, 33));
42
43 return hash;
44}
45
46static __rte_always_inline __m256i
47rte_xxh64_sketch_avx512(const void *key, uint32_t key_len,
48 __m512i v_seed, uint32_t modulo)
49{
50 __m512i v_prime64_5, v_hash;
51 size_t remaining = key_len;
52 size_t offset = 0;
53 __m512i input;
54
55 v_prime64_5 = _mm512_set1_epi64(PRIME64_5);
56 v_hash = _mm512_add_epi64
57 (_mm512_add_epi64(v_seed, v_prime64_5),
58 _mm512_set1_epi64(key_len));
59
60 while (remaining >= 8) {
61 input = _mm512_set1_epi64(*(uint64_t *)RTE_PTR_ADD(key, offset));
62 v_hash = _mm512_xor_epi64(v_hash,
63 xxh64_round_avx512(_mm512_setzero_si512(), input));
64 v_hash = _mm512_madd52lo_epu64(_mm512_set1_epi64(PRIME64_4),
65 v_hash,
66 _mm512_set1_epi64(PRIME64_1));
67
68 remaining -= 8;
69 offset += 8;
70 }
71
72 if (remaining >= 4) {
73 input = _mm512_set1_epi64
74 (*(uint32_t *)RTE_PTR_ADD(key, offset));
75 v_hash = _mm512_xor_epi64(v_hash,
76 _mm512_mullo_epi64(input,
77 _mm512_set1_epi64(PRIME64_1)));
78 v_hash = _mm512_madd52lo_epu64
79 (_mm512_set1_epi64(PRIME64_3),
80 _mm512_rol_epi64(v_hash, 23),
81 _mm512_set1_epi64(PRIME64_2));
82
83 offset += 4;
84 remaining -= 4;
85 }
86
87 while (remaining != 0) {
88 input = _mm512_set1_epi64
89 (*(uint8_t *)RTE_PTR_ADD(key, offset));
90 v_hash = _mm512_xor_epi64(v_hash,
91 _mm512_mullo_epi64(input,
92 _mm512_set1_epi64(PRIME64_5)));
93 v_hash = _mm512_mullo_epi64
94 (_mm512_rol_epi64(v_hash, 11),
95 _mm512_set1_epi64(PRIME64_1));
96 offset++;
97 remaining--;
98 }
99
100 v_hash = xxh64_fmix_avx512(v_hash);
101
102 /*
103 * theoritically, such modular operations can be replaced by
104 * _mm512_rem_epi64(), but seems it depends on the compiler's
105 * implementation. so here is the limitation that the modulo
106 * value should be power of 2.
107 */
108 __m512i v_hash_remainder = _mm512_set1_epi64((modulo - 1));
109
110 return _mm512_cvtepi64_epi32(_mm512_and_si512(v_hash, v_hash_remainder));
111}
112
113#ifdef __cplusplus
114}
115#endif
116
117#endif /* RTE_XXH64_AVX512_H */
#define RTE_PTR_ADD(ptr, x)
Definition: rte_common.h:469
#define __rte_always_inline
Definition: rte_common.h:413