DPDK 25.03.0-rc0
rte_ptr_compress.h
Go to the documentation of this file.
1/* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2024 Arm Limited
3 */
4
5#ifndef RTE_PTR_COMPRESS_H
6#define RTE_PTR_COMPRESS_H
7
33#include <stdint.h>
34#include <inttypes.h>
35
36#include <rte_bitops.h>
38#include <rte_common.h>
39#include <rte_debug.h>
40#include <rte_vect.h>
41
42#ifdef __cplusplus
43extern "C" {
44#endif
45
/**
 * Number of bits needed to represent any byte offset inside a memory region
 * of @p mem_length bytes.
 *
 * For lengths below 2 the result is 1. Otherwise it is the width of a
 * uint64_t in bits minus rte_clz64(mem_length - 1), i.e. the position of the
 * highest set bit of the largest possible offset (assuming rte_clz64() counts
 * leading zero bits as described in rte_bitops.h).
 *
 * @note @p mem_length is expanded twice; do not pass an expression with side
 *   effects.
 *
 * @param mem_length
 *   Length in bytes of the memory region the pointers fall into.
 * @return
 *   Number of bits required to hold an offset within the region.
 */
#define RTE_PTR_COMPRESS_BITS_NEEDED_FOR_POINTER_WITHIN_RANGE(mem_length) \
	(((uint64_t)(mem_length)) < 2 ? 1 : \
		(sizeof(uint64_t) * CHAR_BIT - \
		 rte_clz64((uint64_t)(mem_length) - 1)))
60
/**
 * Bit shift that can be applied to pointer offsets of objects aligned to
 * @p alignment bytes.
 *
 * An alignment of 0 yields 0. Otherwise the result is
 * rte_ctz64(alignment), i.e. the number of trailing zero bits of the
 * alignment (assuming rte_ctz64() behaves as described in rte_bitops.h).
 *
 * @note @p alignment is expanded twice; do not pass an expression with side
 *   effects.
 *
 * @param alignment
 *   Alignment in bytes of the objects the pointers refer to.
 * @return
 *   Number of low bits that are always zero for such offsets.
 */
#define RTE_PTR_COMPRESS_BIT_SHIFT_FROM_ALIGNMENT(alignment) \
	((alignment) == 0 ? 0 : rte_ctz64((uint64_t)(alignment)))
72
/**
 * Tell whether offsets into a memory region of @p mem_length bytes, holding
 * objects aligned to @p obj_alignment bytes, fit into 16 bits once the
 * alignment shift has been applied.
 *
 * The check is written as "bits needed <= 16 + shift" rather than
 * "bits needed - shift <= 16": both operands are unsigned, so the
 * subtraction would wrap around whenever the shift exceeds the bits needed
 * (small region, large alignment) and wrongly report 0.
 *
 * @note Both arguments are expanded more than once; do not pass expressions
 *   with side effects.
 *
 * @param mem_length
 *   Length in bytes of the memory region the pointers fall into.
 * @param obj_alignment
 *   Alignment in bytes of the objects the pointers refer to.
 * @return
 *   1 if the shifted offsets fit into 16 bits, 0 otherwise.
 */
#define RTE_PTR_COMPRESS_CAN_COMPRESS_16_SHIFT(mem_length, obj_alignment) \
	((RTE_PTR_COMPRESS_BITS_NEEDED_FOR_POINTER_WITHIN_RANGE(mem_length)) <= \
	 16 + (RTE_PTR_COMPRESS_BIT_SHIFT_FROM_ALIGNMENT(obj_alignment)) ? 1 : 0)
88
/**
 * Tell whether offsets into a memory region of @p mem_length bytes, holding
 * objects aligned to @p obj_alignment bytes, fit into 32 bits once the
 * alignment shift has been applied.
 *
 * The check is written as "bits needed <= 32 + shift" rather than
 * "bits needed - shift <= 32": both operands are unsigned, so the
 * subtraction would wrap around whenever the shift exceeds the bits needed
 * (small region, large alignment) and wrongly report 0.
 *
 * @note Both arguments are expanded more than once; do not pass expressions
 *   with side effects.
 *
 * @param mem_length
 *   Length in bytes of the memory region the pointers fall into.
 * @param obj_alignment
 *   Alignment in bytes of the objects the pointers refer to.
 * @return
 *   1 if the shifted offsets fit into 32 bits, 0 otherwise.
 */
#define RTE_PTR_COMPRESS_CAN_COMPRESS_32_SHIFT(mem_length, obj_alignment) \
	((RTE_PTR_COMPRESS_BITS_NEEDED_FOR_POINTER_WITHIN_RANGE(mem_length)) <= \
	 32 + (RTE_PTR_COMPRESS_BIT_SHIFT_FROM_ALIGNMENT(obj_alignment)) ? 1 : 0)
104
128static __rte_always_inline void
129rte_ptr_compress_32_shift(void *ptr_base, void * const *src_table,
130 uint32_t *dest_table, size_t n, uint8_t bit_shift)
131{
132 size_t i = 0;
133#if defined RTE_HAS_SVE_ACLE && !defined RTE_ARCH_ARMv8_AARCH32
134 svuint64_t v_ptr_table;
135 do {
136 svbool_t pg = svwhilelt_b64(i, n);
137 v_ptr_table = svld1_u64(pg, (uint64_t *)src_table + i);
138 v_ptr_table = svsub_x(pg, v_ptr_table, (uint64_t)ptr_base);
139 v_ptr_table = svlsr_x(pg, v_ptr_table, bit_shift);
140 svst1w(pg, &dest_table[i], v_ptr_table);
141 i += svcntd();
142 } while (i < n);
143#elif defined __ARM_NEON && !defined RTE_ARCH_ARMv8_AARCH32
144 uintptr_t ptr_diff;
145 uint64x2_t v_ptr_table;
146 /* right shift is done by left shifting by negative int */
147 int64x2_t v_shift = vdupq_n_s64(-bit_shift);
148 uint64x2_t v_ptr_base = vdupq_n_u64((uint64_t)ptr_base);
149 const size_t n_even = n & ~0x1;
150 for (; i < n_even; i += 2) {
151 v_ptr_table = vld1q_u64((const uint64_t *)src_table + i);
152 v_ptr_table = vsubq_u64(v_ptr_table, v_ptr_base);
153 v_ptr_table = vshlq_u64(v_ptr_table, v_shift);
154 vst1_u32(dest_table + i, vqmovn_u64(v_ptr_table));
155 }
156 /* process leftover single item in case of odd number of n */
157 if (unlikely(n & 0x1)) {
158 ptr_diff = RTE_PTR_DIFF(src_table[i], ptr_base);
159 dest_table[i] = (uint32_t) (ptr_diff >> bit_shift);
160 }
161#else
162 uintptr_t ptr_diff;
163 for (; i < n; i++) {
164 ptr_diff = RTE_PTR_DIFF(src_table[i], ptr_base);
165 ptr_diff = ptr_diff >> bit_shift;
166 RTE_ASSERT(ptr_diff <= UINT32_MAX);
167 dest_table[i] = (uint32_t) ptr_diff;
168 }
169#endif
170}
171
189static __rte_always_inline void
190rte_ptr_decompress_32_shift(void *ptr_base, uint32_t const *src_table,
191 void **dest_table, size_t n, uint8_t bit_shift)
192{
193 size_t i = 0;
194#if defined RTE_HAS_SVE_ACLE && !defined RTE_ARCH_ARMv8_AARCH32
195 svuint64_t v_ptr_table;
196 do {
197 svbool_t pg = svwhilelt_b64(i, n);
198 v_ptr_table = svld1uw_u64(pg, &src_table[i]);
199 v_ptr_table = svlsl_x(pg, v_ptr_table, bit_shift);
200 v_ptr_table = svadd_x(pg, v_ptr_table, (uint64_t)ptr_base);
201 svst1(pg, (uint64_t *)dest_table + i, v_ptr_table);
202 i += svcntd();
203 } while (i < n);
204#elif defined __ARM_NEON && !defined RTE_ARCH_ARMv8_AARCH32
205 uintptr_t ptr_diff;
206 uint64x2_t v_ptr_table;
207 int64x2_t v_shift = vdupq_n_s64(bit_shift);
208 uint64x2_t v_ptr_base = vdupq_n_u64((uint64_t)ptr_base);
209 const size_t n_even = n & ~0x1;
210 for (; i < n_even; i += 2) {
211 v_ptr_table = vmovl_u32(vld1_u32(src_table + i));
212 v_ptr_table = vshlq_u64(v_ptr_table, v_shift);
213 v_ptr_table = vaddq_u64(v_ptr_table, v_ptr_base);
214 vst1q_u64((uint64_t *)dest_table + i, v_ptr_table);
215 }
216 /* process leftover single item in case of odd number of n */
217 if (unlikely(n & 0x1)) {
218 ptr_diff = ((uintptr_t) src_table[i]) << bit_shift;
219 dest_table[i] = RTE_PTR_ADD(ptr_base, ptr_diff);
220 }
221#else
222 uintptr_t ptr_diff;
223 for (; i < n; i++) {
224 ptr_diff = ((uintptr_t) src_table[i]) << bit_shift;
225 dest_table[i] = RTE_PTR_ADD(ptr_base, ptr_diff);
226 }
227#endif
228}
229
253static __rte_always_inline void
254rte_ptr_compress_16_shift(void *ptr_base, void * const *src_table,
255 uint16_t *dest_table, size_t n, uint8_t bit_shift)
256{
257
258 size_t i = 0;
259#if defined RTE_HAS_SVE_ACLE && !defined RTE_ARCH_ARMv8_AARCH32
260 svuint64_t v_ptr_table;
261 do {
262 svbool_t pg = svwhilelt_b64(i, n);
263 v_ptr_table = svld1_u64(pg, (uint64_t *)src_table + i);
264 v_ptr_table = svsub_x(pg, v_ptr_table, (uint64_t)ptr_base);
265 v_ptr_table = svlsr_x(pg, v_ptr_table, bit_shift);
266 svst1h(pg, &dest_table[i], v_ptr_table);
267 i += svcntd();
268 } while (i < n);
269#else
270 uintptr_t ptr_diff;
271 for (; i < n; i++) {
272 ptr_diff = RTE_PTR_DIFF(src_table[i], ptr_base);
273 ptr_diff = ptr_diff >> bit_shift;
274 RTE_ASSERT(ptr_diff <= UINT16_MAX);
275 dest_table[i] = (uint16_t) ptr_diff;
276 }
277#endif
278}
279
297static __rte_always_inline void
298rte_ptr_decompress_16_shift(void *ptr_base, uint16_t const *src_table,
299 void **dest_table, size_t n, uint8_t bit_shift)
300{
301 size_t i = 0;
302#if defined RTE_HAS_SVE_ACLE && !defined RTE_ARCH_ARMv8_AARCH32
303 svuint64_t v_ptr_table;
304 do {
305 svbool_t pg = svwhilelt_b64(i, n);
306 v_ptr_table = svld1uh_u64(pg, &src_table[i]);
307 v_ptr_table = svlsl_x(pg, v_ptr_table, bit_shift);
308 v_ptr_table = svadd_x(pg, v_ptr_table, (uint64_t)ptr_base);
309 svst1(pg, (uint64_t *)dest_table + i, v_ptr_table);
310 i += svcntd();
311 } while (i < n);
312#else
313 uintptr_t ptr_diff;
314 for (; i < n; i++) {
315 ptr_diff = ((uintptr_t) src_table[i]) << bit_shift;
316 dest_table[i] = RTE_PTR_ADD(ptr_base, ptr_diff);
317 }
318#endif
319}
320
321#ifdef __cplusplus
322}
323#endif
324
325#endif /* RTE_PTR_COMPRESS_H */
#define unlikely(x)
#define RTE_PTR_DIFF(ptr1, ptr2)
Definition: rte_common.h:481
#define RTE_PTR_ADD(ptr, x)
Definition: rte_common.h:469
#define __rte_always_inline
Definition: rte_common.h:413
static __rte_always_inline void rte_ptr_compress_16_shift(void *ptr_base, void *const *src_table, uint16_t *dest_table, size_t n, uint8_t bit_shift)
static __rte_always_inline void rte_ptr_decompress_32_shift(void *ptr_base, uint32_t const *src_table, void **dest_table, size_t n, uint8_t bit_shift)
static __rte_always_inline void rte_ptr_compress_32_shift(void *ptr_base, void *const *src_table, uint32_t *dest_table, size_t n, uint8_t bit_shift)
static __rte_always_inline void rte_ptr_decompress_16_shift(void *ptr_base, uint16_t const *src_table, void **dest_table, size_t n, uint8_t bit_shift)