DPDK 24.11.0-rc3
rte_ptr_compress.h
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2024 Arm Limited
 */

#ifndef RTE_PTR_COMPRESS_H
#define RTE_PTR_COMPRESS_H

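/**
 * @file
 * Pointer compression helpers: convert arrays of pointers that all point into
 * one contiguous memory region into arrays of 16-bit or 32-bit offsets from a
 * base address, and back. On 64-bit Arm the loops are vectorized with SVE or
 * NEON when available.
 */
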
#include <stdint.h>
#include <inttypes.h>

#include <rte_bitops.h>
#include <rte_branch_prediction.h>
#include <rte_common.h>
#include <rte_debug.h>
#include <rte_vect.h>

#ifdef __cplusplus
extern "C" {
#endif

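/*
 * Number of bits needed to store any byte offset within a memory region of
 * mem_length bytes: the bit width of (mem_length - 1), with a minimum of 1.
 */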
#define RTE_PTR_COMPRESS_BITS_NEEDED_FOR_POINTER_WITHIN_RANGE(mem_length) \
	(((uint64_t)mem_length) < 2 ? 1 : \
	(sizeof(uint64_t) * CHAR_BIT - \
	rte_clz64((uint64_t)mem_length - 1)))

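/*
 * Number of low pointer bits that are always zero when objects are aligned to
 * alignment bytes, i.e. how far compressed offsets can be right-shifted.
 * An alignment of 0 yields a shift of 0.
 */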
#define RTE_PTR_COMPRESS_BIT_SHIFT_FROM_ALIGNMENT(alignment) \
	((alignment) == 0 ? 0 : rte_ctz64((uint64_t)alignment))

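/*
 * Evaluates to 1 if pointers within a region of mem_length bytes, holding
 * objects aligned to obj_alignment bytes, can be compressed into 16-bit
 * offsets once the alignment shift is applied; 0 otherwise.
 */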
#define RTE_PTR_COMPRESS_CAN_COMPRESS_16_SHIFT(mem_length, obj_alignment) \
	((RTE_PTR_COMPRESS_BITS_NEEDED_FOR_POINTER_WITHIN_RANGE(mem_length) - \
	RTE_PTR_COMPRESS_BIT_SHIFT_FROM_ALIGNMENT(obj_alignment)) <= 16 ? 1 : 0)

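/*
 * Same check as above, but against the 32-bit compressed representation.
 */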
#define RTE_PTR_COMPRESS_CAN_COMPRESS_32_SHIFT(mem_length, obj_alignment) \
	((RTE_PTR_COMPRESS_BITS_NEEDED_FOR_POINTER_WITHIN_RANGE(mem_length) - \
	RTE_PTR_COMPRESS_BIT_SHIFT_FROM_ALIGNMENT(obj_alignment)) <= 32 ? 1 : 0)

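/*
 * Compress n pointers from src_table into 32-bit offsets in dest_table.
 * Each output is (pointer - ptr_base) >> bit_shift, so every pointer must lie
 * within a range that fits in 32 bits after the shift. Vectorized with SVE or
 * NEON on 64-bit Arm when available, otherwise a scalar loop is used.
 */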
static __rte_always_inline void
rte_ptr_compress_32_shift(void *ptr_base, void * const *src_table,
	uint32_t *dest_table, size_t n, uint8_t bit_shift)
{
	size_t i = 0;
#if defined RTE_HAS_SVE_ACLE && !defined RTE_ARCH_ARMv8_AARCH32
	svuint64_t v_ptr_table;
	do {
		svbool_t pg = svwhilelt_b64(i, n);
		v_ptr_table = svld1_u64(pg, (uint64_t *)src_table + i);
		v_ptr_table = svsub_x(pg, v_ptr_table, (uint64_t)ptr_base);
		v_ptr_table = svlsr_x(pg, v_ptr_table, bit_shift);
		svst1w(pg, &dest_table[i], v_ptr_table);
		i += svcntd();
	} while (i < n);
#elif defined __ARM_NEON && !defined RTE_ARCH_ARMv8_AARCH32
	uintptr_t ptr_diff;
	uint64x2_t v_ptr_table;
	/* right shift is done by left shifting by negative int */
	int64x2_t v_shift = vdupq_n_s64(-bit_shift);
	uint64x2_t v_ptr_base = vdupq_n_u64((uint64_t)ptr_base);
	const size_t n_even = n & ~0x1;
	for (; i < n_even; i += 2) {
		v_ptr_table = vld1q_u64((const uint64_t *)src_table + i);
		v_ptr_table = vsubq_u64(v_ptr_table, v_ptr_base);
		v_ptr_table = vshlq_u64(v_ptr_table, v_shift);
		vst1_u32(dest_table + i, vqmovn_u64(v_ptr_table));
	}
	/* process leftover single item in case of odd number of n */
	if (unlikely(n & 0x1)) {
		ptr_diff = RTE_PTR_DIFF(src_table[i], ptr_base);
		dest_table[i] = (uint32_t) (ptr_diff >> bit_shift);
	}
#else
	uintptr_t ptr_diff;
	for (; i < n; i++) {
		ptr_diff = RTE_PTR_DIFF(src_table[i], ptr_base);
		ptr_diff = ptr_diff >> bit_shift;
		RTE_ASSERT(ptr_diff <= UINT32_MAX);
		dest_table[i] = (uint32_t) ptr_diff;
	}
#endif
}

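/*
 * Decompress n 32-bit offsets from src_table back into pointers in
 * dest_table: each result is ptr_base + ((uintptr_t)offset << bit_shift).
 * ptr_base and bit_shift must match the values used for compression.
 */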
static __rte_always_inline void
rte_ptr_decompress_32_shift(void *ptr_base, uint32_t const *src_table,
	void **dest_table, size_t n, uint8_t bit_shift)
{
	size_t i = 0;
#if defined RTE_HAS_SVE_ACLE && !defined RTE_ARCH_ARMv8_AARCH32
	svuint64_t v_ptr_table;
	do {
		svbool_t pg = svwhilelt_b64(i, n);
		v_ptr_table = svld1uw_u64(pg, &src_table[i]);
		v_ptr_table = svlsl_x(pg, v_ptr_table, bit_shift);
		v_ptr_table = svadd_x(pg, v_ptr_table, (uint64_t)ptr_base);
		svst1(pg, (uint64_t *)dest_table + i, v_ptr_table);
		i += svcntd();
	} while (i < n);
#elif defined __ARM_NEON && !defined RTE_ARCH_ARMv8_AARCH32
	uintptr_t ptr_diff;
	uint64x2_t v_ptr_table;
	int64x2_t v_shift = vdupq_n_s64(bit_shift);
	uint64x2_t v_ptr_base = vdupq_n_u64((uint64_t)ptr_base);
	const size_t n_even = n & ~0x1;
	for (; i < n_even; i += 2) {
		v_ptr_table = vmovl_u32(vld1_u32(src_table + i));
		v_ptr_table = vshlq_u64(v_ptr_table, v_shift);
		v_ptr_table = vaddq_u64(v_ptr_table, v_ptr_base);
		vst1q_u64((uint64_t *)dest_table + i, v_ptr_table);
	}
	/* process leftover single item in case of odd number of n */
	if (unlikely(n & 0x1)) {
		ptr_diff = ((uintptr_t) src_table[i]) << bit_shift;
		dest_table[i] = RTE_PTR_ADD(ptr_base, ptr_diff);
	}
#else
	uintptr_t ptr_diff;
	for (; i < n; i++) {
		ptr_diff = ((uintptr_t) src_table[i]) << bit_shift;
		dest_table[i] = RTE_PTR_ADD(ptr_base, ptr_diff);
	}
#endif
}

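/*
 * Same as rte_ptr_compress_32_shift(), but each compressed pointer is stored
 * as a 16-bit offset (the shifted offsets must fit in 16 bits).
 */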
static __rte_always_inline void
rte_ptr_compress_16_shift(void *ptr_base, void * const *src_table,
	uint16_t *dest_table, size_t n, uint8_t bit_shift)
{
	size_t i = 0;
#if defined RTE_HAS_SVE_ACLE && !defined RTE_ARCH_ARMv8_AARCH32
	svuint64_t v_ptr_table;
	do {
		svbool_t pg = svwhilelt_b64(i, n);
		v_ptr_table = svld1_u64(pg, (uint64_t *)src_table + i);
		v_ptr_table = svsub_x(pg, v_ptr_table, (uint64_t)ptr_base);
		v_ptr_table = svlsr_x(pg, v_ptr_table, bit_shift);
		svst1h(pg, &dest_table[i], v_ptr_table);
		i += svcntd();
	} while (i < n);
#else
	uintptr_t ptr_diff;
	for (; i < n; i++) {
		ptr_diff = RTE_PTR_DIFF(src_table[i], ptr_base);
		ptr_diff = ptr_diff >> bit_shift;
		RTE_ASSERT(ptr_diff <= UINT16_MAX);
		dest_table[i] = (uint16_t) ptr_diff;
	}
#endif
}

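/*
 * Same as rte_ptr_decompress_32_shift(), but reads 16-bit offsets.
 */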
static __rte_always_inline void
rte_ptr_decompress_16_shift(void *ptr_base, uint16_t const *src_table,
	void **dest_table, size_t n, uint8_t bit_shift)
{
	size_t i = 0;
#if defined RTE_HAS_SVE_ACLE && !defined RTE_ARCH_ARMv8_AARCH32
	svuint64_t v_ptr_table;
	do {
		svbool_t pg = svwhilelt_b64(i, n);
		v_ptr_table = svld1uh_u64(pg, &src_table[i]);
		v_ptr_table = svlsl_x(pg, v_ptr_table, bit_shift);
		v_ptr_table = svadd_x(pg, v_ptr_table, (uint64_t)ptr_base);
		svst1(pg, (uint64_t *)dest_table + i, v_ptr_table);
		i += svcntd();
	} while (i < n);
#else
	uintptr_t ptr_diff;
	for (; i < n; i++) {
		ptr_diff = ((uintptr_t) src_table[i]) << bit_shift;
		dest_table[i] = RTE_PTR_ADD(ptr_base, ptr_diff);
	}
#endif
}

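/*
 * Illustrative usage sketch (not part of the original header): compress
 * pointers to 16-byte-spaced objects in a 1 MB buffer into 32-bit offsets,
 * then restore them. The names POOL_LEN, OBJ_ALIGN, N, pool, objs, offsets
 * and restored are hypothetical; the code is assumed to run inside a function.
 *
 *	#define POOL_LEN (1 << 20)
 *	#define OBJ_ALIGN 16
 *	#define N 1024
 *
 *	static uint8_t pool[POOL_LEN];
 *	void *objs[N], *restored[N];
 *	uint32_t offsets[N];
 *	uint8_t shift = RTE_PTR_COMPRESS_BIT_SHIFT_FROM_ALIGNMENT(OBJ_ALIGN);
 *
 *	for (size_t i = 0; i < N; i++)
 *		objs[i] = pool + i * OBJ_ALIGN;
 *
 *	if (RTE_PTR_COMPRESS_CAN_COMPRESS_32_SHIFT(POOL_LEN, OBJ_ALIGN)) {
 *		rte_ptr_compress_32_shift(pool, objs, offsets, N, shift);
 *		rte_ptr_decompress_32_shift(pool, offsets, restored, N, shift);
 *	}
 */
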
#ifdef __cplusplus
}
#endif

#endif /* RTE_PTR_COMPRESS_H */