DPDK 24.03.0
rte_ring_elem_pvt.h
/* SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2017,2018 HXT-semitech Corporation.
 * Copyright (c) 2007-2009 Kip Macy kmacy@freebsd.org
 * All rights reserved.
 * Derived from FreeBSD's bufring.h
 * Used as BSD-3 Licensed with permission from Kip Macy.
 */

#ifndef _RTE_RING_ELEM_PVT_H_
#define _RTE_RING_ELEM_PVT_H_

#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION >= 120000)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wstringop-overflow"
#pragma GCC diagnostic ignored "-Wstringop-overread"
#endif

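/*
 * Copy n uint32_t slots from obj_table into the ring, starting at slot
 * idx and wrapping around at size. The main loop is unrolled eight ways;
 * the switch copies the remaining n % 8 slots. The caller passes size and
 * idx already normalized to uint32_t units.
 */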
static __rte_always_inline void
__rte_ring_enqueue_elems_32(struct rte_ring *r, const uint32_t size,
		uint32_t idx, const void *obj_table, uint32_t n)
{
	unsigned int i;
	uint32_t *ring = (uint32_t *)&r[1];
	const uint32_t *obj = (const uint32_t *)obj_table;
	if (likely(idx + n <= size)) {
		for (i = 0; i < (n & ~0x7); i += 8, idx += 8) {
			ring[idx] = obj[i];
			ring[idx + 1] = obj[i + 1];
			ring[idx + 2] = obj[i + 2];
			ring[idx + 3] = obj[i + 3];
			ring[idx + 4] = obj[i + 4];
			ring[idx + 5] = obj[i + 5];
			ring[idx + 6] = obj[i + 6];
			ring[idx + 7] = obj[i + 7];
		}
		switch (n & 0x7) {
		case 7:
			ring[idx++] = obj[i++]; /* fallthrough */
		case 6:
			ring[idx++] = obj[i++]; /* fallthrough */
		case 5:
			ring[idx++] = obj[i++]; /* fallthrough */
		case 4:
			ring[idx++] = obj[i++]; /* fallthrough */
		case 3:
			ring[idx++] = obj[i++]; /* fallthrough */
		case 2:
			ring[idx++] = obj[i++]; /* fallthrough */
		case 1:
			ring[idx++] = obj[i++]; /* fallthrough */
		}
	} else {
		for (i = 0; idx < size; i++, idx++)
			ring[idx] = obj[i];
		/* Start at the beginning */
		for (idx = 0; i < n; i++, idx++)
			ring[idx] = obj[i];
	}
}

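/*
 * Copy n 8-byte elements from obj_table into the ring, starting at the
 * slot selected by prod_head. obj_table may be unaligned, hence the
 * unaligned_uint64_t accesses; the main loop is unrolled four ways.
 */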
static __rte_always_inline void
__rte_ring_enqueue_elems_64(struct rte_ring *r, uint32_t prod_head,
		const void *obj_table, uint32_t n)
{
	unsigned int i;
	const uint32_t size = r->size;
	uint32_t idx = prod_head & r->mask;
	uint64_t *ring = (uint64_t *)&r[1];
	const unaligned_uint64_t *obj = (const unaligned_uint64_t *)obj_table;
	if (likely(idx + n <= size)) {
		for (i = 0; i < (n & ~0x3); i += 4, idx += 4) {
			ring[idx] = obj[i];
			ring[idx + 1] = obj[i + 1];
			ring[idx + 2] = obj[i + 2];
			ring[idx + 3] = obj[i + 3];
		}
		switch (n & 0x3) {
		case 3:
			ring[idx++] = obj[i++]; /* fallthrough */
		case 2:
			ring[idx++] = obj[i++]; /* fallthrough */
		case 1:
			ring[idx++] = obj[i++];
		}
	} else {
		for (i = 0; idx < size; i++, idx++)
			ring[idx] = obj[i];
		/* Start at the beginning */
		for (idx = 0; i < n; i++, idx++)
			ring[idx] = obj[i];
	}
}

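/*
 * Copy n 16-byte elements from obj_table into the ring. Copies go through
 * memcpy in 32-byte (two-element) chunks, with a 16-byte tail copy when
 * n is odd.
 */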
static __rte_always_inline void
__rte_ring_enqueue_elems_128(struct rte_ring *r, uint32_t prod_head,
		const void *obj_table, uint32_t n)
{
	unsigned int i;
	const uint32_t size = r->size;
	uint32_t idx = prod_head & r->mask;
	rte_int128_t *ring = (rte_int128_t *)&r[1];
	const rte_int128_t *obj = (const rte_int128_t *)obj_table;
	if (likely(idx + n <= size)) {
		for (i = 0; i < (n & ~0x1); i += 2, idx += 2)
			memcpy((void *)(ring + idx),
				(const void *)(obj + i), 32);
		switch (n & 0x1) {
		case 1:
			memcpy((void *)(ring + idx),
				(const void *)(obj + i), 16);
		}
	} else {
		for (i = 0; idx < size; i++, idx++)
			memcpy((void *)(ring + idx),
				(const void *)(obj + i), 16);
		/* Start at the beginning */
		for (idx = 0; i < n; i++, idx++)
			memcpy((void *)(ring + idx),
				(const void *)(obj + i), 16);
	}
}

/* The actual enqueue of elements on the ring.
 * Placed here since identical code is needed in both
 * the single- and multi-producer enqueue functions.
 */
static __rte_always_inline void
__rte_ring_enqueue_elems(struct rte_ring *r, uint32_t prod_head,
		const void *obj_table, uint32_t esize, uint32_t num)
{
	/* 8B and 16B copies implemented individually to retain
	 * the current performance.
	 */
	if (esize == 8)
		__rte_ring_enqueue_elems_64(r, prod_head, obj_table, num);
	else if (esize == 16)
		__rte_ring_enqueue_elems_128(r, prod_head, obj_table, num);
	else {
		uint32_t idx, scale, nr_idx, nr_num, nr_size;

		/* Normalize to uint32_t */
		scale = esize / sizeof(uint32_t);
		nr_num = num * scale;
		idx = prod_head & r->mask;
		nr_idx = idx * scale;
		nr_size = r->size * scale;
		__rte_ring_enqueue_elems_32(r, nr_size, nr_idx,
				obj_table, nr_num);
	}
}

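/*
 * Worked example for the normalization above (illustrative values):
 * with esize = 12, r->size = 1024 and idx = 1022, scale = 3, so
 * nr_size = 3072 and nr_idx = 3066. Enqueuing num = 4 elements copies
 * nr_num = 12 uint32_t slots: six at slots 3066-3071, then six more from
 * slot 0 after the wrap. esize must be a multiple of 4 for this scaling
 * to be exact.
 */

/*
 * Copy n uint32_t slots from the ring into obj_table, starting at slot
 * idx and wrapping at size: the mirror image of
 * __rte_ring_enqueue_elems_32() above.
 */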
static __rte_always_inline void
__rte_ring_dequeue_elems_32(struct rte_ring *r, const uint32_t size,
		uint32_t idx, void *obj_table, uint32_t n)
{
	unsigned int i;
	uint32_t *ring = (uint32_t *)&r[1];
	uint32_t *obj = (uint32_t *)obj_table;
	if (likely(idx + n <= size)) {
		for (i = 0; i < (n & ~0x7); i += 8, idx += 8) {
			obj[i] = ring[idx];
			obj[i + 1] = ring[idx + 1];
			obj[i + 2] = ring[idx + 2];
			obj[i + 3] = ring[idx + 3];
			obj[i + 4] = ring[idx + 4];
			obj[i + 5] = ring[idx + 5];
			obj[i + 6] = ring[idx + 6];
			obj[i + 7] = ring[idx + 7];
		}
		switch (n & 0x7) {
		case 7:
			obj[i++] = ring[idx++]; /* fallthrough */
		case 6:
			obj[i++] = ring[idx++]; /* fallthrough */
		case 5:
			obj[i++] = ring[idx++]; /* fallthrough */
		case 4:
			obj[i++] = ring[idx++]; /* fallthrough */
		case 3:
			obj[i++] = ring[idx++]; /* fallthrough */
		case 2:
			obj[i++] = ring[idx++]; /* fallthrough */
		case 1:
			obj[i++] = ring[idx++]; /* fallthrough */
		}
	} else {
		for (i = 0; idx < size; i++, idx++)
			obj[i] = ring[idx];
		/* Start at the beginning */
		for (idx = 0; i < n; i++, idx++)
			obj[i] = ring[idx];
	}
}

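/*
 * Copy n 8-byte elements from the ring into obj_table: the mirror image
 * of __rte_ring_enqueue_elems_64() above.
 */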
static __rte_always_inline void
__rte_ring_dequeue_elems_64(struct rte_ring *r, uint32_t cons_head,
		void *obj_table, uint32_t n)
{
	unsigned int i;
	const uint32_t size = r->size;
	uint32_t idx = cons_head & r->mask;
	uint64_t *ring = (uint64_t *)&r[1];
	unaligned_uint64_t *obj = (unaligned_uint64_t *)obj_table;
	if (likely(idx + n <= size)) {
		for (i = 0; i < (n & ~0x3); i += 4, idx += 4) {
			obj[i] = ring[idx];
			obj[i + 1] = ring[idx + 1];
			obj[i + 2] = ring[idx + 2];
			obj[i + 3] = ring[idx + 3];
		}
		switch (n & 0x3) {
		case 3:
			obj[i++] = ring[idx++]; /* fallthrough */
		case 2:
			obj[i++] = ring[idx++]; /* fallthrough */
		case 1:
			obj[i++] = ring[idx++]; /* fallthrough */
		}
	} else {
		for (i = 0; idx < size; i++, idx++)
			obj[i] = ring[idx];
		/* Start at the beginning */
		for (idx = 0; i < n; i++, idx++)
			obj[i] = ring[idx];
	}
}

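/*
 * Copy n 16-byte elements from the ring into obj_table: the mirror image
 * of __rte_ring_enqueue_elems_128() above.
 */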
static __rte_always_inline void
__rte_ring_dequeue_elems_128(struct rte_ring *r, uint32_t cons_head,
		void *obj_table, uint32_t n)
{
	unsigned int i;
	const uint32_t size = r->size;
	uint32_t idx = cons_head & r->mask;
	rte_int128_t *ring = (rte_int128_t *)&r[1];
	rte_int128_t *obj = (rte_int128_t *)obj_table;
	if (likely(idx + n <= size)) {
		for (i = 0; i < (n & ~0x1); i += 2, idx += 2)
			memcpy((void *)(obj + i), (void *)(ring + idx), 32);
		switch (n & 0x1) {
		case 1:
			memcpy((void *)(obj + i), (void *)(ring + idx), 16);
		}
	} else {
		for (i = 0; idx < size; i++, idx++)
			memcpy((void *)(obj + i), (void *)(ring + idx), 16);
		/* Start at the beginning */
		for (idx = 0; i < n; i++, idx++)
			memcpy((void *)(obj + i), (void *)(ring + idx), 16);
	}
}

/* The actual dequeue of elements from the ring.
 * Placed here since identical code is needed in both
 * the single- and multi-consumer dequeue functions.
 */
static __rte_always_inline void
__rte_ring_dequeue_elems(struct rte_ring *r, uint32_t cons_head,
		void *obj_table, uint32_t esize, uint32_t num)
{
	/* 8B and 16B copies implemented individually to retain
	 * the current performance.
	 */
	if (esize == 8)
		__rte_ring_dequeue_elems_64(r, cons_head, obj_table, num);
	else if (esize == 16)
		__rte_ring_dequeue_elems_128(r, cons_head, obj_table, num);
	else {
		uint32_t idx, scale, nr_idx, nr_num, nr_size;

		/* Normalize to uint32_t */
		scale = esize / sizeof(uint32_t);
		nr_num = num * scale;
		idx = cons_head & r->mask;
		nr_idx = idx * scale;
		nr_size = r->size * scale;
		__rte_ring_dequeue_elems_32(r, nr_size, nr_idx,
				obj_table, nr_num);
	}
}

/* Between two loads, the CPU may reorder memory accesses on weakly
 * ordered models (PowerPC/Arm).
 * There are two choices for the user:
 * 1. use an rmb() memory barrier
 * 2. use one-way load_acquire/store_release barriers
 * Which is faster depends on performance test results.
 */
#ifdef RTE_USE_C11_MEM_MODEL
#include "rte_ring_c11_pvt.h"
#else
#include "rte_ring_generic_pvt.h"
#endif

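/*
 * Illustrative sketch (assumed, not part of this file) of the two styles
 * for reading the opposing tail index on the producer side:
 *
 *	// one-way barrier, rte_ring_c11_pvt.h style:
 *	cons_tail = rte_atomic_load_explicit(&r->cons.tail,
 *			rte_memory_order_acquire);
 *
 *	// full read barrier, rte_ring_generic_pvt.h style:
 *	cons_tail = r->cons.tail;
 *	rte_smp_rmb();
 */

/*
 * Enqueue n elements of esize bytes: reserve space by moving prod.head,
 * copy the elements in, then publish them by updating prod.tail.
 */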
static __rte_always_inline unsigned int
__rte_ring_do_enqueue_elem(struct rte_ring *r, const void *obj_table,
		unsigned int esize, unsigned int n,
		enum rte_ring_queue_behavior behavior, unsigned int is_sp,
		unsigned int *free_space)
{
	uint32_t prod_head, prod_next;
	uint32_t free_entries;

	n = __rte_ring_move_prod_head(r, is_sp, n, behavior,
			&prod_head, &prod_next, &free_entries);
	if (n == 0)
		goto end;

	__rte_ring_enqueue_elems(r, prod_head, obj_table, esize, n);

	__rte_ring_update_tail(&r->prod, prod_head, prod_next, is_sp, 1);
end:
	if (free_space != NULL)
		*free_space = free_entries - n;
	return n;
}

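/*
 * Dequeue counterpart: reserve entries by moving cons.head, copy the
 * elements out, then release the slots by updating cons.tail.
 */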
static __rte_always_inline unsigned int
__rte_ring_do_dequeue_elem(struct rte_ring *r, void *obj_table,
		unsigned int esize, unsigned int n,
		enum rte_ring_queue_behavior behavior, unsigned int is_sc,
		unsigned int *available)
{
	uint32_t cons_head, cons_next;
	uint32_t entries;

	n = __rte_ring_move_cons_head(r, (int)is_sc, n, behavior,
			&cons_head, &cons_next, &entries);
	if (n == 0)
		goto end;

	__rte_ring_dequeue_elems(r, cons_head, obj_table, esize, n);

	__rte_ring_update_tail(&r->cons, cons_head, cons_next, is_sc, 0);

end:
	if (available != NULL)
		*available = entries - n;
	return n;
}

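/*
 * Usage sketch (assumed): the public wrappers in rte_ring_elem.h reduce
 * to these helpers, e.g. a multi-producer fixed-size bulk enqueue is
 * roughly:
 *
 *	return __rte_ring_do_enqueue_elem(r, obj_table, esize, n,
 *			RTE_RING_QUEUE_FIXED, RTE_RING_SYNC_MT, free_space);
 */
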
#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION >= 120000)
#pragma GCC diagnostic pop
#endif

#endif /* _RTE_RING_ELEM_PVT_H_ */