DPDK 25.03.0-rc0
rte_ring_elem_pvt.h
1/* SPDX-License-Identifier: BSD-3-Clause
2 *
3 * Copyright (c) 2017,2018 HXT-semitech Corporation.
4 * Copyright (c) 2007-2009 Kip Macy kmacy@freebsd.org
5 * All rights reserved.
6 * Derived from FreeBSD's bufring.h
7 * Used as BSD-3 Licensed with permission from Kip Macy.
8 */
9
10#ifndef _RTE_RING_ELEM_PVT_H_
11#define _RTE_RING_ELEM_PVT_H_
12
13#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION >= 120000)
14#pragma GCC diagnostic push
15#pragma GCC diagnostic ignored "-Wstringop-overflow"
16#pragma GCC diagnostic ignored "-Wstringop-overread"
17#endif
18
19static __rte_always_inline void
20__rte_ring_enqueue_elems_32(struct rte_ring *r, const uint32_t size,
21 uint32_t idx, const void *obj_table, uint32_t n)
22{
23 unsigned int i;
24 uint32_t *ring = (uint32_t *)&r[1];
25 const uint32_t *obj = (const uint32_t *)obj_table;
26 if (likely(idx + n <= size)) {
27 for (i = 0; i < (n & ~0x7); i += 8, idx += 8) {
28 ring[idx] = obj[i];
29 ring[idx + 1] = obj[i + 1];
30 ring[idx + 2] = obj[i + 2];
31 ring[idx + 3] = obj[i + 3];
32 ring[idx + 4] = obj[i + 4];
33 ring[idx + 5] = obj[i + 5];
34 ring[idx + 6] = obj[i + 6];
35 ring[idx + 7] = obj[i + 7];
36 }
37 switch (n & 0x7) {
38 case 7:
39 ring[idx++] = obj[i++]; /* fallthrough */
40 case 6:
41 ring[idx++] = obj[i++]; /* fallthrough */
42 case 5:
43 ring[idx++] = obj[i++]; /* fallthrough */
44 case 4:
45 ring[idx++] = obj[i++]; /* fallthrough */
46 case 3:
47 ring[idx++] = obj[i++]; /* fallthrough */
48 case 2:
49 ring[idx++] = obj[i++]; /* fallthrough */
50 case 1:
51 ring[idx++] = obj[i++]; /* fallthrough */
52 }
53 } else {
54 for (i = 0; idx < size; i++, idx++)
55 ring[idx] = obj[i];
56 /* Start at the beginning */
57 for (idx = 0; i < n; i++, idx++)
58 ring[idx] = obj[i];
59 }
60}
61
62static __rte_always_inline void
63__rte_ring_enqueue_elems_64(struct rte_ring *r, uint32_t prod_head,
64 const void *obj_table, uint32_t n)
65{
66 unsigned int i;
67 const uint32_t size = r->size;
68 uint32_t idx = prod_head & r->mask;
69 uint64_t *ring = (uint64_t *)&r[1];
70 const unaligned_uint64_t *obj = (const unaligned_uint64_t *)obj_table;
71 if (likely(idx + n <= size)) {
72 for (i = 0; i < (n & ~0x3); i += 4, idx += 4) {
73 ring[idx] = obj[i];
74 ring[idx + 1] = obj[i + 1];
75 ring[idx + 2] = obj[i + 2];
76 ring[idx + 3] = obj[i + 3];
77 }
78 switch (n & 0x3) {
79 case 3:
80 ring[idx++] = obj[i++]; /* fallthrough */
81 case 2:
82 ring[idx++] = obj[i++]; /* fallthrough */
83 case 1:
84 ring[idx++] = obj[i++];
85 }
86 } else {
87 for (i = 0; idx < size; i++, idx++)
88 ring[idx] = obj[i];
89 /* Start at the beginning */
90 for (idx = 0; i < n; i++, idx++)
91 ring[idx] = obj[i];
92 }
93}
94
95static __rte_always_inline void
96__rte_ring_enqueue_elems_128(struct rte_ring *r, uint32_t prod_head,
97 const void *obj_table, uint32_t n)
98{
99 unsigned int i;
100 const uint32_t size = r->size;
101 uint32_t idx = prod_head & r->mask;
102 rte_int128_t *ring = (rte_int128_t *)&r[1];
103 const rte_int128_t *obj = (const rte_int128_t *)obj_table;
104 if (likely(idx + n <= size)) {
105 for (i = 0; i < (n & ~0x1); i += 2, idx += 2)
106 memcpy((void *)(ring + idx),
107 (const void *)(obj + i), 32);
108 switch (n & 0x1) {
109 case 1:
110 memcpy((void *)(ring + idx),
111 (const void *)(obj + i), 16);
112 }
113 } else {
114 for (i = 0; idx < size; i++, idx++)
115 memcpy((void *)(ring + idx),
116 (const void *)(obj + i), 16);
117 /* Start at the beginning */
118 for (idx = 0; i < n; i++, idx++)
119 memcpy((void *)(ring + idx),
120 (const void *)(obj + i), 16);
121 }
122}
123
124/* the actual enqueue of elements on the ring.
125 * Placed here since identical code needed in both
126 * single and multi producer enqueue functions.
127 */
128static __rte_always_inline void
129__rte_ring_enqueue_elems(struct rte_ring *r, uint32_t prod_head,
130 const void *obj_table, uint32_t esize, uint32_t num)
131{
132 /* 8B and 16B copies implemented individually to retain
133 * the current performance.
134 */
135 if (esize == 8)
136 __rte_ring_enqueue_elems_64(r, prod_head, obj_table, num);
137 else if (esize == 16)
138 __rte_ring_enqueue_elems_128(r, prod_head, obj_table, num);
139 else {
140 uint32_t idx, scale, nr_idx, nr_num, nr_size;
141
142 /* Normalize to uint32_t */
143 scale = esize / sizeof(uint32_t);
144 nr_num = num * scale;
145 idx = prod_head & r->mask;
146 nr_idx = idx * scale;
147 nr_size = r->size * scale;
148 __rte_ring_enqueue_elems_32(r, nr_size, nr_idx,
149 obj_table, nr_num);
150 }
151}
152
153static __rte_always_inline void
154__rte_ring_dequeue_elems_32(struct rte_ring *r, const uint32_t size,
155 uint32_t idx, void *obj_table, uint32_t n)
156{
157 unsigned int i;
158 uint32_t *ring = (uint32_t *)&r[1];
159 uint32_t *obj = (uint32_t *)obj_table;
160 if (likely(idx + n <= size)) {
161 for (i = 0; i < (n & ~0x7); i += 8, idx += 8) {
162 obj[i] = ring[idx];
163 obj[i + 1] = ring[idx + 1];
164 obj[i + 2] = ring[idx + 2];
165 obj[i + 3] = ring[idx + 3];
166 obj[i + 4] = ring[idx + 4];
167 obj[i + 5] = ring[idx + 5];
168 obj[i + 6] = ring[idx + 6];
169 obj[i + 7] = ring[idx + 7];
170 }
171 switch (n & 0x7) {
172 case 7:
173 obj[i++] = ring[idx++]; /* fallthrough */
174 case 6:
175 obj[i++] = ring[idx++]; /* fallthrough */
176 case 5:
177 obj[i++] = ring[idx++]; /* fallthrough */
178 case 4:
179 obj[i++] = ring[idx++]; /* fallthrough */
180 case 3:
181 obj[i++] = ring[idx++]; /* fallthrough */
182 case 2:
183 obj[i++] = ring[idx++]; /* fallthrough */
184 case 1:
185 obj[i++] = ring[idx++]; /* fallthrough */
186 }
187 } else {
188 for (i = 0; idx < size; i++, idx++)
189 obj[i] = ring[idx];
190 /* Start at the beginning */
191 for (idx = 0; i < n; i++, idx++)
192 obj[i] = ring[idx];
193 }
194}
195
196static __rte_always_inline void
197__rte_ring_dequeue_elems_64(struct rte_ring *r, uint32_t cons_head,
198 void *obj_table, uint32_t n)
199{
200 unsigned int i;
201 const uint32_t size = r->size;
202 uint32_t idx = cons_head & r->mask;
203 uint64_t *ring = (uint64_t *)&r[1];
204 unaligned_uint64_t *obj = (unaligned_uint64_t *)obj_table;
205 if (likely(idx + n <= size)) {
206 for (i = 0; i < (n & ~0x3); i += 4, idx += 4) {
207 obj[i] = ring[idx];
208 obj[i + 1] = ring[idx + 1];
209 obj[i + 2] = ring[idx + 2];
210 obj[i + 3] = ring[idx + 3];
211 }
212 switch (n & 0x3) {
213 case 3:
214 obj[i++] = ring[idx++]; /* fallthrough */
215 case 2:
216 obj[i++] = ring[idx++]; /* fallthrough */
217 case 1:
218 obj[i++] = ring[idx++]; /* fallthrough */
219 }
220 } else {
221 for (i = 0; idx < size; i++, idx++)
222 obj[i] = ring[idx];
223 /* Start at the beginning */
224 for (idx = 0; i < n; i++, idx++)
225 obj[i] = ring[idx];
226 }
227}
228
229static __rte_always_inline void
230__rte_ring_dequeue_elems_128(struct rte_ring *r, uint32_t cons_head,
231 void *obj_table, uint32_t n)
232{
233 unsigned int i;
234 const uint32_t size = r->size;
235 uint32_t idx = cons_head & r->mask;
236 rte_int128_t *ring = (rte_int128_t *)&r[1];
237 rte_int128_t *obj = (rte_int128_t *)obj_table;
238 if (likely(idx + n <= size)) {
239 for (i = 0; i < (n & ~0x1); i += 2, idx += 2)
240 memcpy((void *)(obj + i), (void *)(ring + idx), 32);
241 switch (n & 0x1) {
242 case 1:
243 memcpy((void *)(obj + i), (void *)(ring + idx), 16);
244 }
245 } else {
246 for (i = 0; idx < size; i++, idx++)
247 memcpy((void *)(obj + i), (void *)(ring + idx), 16);
248 /* Start at the beginning */
249 for (idx = 0; i < n; i++, idx++)
250 memcpy((void *)(obj + i), (void *)(ring + idx), 16);
251 }
252}
253
254/* the actual dequeue of elements from the ring.
255 * Placed here since identical code needed in both
256 * single and multi producer enqueue functions.
257 */
258static __rte_always_inline void
259__rte_ring_dequeue_elems(struct rte_ring *r, uint32_t cons_head,
260 void *obj_table, uint32_t esize, uint32_t num)
261{
262 /* 8B and 16B copies implemented individually to retain
263 * the current performance.
264 */
265 if (esize == 8)
266 __rte_ring_dequeue_elems_64(r, cons_head, obj_table, num);
267 else if (esize == 16)
268 __rte_ring_dequeue_elems_128(r, cons_head, obj_table, num);
269 else {
270 uint32_t idx, scale, nr_idx, nr_num, nr_size;
271
272 /* Normalize to uint32_t */
273 scale = esize / sizeof(uint32_t);
274 nr_num = num * scale;
275 idx = cons_head & r->mask;
276 nr_idx = idx * scale;
277 nr_size = r->size * scale;
278 __rte_ring_dequeue_elems_32(r, nr_size, nr_idx,
279 obj_table, nr_num);
280 }
281}
282
/* Between two loads there might be CPU reordering in weak memory models
 * (powerpc/arm).
 * There are 2 choices for the users:
 * 1. use the rmb() memory barrier
 * 2. use one-direction load_acquire/store_release barriers
 * Which one to use depends on performance test results.
 */
290#ifdef RTE_USE_C11_MEM_MODEL
291#include "rte_ring_c11_pvt.h"
292#else
293#include "rte_ring_generic_pvt.h"
294#endif
295
320static __rte_always_inline unsigned int
321__rte_ring_do_enqueue_elem(struct rte_ring *r, const void *obj_table,
322 unsigned int esize, unsigned int n,
323 enum rte_ring_queue_behavior behavior, unsigned int is_sp,
324 unsigned int *free_space)
325{
326 uint32_t prod_head, prod_next;
327 uint32_t free_entries;
328
329 n = __rte_ring_move_prod_head(r, is_sp, n, behavior,
330 &prod_head, &prod_next, &free_entries);
331 if (n == 0)
332 goto end;
333
334 __rte_ring_enqueue_elems(r, prod_head, obj_table, esize, n);
335
336 __rte_ring_update_tail(&r->prod, prod_head, prod_next, is_sp, 1);
337end:
338 if (free_space != NULL)
339 *free_space = free_entries - n;
340 return n;
341}
342
367static __rte_always_inline unsigned int
368__rte_ring_do_dequeue_elem(struct rte_ring *r, void *obj_table,
369 unsigned int esize, unsigned int n,
370 enum rte_ring_queue_behavior behavior, unsigned int is_sc,
371 unsigned int *available)
372{
373 uint32_t cons_head, cons_next;
374 uint32_t entries;
375
376 n = __rte_ring_move_cons_head(r, (int)is_sc, n, behavior,
377 &cons_head, &cons_next, &entries);
378 if (n == 0)
379 goto end;
380
381 __rte_ring_dequeue_elems(r, cons_head, obj_table, esize, n);
382
383 __rte_ring_update_tail(&r->cons, cons_head, cons_next, is_sc, 0);
384
385end:
386 if (available != NULL)
387 *available = entries - n;
388 return n;
389}
390
391#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION >= 120000)
392#pragma GCC diagnostic pop
393#endif
394
395#endif /* _RTE_RING_ELEM_PVT_H_ */
#define likely(x)
#define __rte_always_inline
Definition: rte_common.h:413
rte_ring_queue_behavior
Definition: rte_ring_core.h:40
uint32_t size
uint32_t mask