DPDK  22.11.5
rte_graph_worker.h
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2020 Marvell International Ltd.
 */

#ifndef _RTE_GRAPH_WORKER_H_
#define _RTE_GRAPH_WORKER_H_

#include <rte_compat.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>
#include <rte_memcpy.h>
#include <rte_memory.h>

#include "rte_graph.h"

#ifdef __cplusplus
extern "C" {
#endif

struct rte_graph {
	uint32_t tail;			/**< Tail of circular buffer. */
	uint32_t head;			/**< Head of circular buffer. */
	uint32_t cir_mask;		/**< Circular buffer wrap around mask. */
	rte_node_t nb_nodes;		/**< Number of nodes in the graph. */
	rte_graph_off_t *cir_start;	/**< Pointer to circular buffer. */
	rte_graph_off_t nodes_start;	/**< Offset at which node memory starts. */
	rte_graph_t id;			/**< Graph identifier. */
	int socket;			/**< Socket ID where memory is allocated. */
	char name[RTE_GRAPH_NAMESIZE];	/**< Name of the graph. */
	uint64_t fence;			/**< Fence. */
} __rte_cache_aligned;

struct rte_node {
	/* Slow path area */
	uint64_t fence;		/**< Fence. */
	rte_graph_off_t next;	/**< Index to next node. */
	rte_node_t id;		/**< Node identifier. */
	rte_node_t parent_id;	/**< Parent node identifier. */
	rte_edge_t nb_edges;	/**< Number of edges from this node. */
	uint32_t realloc_count;	/**< Number of times realloced. */

	char parent[RTE_NODE_NAMESIZE];	/**< Parent node name. */
	char name[RTE_NODE_NAMESIZE];	/**< Name of the node. */

	/* Fast path area */
#define RTE_NODE_CTX_SZ 16
	uint8_t ctx[RTE_NODE_CTX_SZ] __rte_cache_aligned; /**< Node context. */
	uint16_t size;		/**< Total number of objects available. */
	uint16_t idx;		/**< Number of objects used. */
	rte_graph_off_t off;	/**< Offset of node in the graph reel. */
	uint64_t total_cycles;	/**< Cycles spent in this node. */
	uint64_t total_calls;	/**< Calls done to this node. */
	uint64_t total_objs;	/**< Objects processed by this node. */
	RTE_STD_C11
		union {
			void **objs;	   /**< Array of object pointers. */
			uint64_t objs_u64; /**< Object pointers as uint64_t. */
		};
	RTE_STD_C11
		union {
			rte_node_process_t process; /**< Process function. */
			uint64_t process_u64;	    /**< Process function as uint64_t. */
		};
	struct rte_node *nodes[] __rte_cache_min_aligned; /**< Next nodes. */
} __rte_cache_aligned;

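/*
 * Example: a minimal sketch of how a node implementation may use the 16-byte
 * fast path context area declared above. The structure "my_node_ctx", the
 * callback "my_node_process" and the edge resolved into "next_edge" are
 * hypothetical names used only for illustration; they are not part of this
 * header.
 *
 *	struct my_node_ctx {
 *		rte_edge_t next_edge;	// resolved once at node init time
 *	};
 *
 *	static uint16_t
 *	my_node_process(struct rte_graph *graph, struct rte_node *node,
 *			void **objs, uint16_t nb_objs)
 *	{
 *		struct my_node_ctx *ctx = (struct my_node_ctx *)node->ctx;
 *
 *		// forward the whole burst to the pre-resolved next edge
 *		rte_node_enqueue(graph, node, ctx->next_edge, objs, nb_objs);
 *		return nb_objs;
 *	}
 */
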
__rte_experimental
void __rte_node_stream_alloc(struct rte_graph *graph, struct rte_node *node);

__rte_experimental
void __rte_node_stream_alloc_size(struct rte_graph *graph,
				  struct rte_node *node, uint16_t req_size);

__rte_experimental
static inline void
rte_graph_walk(struct rte_graph *graph)
{
	const rte_graph_off_t *cir_start = graph->cir_start;
	const rte_node_t mask = graph->cir_mask;
	uint32_t head = graph->head;
	struct rte_node *node;
	uint64_t start;
	uint16_t rc;
	void **objs;

	/*
	 * Walk on the source node(s) ((cir_start - head) -> cir_start) and then
	 * on the pending streams (cir_start -> (cir_start + mask) -> cir_start)
	 * in a circular buffer fashion.
	 *
	 *	+-----+ <= cir_start - head [number of source nodes]
	 *	|     |
	 *	| ... | <= source nodes
	 *	|     |
	 *	+-----+ <= cir_start [head = 0] [tail = 0]
	 *	|     |
	 *	| ... | <= pending streams
	 *	|     |
	 *	+-----+ <= cir_start + mask
	 */
	while (likely(head != graph->tail)) {
		node = (struct rte_node *)RTE_PTR_ADD(graph, cir_start[(int32_t)head++]);
		RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);
		objs = node->objs;
		rte_prefetch0(objs);

		/* Account cycles/calls/objs only when stats collection is enabled */
		if (rte_graph_has_stats_feature()) {
			start = rte_rdtsc();
			rc = node->process(graph, node, objs, node->idx);
			node->total_cycles += rte_rdtsc() - start;
			node->total_calls++;
			node->total_objs += rc;
		} else {
			node->process(graph, node, objs, node->idx);
		}
		node->idx = 0;
		head = likely((int32_t)head > 0) ? head & mask : head;
	}
	graph->tail = 0;
}

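/*
 * Example: a minimal sketch of a worker loop built on rte_graph_walk(). It
 * assumes the graph was already created with rte_graph_create() and is looked
 * up by name; "my_graph_name" and "my_graph_worker" are hypothetical names.
 *
 *	static int
 *	my_graph_worker(void *arg)
 *	{
 *		struct rte_graph *graph = rte_graph_lookup("my_graph_name");
 *
 *		RTE_SET_USED(arg);
 *		while (1)
 *			rte_graph_walk(graph);
 *		return 0;
 *	}
 *
 * Such a function is typically launched on each worker lcore with
 * rte_eal_remote_launch().
 */
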

/* Fast path helper functions */

static __rte_always_inline void
__rte_node_enqueue_tail_update(struct rte_graph *graph, struct rte_node *node)
{
	uint32_t tail;

	tail = graph->tail;
	graph->cir_start[tail++] = node->off;
	graph->tail = tail & graph->cir_mask;
}

static __rte_always_inline void
__rte_node_enqueue_prologue(struct rte_graph *graph, struct rte_node *node,
			    const uint16_t idx, const uint16_t space)
{

	/* Add to the pending stream list if the node is new */
	if (idx == 0)
		__rte_node_enqueue_tail_update(graph, node);

	if (unlikely(node->size < (idx + space)))
		__rte_node_stream_alloc_size(graph, node, node->size + space);
}

static __rte_always_inline struct rte_node *
__rte_node_next_node_get(struct rte_node *node, rte_edge_t next)
{
	RTE_ASSERT(next < node->nb_edges);
	RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);
	node = node->nodes[next];
	RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);

	return node;
}

__rte_experimental
static inline void
rte_node_enqueue(struct rte_graph *graph, struct rte_node *node,
		 rte_edge_t next, void **objs, uint16_t nb_objs)
{
	node = __rte_node_next_node_get(node, next);
	const uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, nb_objs);

	rte_memcpy(&node->objs[idx], objs, nb_objs * sizeof(void *));
	node->idx = idx + nb_objs;
}

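/*
 * Example: a sketch of a process callback that splits its burst across two
 * next edges with rte_node_enqueue(). The edge indices MY_NEXT_A/MY_NEXT_B,
 * the classifier my_match() and the callback name are hypothetical, and the
 * sketch assumes a burst never exceeds RTE_GRAPH_BURST_SIZE objects.
 *
 *	static uint16_t
 *	my_classify_process(struct rte_graph *graph, struct rte_node *node,
 *			    void **objs, uint16_t nb_objs)
 *	{
 *		void *a[RTE_GRAPH_BURST_SIZE], *b[RTE_GRAPH_BURST_SIZE];
 *		uint16_t na = 0, nb = 0, i;
 *
 *		for (i = 0; i < nb_objs; i++) {
 *			if (my_match(objs[i]))
 *				a[na++] = objs[i];
 *			else
 *				b[nb++] = objs[i];
 *		}
 *		if (na)
 *			rte_node_enqueue(graph, node, MY_NEXT_A, a, na);
 *		if (nb)
 *			rte_node_enqueue(graph, node, MY_NEXT_B, b, nb);
 *		return nb_objs;
 *	}
 */
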
__rte_experimental
static inline void
rte_node_enqueue_x1(struct rte_graph *graph, struct rte_node *node,
		    rte_edge_t next, void *obj)
{
	node = __rte_node_next_node_get(node, next);
	uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, 1);

	node->objs[idx++] = obj;
	node->idx = idx;
}

__rte_experimental
static inline void
rte_node_enqueue_x2(struct rte_graph *graph, struct rte_node *node,
		    rte_edge_t next, void *obj0, void *obj1)
{
	node = __rte_node_next_node_get(node, next);
	uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, 2);

	node->objs[idx++] = obj0;
	node->objs[idx++] = obj1;
	node->idx = idx;
}

__rte_experimental
static inline void
rte_node_enqueue_x4(struct rte_graph *graph, struct rte_node *node,
		    rte_edge_t next, void *obj0, void *obj1, void *obj2,
		    void *obj3)
{
	node = __rte_node_next_node_get(node, next);
	uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, 4);

	node->objs[idx++] = obj0;
	node->objs[idx++] = obj1;
	node->objs[idx++] = obj2;
	node->objs[idx++] = obj3;
	node->idx = idx;
}

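/*
 * Example: a sketch of enqueuing four objects at a time to a single edge with
 * rte_node_enqueue_x4(), falling back to rte_node_enqueue_x1() for the
 * remainder. It assumes it runs inside a process callback where graph, node,
 * objs and nb_objs are in scope; MY_NEXT_EDGE is a hypothetical edge index.
 *
 *	uint16_t i = 0;
 *
 *	for (; i + 4 <= nb_objs; i += 4)
 *		rte_node_enqueue_x4(graph, node, MY_NEXT_EDGE, objs[i],
 *				    objs[i + 1], objs[i + 2], objs[i + 3]);
 *	for (; i < nb_objs; i++)
 *		rte_node_enqueue_x1(graph, node, MY_NEXT_EDGE, objs[i]);
 */
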
__rte_experimental
static inline void
rte_node_enqueue_next(struct rte_graph *graph, struct rte_node *node,
		      rte_edge_t *nexts, void **objs, uint16_t nb_objs)
{
	uint16_t i;

	for (i = 0; i < nb_objs; i++)
		rte_node_enqueue_x1(graph, node, nexts[i], objs[i]);
}

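/*
 * Example: a sketch of per-object routing with rte_node_enqueue_next(), where
 * every object may take a different edge. The lookup helper my_lookup_edge()
 * is hypothetical, and the sketch again assumes it runs inside a process
 * callback with bursts no larger than RTE_GRAPH_BURST_SIZE.
 *
 *	rte_edge_t nexts[RTE_GRAPH_BURST_SIZE];
 *	uint16_t i;
 *
 *	for (i = 0; i < nb_objs; i++)
 *		nexts[i] = my_lookup_edge(objs[i]);
 *	rte_node_enqueue_next(graph, node, nexts, objs, nb_objs);
 */
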
__rte_experimental
static inline void **
rte_node_next_stream_get(struct rte_graph *graph, struct rte_node *node,
			 rte_edge_t next, uint16_t nb_objs)
{
	node = __rte_node_next_node_get(node, next);
	const uint16_t idx = node->idx;
	uint16_t free_space = node->size - idx;

	if (unlikely(free_space < nb_objs))
		__rte_node_stream_alloc_size(graph, node, node->size + nb_objs);

	return &node->objs[idx];
}

__rte_experimental
static inline void
rte_node_next_stream_put(struct rte_graph *graph, struct rte_node *node,
			 rte_edge_t next, uint16_t idx)
{
	if (unlikely(!idx))
		return;

	node = __rte_node_next_node_get(node, next);
	if (node->idx == 0)
		__rte_node_enqueue_tail_update(graph, node);

	node->idx += idx;
}

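/*
 * Example: a sketch of the get/fill/put pattern typically used by source
 * nodes that write objects straight into the next node's stream instead of
 * enqueuing them one by one. MY_NEXT_EDGE and my_rx_burst() are hypothetical;
 * my_rx_burst() is assumed to fill the array and return the object count.
 *
 *	void **to_next;
 *	uint16_t n;
 *
 *	to_next = rte_node_next_stream_get(graph, node, MY_NEXT_EDGE,
 *					   RTE_GRAPH_BURST_SIZE);
 *	n = my_rx_burst(to_next, RTE_GRAPH_BURST_SIZE);
 *	rte_node_next_stream_put(graph, node, MY_NEXT_EDGE, n);
 */
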
__rte_experimental
static inline void
rte_node_next_stream_move(struct rte_graph *graph, struct rte_node *src,
			  rte_edge_t next)
{
	struct rte_node *dst = __rte_node_next_node_get(src, next);

	/* Swap the pointers if dst doesn't have valid objs */
	if (likely(dst->idx == 0)) {
		void **dobjs = dst->objs;
		uint16_t dsz = dst->size;
		dst->objs = src->objs;
		dst->size = src->size;
		src->objs = dobjs;
		src->size = dsz;
		dst->idx = src->idx;
		__rte_node_enqueue_tail_update(graph, dst);
	} else { /* Move the objects from src node to dst node */
		rte_node_enqueue(graph, src, next, src->objs, src->idx);
	}
}

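/*
 * Example: a sketch of a pass-through node that hands its entire stream to a
 * single next edge with rte_node_next_stream_move(), avoiding a per-object
 * copy when the destination stream is empty. The callback name and
 * MY_NEXT_EDGE are hypothetical.
 *
 *	static uint16_t
 *	my_passthrough_process(struct rte_graph *graph, struct rte_node *node,
 *			       void **objs, uint16_t nb_objs)
 *	{
 *		RTE_SET_USED(objs);
 *		rte_node_next_stream_move(graph, node, MY_NEXT_EDGE);
 *		return nb_objs;
 *	}
 */
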
#ifdef __cplusplus
}
#endif

#endif /* _RTE_GRAPH_WORKER_H_ */