DPDK  21.02.0
rte_graph_worker.h
Go to the documentation of this file.
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(C) 2020 Marvell International Ltd.
3  */
4 
5 #ifndef _RTE_GRAPH_WORKER_H_
6 #define _RTE_GRAPH_WORKER_H_
7 
19 #include <rte_common.h>
20 #include <rte_cycles.h>
21 #include <rte_prefetch.h>
22 #include <rte_memcpy.h>
23 #include <rte_memory.h>
24 
25 #include "rte_graph.h"
26 
27 #ifdef __cplusplus
28 extern "C" {
29 #endif
30 
36 struct rte_graph {
37  uint32_t tail;
38  uint32_t head;
39  uint32_t cir_mask;
40  rte_node_t nb_nodes;
41  rte_graph_off_t *cir_start;
42  rte_graph_off_t nodes_start;
43  rte_graph_t id;
44  int socket;
45  char name[RTE_GRAPH_NAMESIZE];
46  uint64_t fence;
48 
54 struct rte_node {
55  /* Slow path area */
56  uint64_t fence;
57  rte_graph_off_t next;
58  rte_node_t id;
59  rte_node_t parent_id;
60  rte_edge_t nb_edges;
61  uint32_t realloc_count;
63  char parent[RTE_NODE_NAMESIZE];
64  char name[RTE_NODE_NAMESIZE];
66  /* Fast path area */
67 #define RTE_NODE_CTX_SZ 16
68  uint8_t ctx[RTE_NODE_CTX_SZ] __rte_cache_aligned;
69  uint16_t size;
70  uint16_t idx;
71  rte_graph_off_t off;
72  uint64_t total_cycles;
73  uint64_t total_calls;
74  uint64_t total_objs;
76  union {
77  void **objs;
78  uint64_t objs_u64;
79  };
81  union {
82  rte_node_process_t process;
83  uint64_t process_u64;
84  };
85  struct rte_node *nodes[] __rte_cache_min_aligned;
87 
100 __rte_experimental
101 void __rte_node_stream_alloc(struct rte_graph *graph, struct rte_node *node);
102 
117 __rte_experimental
118 void __rte_node_stream_alloc_size(struct rte_graph *graph,
119  struct rte_node *node, uint16_t req_size);
120 
130 __rte_experimental
131 static inline void
132 rte_graph_walk(struct rte_graph *graph)
133 {
134  const rte_graph_off_t *cir_start = graph->cir_start;
135  const rte_node_t mask = graph->cir_mask;
136  uint32_t head = graph->head;
137  struct rte_node *node;
138  uint64_t start;
139  uint16_t rc;
140  void **objs;
141 
142  /*
143  * Walk on the source node(s) ((cir_start - head) -> cir_start) and then
144  * on the pending streams (cir_start -> (cir_start + mask) -> cir_start)
145  * in a circular buffer fashion.
146  *
147  * +-----+ <= cir_start - head [number of source nodes]
148  * | |
149  * | ... | <= source nodes
150  * | |
151  * +-----+ <= cir_start [head = 0] [tail = 0]
152  * | |
153  * | ... | <= pending streams
154  * | |
155  * +-----+ <= cir_start + mask
156  */
157  while (likely(head != graph->tail)) {
158  node = RTE_PTR_ADD(graph, cir_start[(int32_t)head++]);
159  RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);
160  objs = node->objs;
161  rte_prefetch0(objs);
162 
164  start = rte_rdtsc();
165  rc = node->process(graph, node, objs, node->idx);
166  node->total_cycles += rte_rdtsc() - start;
167  node->total_calls++;
168  node->total_objs += rc;
169  } else {
170  node->process(graph, node, objs, node->idx);
171  }
172  node->idx = 0;
173  head = likely((int32_t)head > 0) ? head & mask : head;
174  }
175  graph->tail = 0;
176 }
177 
178 /* Fast path helper functions */
179 
190 static __rte_always_inline void
191 __rte_node_enqueue_tail_update(struct rte_graph *graph, struct rte_node *node)
192 {
193  uint32_t tail;
194 
195  tail = graph->tail;
196  graph->cir_start[tail++] = node->off;
197  graph->tail = tail & graph->cir_mask;
198 }
199 
217 static __rte_always_inline void
218 __rte_node_enqueue_prologue(struct rte_graph *graph, struct rte_node *node,
219  const uint16_t idx, const uint16_t space)
220 {
221 
222  /* Add to the pending stream list if the node is new */
223  if (idx == 0)
224  __rte_node_enqueue_tail_update(graph, node);
225 
226  if (unlikely(node->size < (idx + space)))
227  __rte_node_stream_alloc(graph, node);
228 }
229 
243 static __rte_always_inline struct rte_node *
244 __rte_node_next_node_get(struct rte_node *node, rte_edge_t next)
245 {
246  RTE_ASSERT(next < node->nb_edges);
247  RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);
248  node = node->nodes[next];
249  RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);
250 
251  return node;
252 }
253 
269 __rte_experimental
270 static inline void
271 rte_node_enqueue(struct rte_graph *graph, struct rte_node *node,
272  rte_edge_t next, void **objs, uint16_t nb_objs)
273 {
274  node = __rte_node_next_node_get(node, next);
275  const uint16_t idx = node->idx;
276 
277  __rte_node_enqueue_prologue(graph, node, idx, nb_objs);
278 
279  rte_memcpy(&node->objs[idx], objs, nb_objs * sizeof(void *));
280  node->idx = idx + nb_objs;
281 }
282 
296 __rte_experimental
297 static inline void
298 rte_node_enqueue_x1(struct rte_graph *graph, struct rte_node *node,
299  rte_edge_t next, void *obj)
300 {
301  node = __rte_node_next_node_get(node, next);
302  uint16_t idx = node->idx;
303 
304  __rte_node_enqueue_prologue(graph, node, idx, 1);
305 
306  node->objs[idx++] = obj;
307  node->idx = idx;
308 }
309 
326 __rte_experimental
327 static inline void
328 rte_node_enqueue_x2(struct rte_graph *graph, struct rte_node *node,
329  rte_edge_t next, void *obj0, void *obj1)
330 {
331  node = __rte_node_next_node_get(node, next);
332  uint16_t idx = node->idx;
333 
334  __rte_node_enqueue_prologue(graph, node, idx, 2);
335 
336  node->objs[idx++] = obj0;
337  node->objs[idx++] = obj1;
338  node->idx = idx;
339 }
340 
361 __rte_experimental
362 static inline void
363 rte_node_enqueue_x4(struct rte_graph *graph, struct rte_node *node,
364  rte_edge_t next, void *obj0, void *obj1, void *obj2,
365  void *obj3)
366 {
367  node = __rte_node_next_node_get(node, next);
368  uint16_t idx = node->idx;
369 
370  __rte_node_enqueue_prologue(graph, node, idx, 4);
371 
372  node->objs[idx++] = obj0;
373  node->objs[idx++] = obj1;
374  node->objs[idx++] = obj2;
375  node->objs[idx++] = obj3;
376  node->idx = idx;
377 }
378 
395 __rte_experimental
396 static inline void
397 rte_node_enqueue_next(struct rte_graph *graph, struct rte_node *node,
398  rte_edge_t *nexts, void **objs, uint16_t nb_objs)
399 {
400  uint16_t i;
401 
402  for (i = 0; i < nb_objs; i++)
403  rte_node_enqueue_x1(graph, node, nexts[i], objs[i]);
404 }
405 
425 __rte_experimental
426 static inline void **
427 rte_node_next_stream_get(struct rte_graph *graph, struct rte_node *node,
428  rte_edge_t next, uint16_t nb_objs)
429 {
430  node = __rte_node_next_node_get(node, next);
431  const uint16_t idx = node->idx;
432  uint16_t free_space = node->size - idx;
433 
434  if (unlikely(free_space < nb_objs))
435  __rte_node_stream_alloc_size(graph, node, nb_objs);
436 
437  return &node->objs[idx];
438 }
439 
456 __rte_experimental
457 static inline void
458 rte_node_next_stream_put(struct rte_graph *graph, struct rte_node *node,
459  rte_edge_t next, uint16_t idx)
460 {
461  if (unlikely(!idx))
462  return;
463 
464  node = __rte_node_next_node_get(node, next);
465  if (node->idx == 0)
466  __rte_node_enqueue_tail_update(graph, node);
467 
468  node->idx += idx;
469 }
470 
485 __rte_experimental
486 static inline void
487 rte_node_next_stream_move(struct rte_graph *graph, struct rte_node *src,
488  rte_edge_t next)
489 {
490  struct rte_node *dst = __rte_node_next_node_get(src, next);
491 
492  /* Let swap the pointers if dst don't have valid objs */
493  if (likely(dst->idx == 0)) {
494  void **dobjs = dst->objs;
495  uint16_t dsz = dst->size;
496  dst->objs = src->objs;
497  dst->size = src->size;
498  src->objs = dobjs;
499  src->size = dsz;
500  dst->idx = src->idx;
501  __rte_node_enqueue_tail_update(graph, dst);
502  } else { /* Move the objects from src node to dst node */
503  rte_node_enqueue(graph, src, next, src->objs, src->idx);
504  }
505 }
506 
507 #ifdef __cplusplus
508 }
509 #endif
510 
511 #endif /* _RTE_GRAPH_WORKER_H_ */
uint32_t rte_node_t
Definition: rte_graph.h:45
static __rte_experimental void rte_node_enqueue(struct rte_graph *graph, struct rte_node *node, rte_edge_t next, void **objs, uint16_t nb_objs)
#define __rte_always_inline
Definition: rte_common.h:226
static __rte_experimental void rte_node_next_stream_move(struct rte_graph *graph, struct rte_node *src, rte_edge_t next)
uint16_t rte_edge_t
Definition: rte_graph.h:46
#define __rte_cache_min_aligned
Definition: rte_common.h:403
#define likely(x)
static __rte_experimental void rte_node_enqueue_x2(struct rte_graph *graph, struct rte_node *node, rte_edge_t next, void *obj0, void *obj1)
#define RTE_NODE_NAMESIZE
Definition: rte_graph.h:37
static __rte_always_inline int rte_graph_has_stats_feature(void)
Definition: rte_graph.h:656
uint16_t rte_graph_t
Definition: rte_graph.h:47
uint16_t(* rte_node_process_t)(struct rte_graph *graph, struct rte_node *node, void **objs, uint16_t nb_objs)
Definition: rte_graph.h:99
#define RTE_GRAPH_NAMESIZE
Definition: rte_graph.h:36
static __rte_experimental void rte_node_next_stream_put(struct rte_graph *graph, struct rte_node *node, rte_edge_t next, uint16_t idx)
#define RTE_PTR_ADD(ptr, x)
Definition: rte_common.h:248
static __rte_experimental void rte_node_enqueue_next(struct rte_graph *graph, struct rte_node *node, rte_edge_t *nexts, void **objs, uint16_t nb_objs)
#define unlikely(x)
static __rte_experimental void rte_graph_walk(struct rte_graph *graph)
#define RTE_GRAPH_FENCE
Definition: rte_graph.h:42
static __rte_experimental void rte_node_enqueue_x1(struct rte_graph *graph, struct rte_node *node, rte_edge_t next, void *obj)
#define __rte_cache_aligned
Definition: rte_common.h:400
#define RTE_STD_C11
Definition: rte_common.h:40
uint32_t rte_graph_off_t
Definition: rte_graph.h:44
static __rte_experimental void ** rte_node_next_stream_get(struct rte_graph *graph, struct rte_node *node, rte_edge_t next, uint16_t nb_objs)
static void * rte_memcpy(void *dst, const void *src, size_t n)
static __rte_experimental void rte_node_enqueue_x4(struct rte_graph *graph, struct rte_node *node, rte_edge_t next, void *obj0, void *obj1, void *obj2, void *obj3)
static void rte_prefetch0(const volatile void *p)