DPDK  20.05.0
rte_graph_worker.h
Go to the documentation of this file.
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(C) 2020 Marvell International Ltd.
3  */
4 
5 #ifndef _RTE_GRAPH_WORKER_H_
6 #define _RTE_GRAPH_WORKER_H_
7 
18 #include <rte_common.h>
19 #include <rte_cycles.h>
20 #include <rte_prefetch.h>
21 #include <rte_memcpy.h>
22 #include <rte_memory.h>
23 
24 #include "rte_graph.h"
25 
26 #ifdef __cplusplus
27 extern "C" {
28 #endif
29 
35 struct rte_graph {
36  uint32_t tail;
37  uint32_t head;
38  uint32_t cir_mask;
39  rte_node_t nb_nodes;
40  rte_graph_off_t *cir_start;
41  rte_graph_off_t nodes_start;
42  rte_graph_t id;
43  int socket;
44  char name[RTE_GRAPH_NAMESIZE];
45  uint64_t fence;
47 
53 struct rte_node {
54  /* Slow path area */
55  uint64_t fence;
56  rte_graph_off_t next;
57  rte_node_t id;
58  rte_node_t parent_id;
59  rte_edge_t nb_edges;
60  uint32_t realloc_count;
62  char parent[RTE_NODE_NAMESIZE];
63  char name[RTE_NODE_NAMESIZE];
65  /* Fast path area */
66 #define RTE_NODE_CTX_SZ 16
67  uint8_t ctx[RTE_NODE_CTX_SZ] __rte_cache_aligned;
68  uint16_t size;
69  uint16_t idx;
70  rte_graph_off_t off;
71  uint64_t total_cycles;
72  uint64_t total_calls;
73  uint64_t total_objs;
75  union {
76  void **objs;
77  uint64_t objs_u64;
78  };
80  union {
81  rte_node_process_t process;
82  uint64_t process_u64;
83  };
84  struct rte_node *nodes[] __rte_cache_min_aligned;
86 
99 __rte_experimental
100 void __rte_node_stream_alloc(struct rte_graph *graph, struct rte_node *node);
101 
116 __rte_experimental
117 void __rte_node_stream_alloc_size(struct rte_graph *graph,
118  struct rte_node *node, uint16_t req_size);
119 
129 __rte_experimental
130 static inline void
131 rte_graph_walk(struct rte_graph *graph)
132 {
133  const rte_graph_off_t *cir_start = graph->cir_start;
134  const rte_node_t mask = graph->cir_mask;
135  uint32_t head = graph->head;
136  struct rte_node *node;
137  uint64_t start;
138  uint16_t rc;
139  void **objs;
140 
141  /*
142  * Walk on the source node(s) ((cir_start - head) -> cir_start) and then
143  * on the pending streams (cir_start -> (cir_start + mask) -> cir_start)
144  * in a circular buffer fashion.
145  *
146  * +-----+ <= cir_start - head [number of source nodes]
147  * | |
148  * | ... | <= source nodes
149  * | |
150  * +-----+ <= cir_start [head = 0] [tail = 0]
151  * | |
152  * | ... | <= pending streams
153  * | |
154  * +-----+ <= cir_start + mask
155  */
156  while (likely(head != graph->tail)) {
157  node = RTE_PTR_ADD(graph, cir_start[(int32_t)head++]);
158  RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);
159  objs = node->objs;
160  rte_prefetch0(objs);
161 
163  start = rte_rdtsc();
164  rc = node->process(graph, node, objs, node->idx);
165  node->total_cycles += rte_rdtsc() - start;
166  node->total_calls++;
167  node->total_objs += rc;
168  } else {
169  node->process(graph, node, objs, node->idx);
170  }
171  node->idx = 0;
172  head = likely((int32_t)head > 0) ? head & mask : head;
173  }
174  graph->tail = 0;
175 }
176 
177 /* Fast path helper functions */
178 
189 static __rte_always_inline void
190 __rte_node_enqueue_tail_update(struct rte_graph *graph, struct rte_node *node)
191 {
192  uint32_t tail;
193 
194  tail = graph->tail;
195  graph->cir_start[tail++] = node->off;
196  graph->tail = tail & graph->cir_mask;
197 }
198 
216 static __rte_always_inline void
217 __rte_node_enqueue_prologue(struct rte_graph *graph, struct rte_node *node,
218  const uint16_t idx, const uint16_t space)
219 {
220 
221  /* Add to the pending stream list if the node is new */
222  if (idx == 0)
223  __rte_node_enqueue_tail_update(graph, node);
224 
225  if (unlikely(node->size < (idx + space)))
226  __rte_node_stream_alloc(graph, node);
227 }
228 
242 static __rte_always_inline struct rte_node *
243 __rte_node_next_node_get(struct rte_node *node, rte_edge_t next)
244 {
245  RTE_ASSERT(next < node->nb_edges);
246  RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);
247  node = node->nodes[next];
248  RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);
249 
250  return node;
251 }
252 
268 __rte_experimental
269 static inline void
270 rte_node_enqueue(struct rte_graph *graph, struct rte_node *node,
271  rte_edge_t next, void **objs, uint16_t nb_objs)
272 {
273  node = __rte_node_next_node_get(node, next);
274  const uint16_t idx = node->idx;
275 
276  __rte_node_enqueue_prologue(graph, node, idx, nb_objs);
277 
278  rte_memcpy(&node->objs[idx], objs, nb_objs * sizeof(void *));
279  node->idx = idx + nb_objs;
280 }
281 
295 __rte_experimental
296 static inline void
297 rte_node_enqueue_x1(struct rte_graph *graph, struct rte_node *node,
298  rte_edge_t next, void *obj)
299 {
300  node = __rte_node_next_node_get(node, next);
301  uint16_t idx = node->idx;
302 
303  __rte_node_enqueue_prologue(graph, node, idx, 1);
304 
305  node->objs[idx++] = obj;
306  node->idx = idx;
307 }
308 
325 __rte_experimental
326 static inline void
327 rte_node_enqueue_x2(struct rte_graph *graph, struct rte_node *node,
328  rte_edge_t next, void *obj0, void *obj1)
329 {
330  node = __rte_node_next_node_get(node, next);
331  uint16_t idx = node->idx;
332 
333  __rte_node_enqueue_prologue(graph, node, idx, 2);
334 
335  node->objs[idx++] = obj0;
336  node->objs[idx++] = obj1;
337  node->idx = idx;
338 }
339 
360 __rte_experimental
361 static inline void
362 rte_node_enqueue_x4(struct rte_graph *graph, struct rte_node *node,
363  rte_edge_t next, void *obj0, void *obj1, void *obj2,
364  void *obj3)
365 {
366  node = __rte_node_next_node_get(node, next);
367  uint16_t idx = node->idx;
368 
369  __rte_node_enqueue_prologue(graph, node, idx, 4);
370 
371  node->objs[idx++] = obj0;
372  node->objs[idx++] = obj1;
373  node->objs[idx++] = obj2;
374  node->objs[idx++] = obj3;
375  node->idx = idx;
376 }
377 
394 __rte_experimental
395 static inline void
396 rte_node_enqueue_next(struct rte_graph *graph, struct rte_node *node,
397  rte_edge_t *nexts, void **objs, uint16_t nb_objs)
398 {
399  uint16_t i;
400 
401  for (i = 0; i < nb_objs; i++)
402  rte_node_enqueue_x1(graph, node, nexts[i], objs[i]);
403 }
404 
424 __rte_experimental
425 static inline void **
426 rte_node_next_stream_get(struct rte_graph *graph, struct rte_node *node,
427  rte_edge_t next, uint16_t nb_objs)
428 {
429  node = __rte_node_next_node_get(node, next);
430  const uint16_t idx = node->idx;
431  uint16_t free_space = node->size - idx;
432 
433  if (unlikely(free_space < nb_objs))
434  __rte_node_stream_alloc_size(graph, node, nb_objs);
435 
436  return &node->objs[idx];
437 }
438 
455 __rte_experimental
456 static inline void
457 rte_node_next_stream_put(struct rte_graph *graph, struct rte_node *node,
458  rte_edge_t next, uint16_t idx)
459 {
460  if (unlikely(!idx))
461  return;
462 
463  node = __rte_node_next_node_get(node, next);
464  if (node->idx == 0)
465  __rte_node_enqueue_tail_update(graph, node);
466 
467  node->idx += idx;
468 }
469 
484 __rte_experimental
485 static inline void
486 rte_node_next_stream_move(struct rte_graph *graph, struct rte_node *src,
487  rte_edge_t next)
488 {
489  struct rte_node *dst = __rte_node_next_node_get(src, next);
490 
491  /* Let swap the pointers if dst don't have valid objs */
492  if (likely(dst->idx == 0)) {
493  void **dobjs = dst->objs;
494  uint16_t dsz = dst->size;
495  dst->objs = src->objs;
496  dst->size = src->size;
497  src->objs = dobjs;
498  src->size = dsz;
499  dst->idx = src->idx;
500  __rte_node_enqueue_tail_update(graph, dst);
501  } else { /* Move the objects from src node to dst node */
502  rte_node_enqueue(graph, src, next, src->objs, src->idx);
503  }
504 }
505 
506 #ifdef __cplusplus
507 }
508 #endif
509 
510 #endif /* _RTE_GRAPH_WORKER_H_ */
uint32_t rte_node_t
Definition: rte_graph.h:44
static __rte_experimental void rte_node_enqueue(struct rte_graph *graph, struct rte_node *node, rte_edge_t next, void **objs, uint16_t nb_objs)
#define __rte_always_inline
Definition: rte_common.h:193
static __rte_experimental void rte_node_next_stream_move(struct rte_graph *graph, struct rte_node *src, rte_edge_t next)
uint16_t rte_edge_t
Definition: rte_graph.h:45
#define __rte_cache_min_aligned
Definition: rte_common.h:370
#define likely(x)
static __rte_experimental void rte_node_enqueue_x2(struct rte_graph *graph, struct rte_node *node, rte_edge_t next, void *obj0, void *obj1)
#define RTE_NODE_NAMESIZE
Definition: rte_graph.h:36
static __rte_always_inline int rte_graph_has_stats_feature(void)
Definition: rte_graph.h:655
uint16_t rte_graph_t
Definition: rte_graph.h:46
uint16_t(* rte_node_process_t)(struct rte_graph *graph, struct rte_node *node, void **objs, uint16_t nb_objs)
Definition: rte_graph.h:98
#define RTE_GRAPH_NAMESIZE
Definition: rte_graph.h:35
static __rte_experimental void rte_node_next_stream_put(struct rte_graph *graph, struct rte_node *node, rte_edge_t next, uint16_t idx)
#define RTE_PTR_ADD(ptr, x)
Definition: rte_common.h:215
static __rte_experimental void rte_node_enqueue_next(struct rte_graph *graph, struct rte_node *node, rte_edge_t *nexts, void **objs, uint16_t nb_objs)
#define unlikely(x)
static __rte_experimental void rte_graph_walk(struct rte_graph *graph)
#define RTE_GRAPH_FENCE
Definition: rte_graph.h:41
static __rte_experimental void rte_node_enqueue_x1(struct rte_graph *graph, struct rte_node *node, rte_edge_t next, void *obj)
#define RTE_STD_C11
Definition: rte_common.h:40
#define __rte_cache_aligned
Definition: rte_common.h:367
uint32_t rte_graph_off_t
Definition: rte_graph.h:43
static __rte_experimental void ** rte_node_next_stream_get(struct rte_graph *graph, struct rte_node *node, rte_edge_t next, uint16_t nb_objs)
static void * rte_memcpy(void *dst, const void *src, size_t n)
static __rte_experimental void rte_node_enqueue_x4(struct rte_graph *graph, struct rte_node *node, rte_edge_t next, void *obj0, void *obj1, void *obj2, void *obj3)
static void rte_prefetch0(const volatile void *p)