DPDK  22.07.0
rte_idxd_rawdev_fns.h
Go to the documentation of this file.
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2021 Intel Corporation
3  */
4 #ifndef _RTE_IDXD_RAWDEV_FNS_H_
5 #define _RTE_IDXD_RAWDEV_FNS_H_
6 
18 #include <stdint.h>
19 #include <rte_errno.h>
20 
21 /*
22  * Defines used in the data path for interacting with IDXD hardware.
23  */
/* Bit position of the operation code inside a descriptor's op_flags word. */
#define IDXD_CMD_OP_SHIFT 24

/*
 * Operation codes for IDXD descriptors. A code is placed into op_flags as
 * (code << IDXD_CMD_OP_SHIFT) and OR-ed with any IDXD_FLAG_* bits.
 */
enum rte_idxd_ops {
	idxd_op_nop     = 0,
	idxd_op_batch   = 1,
	idxd_op_drain   = 2,
	idxd_op_memmove = 3,
	idxd_op_fill    = 4
};
32 
/* Flag bits that are OR-ed into the op_flags word of an IDXD descriptor. */
#define IDXD_FLAG_FENCE                 (1 << 0)
#define IDXD_FLAG_COMPLETION_ADDR_VALID (1 << 2)
#define IDXD_FLAG_REQUEST_COMPLETION    (1 << 3)
#define IDXD_FLAG_CACHE_CONTROL         (1 << 8)
37 
/* Bit positions used when building IOAT descriptor control words. */
#define IOAT_COMP_UPDATE_SHIFT 3
#define IOAT_CMD_OP_SHIFT      24

/* Operation codes for IOAT descriptors. */
enum rte_ioat_ops {
	ioat_op_copy = 0, /* standard DMA operation */
	ioat_op_fill = 1  /* block fill */
};
44 
50  uint32_t pasid;
51  uint32_t op_flags;
52  rte_iova_t completion;
53 
55  union {
56  rte_iova_t src; /* source address for copy ops etc. */
57  rte_iova_t desc_addr; /* descriptor pointer for batch */
58  };
59  rte_iova_t dst;
60 
61  uint32_t size; /* length of data for op, or batch size */
62 
63  uint16_t intr_handle; /* completion interrupt handle */
64 
65  /* remaining 26 bytes are reserved */
66  uint16_t __reserved[13];
67 } __rte_aligned(64);
68 
73  uint8_t status;
74  uint8_t result;
75  /* 16-bits pad here */
76  uint32_t completed_size; /* data length, or descriptors for batch */
77 
78  rte_iova_t fault_address;
79  uint32_t invalid_flags;
80 } __rte_aligned(32);
81 
/*
 * Pair of user-supplied handles saved at enqueue time and handed back to the
 * caller by __idxd_completed_ops once the matching operation has completed.
 */
struct rte_idxd_user_hdl {
	uint64_t src;
	uint64_t dst;
};
90 
95 struct rte_idxd_rawdev {
96  enum rte_ioat_dev_type type;
97  struct rte_ioat_xstats xstats;
98 
99  void *portal; /* address to write the batch descriptor */
100 
101  struct rte_ioat_rawdev_config cfg;
102  rte_iova_t desc_iova; /* base address of desc ring, needed for completions */
103 
104  /* counters to track the batches */
105  unsigned short max_batches;
106  unsigned short batch_idx_read;
107  unsigned short batch_idx_write;
108  unsigned short *batch_idx_ring; /* store where each batch ends */
109 
110  /* track descriptors and handles */
111  unsigned short desc_ring_mask;
112  unsigned short hdls_avail; /* handles for ops completed */
113  unsigned short hdls_read; /* the read pointer for hdls/desc rings */
114  unsigned short batch_start; /* start+size == write pointer for hdls/desc */
115  unsigned short batch_size;
116 
117  struct rte_idxd_hw_desc *desc_ring;
118  struct rte_idxd_user_hdl *hdl_ring;
119  /* flags to indicate handle validity. Kept separate from ring, to avoid
120  * using 8 bytes per flag. Upper 8 bits holds error code if any.
121  */
122  uint16_t *hdl_ring_flags;
123 };
124 
/* Values stored in the low bits of each hdl_ring_flags[] entry. */
#define RTE_IDXD_HDL_NORMAL     0
#define RTE_IDXD_HDL_INVALID    (1 << 0) /* no handle stored for this element */
#define RTE_IDXD_HDL_OP_FAILED  (1 << 1) /* return failure for this one */
#define RTE_IDXD_HDL_OP_SKIPPED (1 << 2) /* this op was skipped */
129 
130 static __rte_always_inline uint16_t
131 __idxd_burst_capacity(int dev_id)
132 {
133  struct rte_idxd_rawdev *idxd =
134  (struct rte_idxd_rawdev *)rte_rawdevs[dev_id].dev_private;
135  uint16_t write_idx = idxd->batch_start + idxd->batch_size;
136  uint16_t used_space, free_space;
137 
138  /* Check for space in the batch ring */
139  if ((idxd->batch_idx_read == 0 && idxd->batch_idx_write == idxd->max_batches) ||
140  idxd->batch_idx_write + 1 == idxd->batch_idx_read)
141  return 0;
142 
143  /* for descriptors, check for wrap-around on write but not read */
144  if (idxd->hdls_read > write_idx)
145  write_idx += idxd->desc_ring_mask + 1;
146  used_space = write_idx - idxd->hdls_read;
147 
148  /* Return amount of free space in the descriptor ring
149  * subtract 1 for space for batch descriptor and 1 for possible null desc
150  */
151  free_space = idxd->desc_ring_mask - used_space;
152  if (free_space < 2)
153  return 0;
154  return free_space - 2;
155 }
156 
158 __desc_idx_to_iova(struct rte_idxd_rawdev *idxd, uint16_t n)
159 {
160  return idxd->desc_iova + (n * sizeof(struct rte_idxd_hw_desc));
161 }
162 
163 static __rte_always_inline int
164 __idxd_write_desc(int dev_id,
165  const uint32_t op_flags,
166  const rte_iova_t src,
167  const rte_iova_t dst,
168  const uint32_t size,
169  const struct rte_idxd_user_hdl *hdl)
170 {
171  struct rte_idxd_rawdev *idxd =
172  (struct rte_idxd_rawdev *)rte_rawdevs[dev_id].dev_private;
173  uint16_t write_idx = idxd->batch_start + idxd->batch_size;
174  uint16_t mask = idxd->desc_ring_mask;
175 
176  /* first check batch ring space then desc ring space */
177  if ((idxd->batch_idx_read == 0 && idxd->batch_idx_write == idxd->max_batches) ||
178  idxd->batch_idx_write + 1 == idxd->batch_idx_read)
179  goto failed;
180  /* for descriptor ring, we always need a slot for batch completion */
181  if (((write_idx + 2) & mask) == idxd->hdls_read ||
182  ((write_idx + 1) & mask) == idxd->hdls_read)
183  goto failed;
184 
185  /* write desc and handle. Note, descriptors don't wrap */
186  idxd->desc_ring[write_idx].pasid = 0;
187  idxd->desc_ring[write_idx].op_flags = op_flags | IDXD_FLAG_COMPLETION_ADDR_VALID;
188  idxd->desc_ring[write_idx].completion = __desc_idx_to_iova(idxd, write_idx & mask);
189  idxd->desc_ring[write_idx].src = src;
190  idxd->desc_ring[write_idx].dst = dst;
191  idxd->desc_ring[write_idx].size = size;
192 
193  if (hdl == NULL)
194  idxd->hdl_ring_flags[write_idx & mask] = RTE_IDXD_HDL_INVALID;
195  else
196  idxd->hdl_ring[write_idx & mask] = *hdl;
197  idxd->batch_size++;
198 
199  idxd->xstats.enqueued++;
200 
201  rte_prefetch0_write(&idxd->desc_ring[write_idx + 1]);
202  return 1;
203 
204 failed:
205  idxd->xstats.enqueue_failed++;
206  rte_errno = ENOSPC;
207  return 0;
208 }
209 
210 static __rte_always_inline int
211 __idxd_enqueue_fill(int dev_id, uint64_t pattern, rte_iova_t dst,
212  unsigned int length, uintptr_t dst_hdl)
213 {
214  const struct rte_idxd_user_hdl hdl = {
215  .dst = dst_hdl
216  };
217  return __idxd_write_desc(dev_id,
218  (idxd_op_fill << IDXD_CMD_OP_SHIFT) | IDXD_FLAG_CACHE_CONTROL,
219  pattern, dst, length, &hdl);
220 }
221 
222 static __rte_always_inline int
223 __idxd_enqueue_copy(int dev_id, rte_iova_t src, rte_iova_t dst,
224  unsigned int length, uintptr_t src_hdl, uintptr_t dst_hdl)
225 {
226  const struct rte_idxd_user_hdl hdl = {
227  .src = src_hdl,
228  .dst = dst_hdl
229  };
230  return __idxd_write_desc(dev_id,
231  (idxd_op_memmove << IDXD_CMD_OP_SHIFT) | IDXD_FLAG_CACHE_CONTROL,
232  src, dst, length, &hdl);
233 }
234 
235 static __rte_always_inline int
236 __idxd_enqueue_nop(int dev_id)
237 {
238  /* only op field needs filling - zero src, dst and length */
239  return __idxd_write_desc(dev_id, idxd_op_nop << IDXD_CMD_OP_SHIFT,
240  0, 0, 0, NULL);
241 }
242 
243 static __rte_always_inline int
244 __idxd_fence(int dev_id)
245 {
246  /* only op field needs filling - zero src, dst and length */
247  return __idxd_write_desc(dev_id, IDXD_FLAG_FENCE, 0, 0, 0, NULL);
248 }
249 
250 static __rte_always_inline void
251 __idxd_movdir64b(volatile void *dst, const struct rte_idxd_hw_desc *src)
252 {
253  asm volatile (".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
254  :
255  : "a" (dst), "d" (src)
256  : "memory");
257 }
258 
259 static __rte_always_inline int
260 __idxd_perform_ops(int dev_id)
261 {
262  struct rte_idxd_rawdev *idxd =
263  (struct rte_idxd_rawdev *)rte_rawdevs[dev_id].dev_private;
264 
265  if (!idxd->cfg.no_prefetch_completions)
266  rte_prefetch1(&idxd->desc_ring[idxd->batch_idx_ring[idxd->batch_idx_read]]);
267 
268  if (idxd->batch_size == 0)
269  return 0;
270 
271  if (idxd->batch_size == 1)
272  /* use a NOP as a null descriptor, so batch_size >= 2 */
273  if (__idxd_enqueue_nop(dev_id) != 1)
274  return -1;
275 
276  /* write completion beyond last desc in the batch */
277  uint16_t comp_idx = (idxd->batch_start + idxd->batch_size) & idxd->desc_ring_mask;
278  *((uint64_t *)&idxd->desc_ring[comp_idx]) = 0; /* zero start of desc */
279  idxd->hdl_ring_flags[comp_idx] = RTE_IDXD_HDL_INVALID;
280 
281  const struct rte_idxd_hw_desc batch_desc = {
282  .op_flags = (idxd_op_batch << IDXD_CMD_OP_SHIFT) |
283  IDXD_FLAG_COMPLETION_ADDR_VALID |
284  IDXD_FLAG_REQUEST_COMPLETION,
285  .desc_addr = __desc_idx_to_iova(idxd, idxd->batch_start),
286  .completion = __desc_idx_to_iova(idxd, comp_idx),
287  .size = idxd->batch_size,
288  };
289 
290  _mm_sfence(); /* fence before writing desc to device */
291  __idxd_movdir64b(idxd->portal, &batch_desc);
292  idxd->xstats.started += idxd->batch_size;
293 
294  idxd->batch_start += idxd->batch_size + 1;
295  idxd->batch_start &= idxd->desc_ring_mask;
296  idxd->batch_size = 0;
297 
298  idxd->batch_idx_ring[idxd->batch_idx_write++] = comp_idx;
299  if (idxd->batch_idx_write > idxd->max_batches)
300  idxd->batch_idx_write = 0;
301 
302  return 0;
303 }
304 
305 static __rte_always_inline int
306 __idxd_completed_ops(int dev_id, uint8_t max_ops, uint32_t *status, uint8_t *num_unsuccessful,
307  uintptr_t *src_hdls, uintptr_t *dst_hdls)
308 {
309  struct rte_idxd_rawdev *idxd =
310  (struct rte_idxd_rawdev *)rte_rawdevs[dev_id].dev_private;
311  unsigned short n, h_idx;
312 
313  while (idxd->batch_idx_read != idxd->batch_idx_write) {
314  uint16_t idx_to_chk = idxd->batch_idx_ring[idxd->batch_idx_read];
315  volatile struct rte_idxd_completion *comp_to_chk =
316  (struct rte_idxd_completion *)&idxd->desc_ring[idx_to_chk];
317  uint8_t batch_status = comp_to_chk->status;
318  if (batch_status == 0)
319  break;
320  comp_to_chk->status = 0;
321  if (unlikely(batch_status > 1)) {
322  /* error occurred somewhere in batch, start where last checked */
323  uint16_t desc_count = comp_to_chk->completed_size;
324  uint16_t batch_start = idxd->hdls_avail;
325  uint16_t batch_end = idx_to_chk;
326 
327  if (batch_start > batch_end)
328  batch_end += idxd->desc_ring_mask + 1;
329  /* go through each batch entry and see status */
330  for (n = 0; n < desc_count; n++) {
331  uint16_t idx = (batch_start + n) & idxd->desc_ring_mask;
332  volatile struct rte_idxd_completion *comp =
333  (struct rte_idxd_completion *)&idxd->desc_ring[idx];
334  if (comp->status != 0 &&
335  idxd->hdl_ring_flags[idx] == RTE_IDXD_HDL_NORMAL) {
336  idxd->hdl_ring_flags[idx] = RTE_IDXD_HDL_OP_FAILED;
337  idxd->hdl_ring_flags[idx] |= (comp->status << 8);
338  comp->status = 0; /* clear error for next time */
339  }
340  }
341  /* if batch is incomplete, mark rest as skipped */
342  for ( ; n < batch_end - batch_start; n++) {
343  uint16_t idx = (batch_start + n) & idxd->desc_ring_mask;
344  if (idxd->hdl_ring_flags[idx] == RTE_IDXD_HDL_NORMAL)
345  idxd->hdl_ring_flags[idx] = RTE_IDXD_HDL_OP_SKIPPED;
346  }
347  }
348  /* avail points to one after the last one written */
349  idxd->hdls_avail = (idx_to_chk + 1) & idxd->desc_ring_mask;
350  idxd->batch_idx_read++;
351  if (idxd->batch_idx_read > idxd->max_batches)
352  idxd->batch_idx_read = 0;
353  }
354 
355  n = 0;
356  h_idx = idxd->hdls_read;
357  while (h_idx != idxd->hdls_avail) {
358  uint16_t flag = idxd->hdl_ring_flags[h_idx];
359  if (flag != RTE_IDXD_HDL_INVALID) {
360  if (!idxd->cfg.hdls_disable) {
361  src_hdls[n] = idxd->hdl_ring[h_idx].src;
362  dst_hdls[n] = idxd->hdl_ring[h_idx].dst;
363  }
364  if (unlikely(flag != RTE_IDXD_HDL_NORMAL)) {
365  if (status != NULL)
366  status[n] = flag == RTE_IDXD_HDL_OP_SKIPPED ?
368  /* failure case, return err code */
369  idxd->hdl_ring_flags[h_idx] >> 8;
370  if (num_unsuccessful != NULL)
371  *num_unsuccessful += 1;
372  }
373  n++;
374  }
375  idxd->hdl_ring_flags[h_idx] = RTE_IDXD_HDL_NORMAL;
376  if (++h_idx > idxd->desc_ring_mask)
377  h_idx = 0;
378  if (n >= max_ops)
379  break;
380  }
381 
382  /* skip over any remaining blank elements, e.g. batch completion */
383  while (idxd->hdl_ring_flags[h_idx] == RTE_IDXD_HDL_INVALID && h_idx != idxd->hdls_avail) {
384  idxd->hdl_ring_flags[h_idx] = RTE_IDXD_HDL_NORMAL;
385  if (++h_idx > idxd->desc_ring_mask)
386  h_idx = 0;
387  }
388  idxd->hdls_read = h_idx;
389 
390  idxd->xstats.completed += n;
391  return n;
392 }
393 
394 #endif
#define __rte_always_inline
Definition: rte_common.h:258
uint64_t rte_iova_t
Definition: rte_common.h:463
#define rte_errno
Definition: rte_errno.h:29
#define RTE_IOAT_OP_SKIPPED
void * dev_private
#define unlikely(x)
static void rte_prefetch1(const volatile void *p)
static __rte_experimental void rte_prefetch0_write(const void *p)
Definition: rte_prefetch.h:68
#define RTE_STD_C11
Definition: rte_common.h:42