/*
 * Include guard followed by the constants used when building a descriptor's
 * op_flags word: an opcode is shifted left by IDXD_CMD_OP_SHIFT and OR-ed
 * with IDXD_FLAG_* bits (see the enqueue helpers later in this header).
 * NOTE(review): the IOAT_* constants are not referenced by any code visible
 * in this view.
 */
4 #ifndef _RTE_IDXD_RAWDEV_FNS_H_ 5 #define _RTE_IDXD_RAWDEV_FNS_H_ 23 #define IDXD_CMD_OP_SHIFT 24 32 #define IDXD_FLAG_FENCE (1 << 0) 33 #define IDXD_FLAG_COMPLETION_ADDR_VALID (1 << 2) 34 #define IDXD_FLAG_REQUEST_COMPLETION (1 << 3) 35 #define IDXD_FLAG_CACHE_CONTROL (1 << 8) 37 #define IOAT_COMP_UPDATE_SHIFT 3 38 #define IOAT_CMD_OP_SHIFT 24 65 uint16_t __reserved[13];
/*
 * NOTE(review): presumably the field read as comp_to_chk->completed_size in
 * __idxd_completed_ops(); the enclosing struct is not visible here.
 */
75 uint32_t completed_size;
78 uint32_t invalid_flags;
/*
 * Per-device state for an IDXD rawdev instance. The functions in this header
 * obtain it by casting rte_rawdevs[dev_id].dev_private.
 * NOTE(review): further members (cfg, portal, desc_ring, hdl_ring, ...) are
 * used by those functions but are not visible in this view.
 */
94 struct rte_idxd_rawdev {
95 enum rte_ioat_dev_type type;
/* Data-path counters: enqueued, enqueue_failed, started and completed are updated below. */
96 struct rte_ioat_xstats xstats;
/*
 * batch_idx_ring holds the completion-slot index of each submitted batch.
 * batch_idx_write and batch_idx_read step through it and are reset to 0
 * once they exceed max_batches.
 */
104 unsigned short max_batches;
105 unsigned short batch_idx_read;
106 unsigned short batch_idx_write;
107 unsigned short *batch_idx_ring;
/* AND-ed with descriptor/handle indexes to wrap them into the ring. */
110 unsigned short desc_ring_mask;
/* __idxd_completed_ops() walks handle slots from hdls_read up to hdls_avail. */
111 unsigned short hdls_avail;
112 unsigned short hdls_read;
/* Start index and descriptor count of the batch currently being built. */
113 unsigned short batch_start;
114 unsigned short batch_size;
/*
 * One entry per handle slot: an RTE_IDXD_HDL_* value, with a failing
 * completion status OR-ed in after a left shift by 8.
 */
121 uint16_t *hdl_ring_flags;
/*
 * RTE_IDXD_HDL_* are the values stored in hdl_ring_flags[].
 *
 * __idxd_burst_capacity(dev_id): computes the remaining descriptor ring
 * space for the device, based on the batch under construction and the
 * read position of the handle ring.
 */
124 #define RTE_IDXD_HDL_NORMAL 0 125 #define RTE_IDXD_HDL_INVALID (1 << 0) 126 #define RTE_IDXD_HDL_OP_FAILED (1 << 1) 127 #define RTE_IDXD_HDL_OP_SKIPPED (1 << 2) 130 __idxd_burst_capacity(
int dev_id)
132 struct rte_idxd_rawdev *idxd =
133 (
struct rte_idxd_rawdev *)rte_rawdevs[dev_id].dev_private;
/* Index one past the last descriptor queued in the current batch. */
134 uint16_t write_idx = idxd->batch_start + idxd->batch_size;
135 uint16_t used_space, free_space;
/*
 * Batch index ring full test: write sits at max_batches while read is at 0,
 * or write is directly behind read.
 * NOTE(review): the statement guarded by this condition is not visible here.
 */
138 if ((idxd->batch_idx_read == 0 && idxd->batch_idx_write == idxd->max_batches) ||
139 idxd->batch_idx_write + 1 == idxd->batch_idx_read)
/* Unwrap write_idx by one ring length so the subtraction gives the occupied slot count. */
143 if (idxd->hdls_read > write_idx)
144 write_idx += idxd->desc_ring_mask + 1;
145 used_space = write_idx - idxd->hdls_read;
150 free_space = idxd->desc_ring_mask - used_space;
/*
 * Two more slots are withheld from the reported capacity.
 * NOTE(review): when free_space is below 2 this expression is negative
 * before conversion to the (not visible) return type - confirm that is handled.
 */
153 return free_space - 2;
/*
 * Takes the device state and a descriptor ring index n. Callers in this
 * header store the result in descriptor .completion and .desc_addr fields.
 * NOTE(review): return type and body are not visible in this view;
 * presumably yields the IOVA of descriptor slot n - confirm.
 */
157 __desc_idx_to_iova(
struct rte_idxd_rawdev *idxd, uint16_t n)
/*
 * Fills the next descriptor slot of the batch being built and records its
 * handle state. The enqueue helpers (fill, copy, nop, fence) all call this.
 * NOTE(review): the remaining parameters (src, dst, size, hdl are used
 * below), the return/goto statements and any labels are not visible here.
 */
163 __idxd_write_desc(
int dev_id,
164 const uint32_t op_flags,
170 struct rte_idxd_rawdev *idxd =
171 (
struct rte_idxd_rawdev *)rte_rawdevs[dev_id].dev_private;
172 uint16_t write_idx = idxd->batch_start + idxd->batch_size;
173 uint16_t mask = idxd->desc_ring_mask;
/*
 * Same batch index ring full test as in __idxd_burst_capacity().
 * NOTE(review): guarded statement not visible.
 */
176 if ((idxd->batch_idx_read == 0 && idxd->batch_idx_write == idxd->max_batches) ||
177 idxd->batch_idx_write + 1 == idxd->batch_idx_read)
/*
 * Descriptor ring test: true when either of the next two slots after
 * write_idx would land on hdls_read.
 * NOTE(review): guarded statement not visible.
 */
180 if (((write_idx + 2) & mask) == idxd->hdls_read ||
181 ((write_idx + 1) & mask) == idxd->hdls_read)
/*
 * desc_ring is indexed with the unmasked write_idx, while the completion
 * address and the handle arrays use write_idx & mask.
 */
185 idxd->desc_ring[write_idx].pasid = 0;
/* Every descriptor written here gets the completion-address-valid flag. */
186 idxd->desc_ring[write_idx].op_flags = op_flags | IDXD_FLAG_COMPLETION_ADDR_VALID;
187 idxd->desc_ring[write_idx].completion = __desc_idx_to_iova(idxd, write_idx & mask);
188 idxd->desc_ring[write_idx].src = src;
189 idxd->desc_ring[write_idx].dst = dst;
190 idxd->desc_ring[write_idx].size = size;
/*
 * NOTE(review): the two assignments below are presumably alternatives chosen
 * on whether hdl is NULL (__idxd_fence passes NULL); the selecting
 * condition is not visible here.
 */
193 idxd->hdl_ring_flags[write_idx & mask] = RTE_IDXD_HDL_INVALID;
195 idxd->hdl_ring[write_idx & mask] = *hdl;
198 idxd->xstats.enqueued++;
/* NOTE(review): presumably reached only on the failure path - confirm. */
204 idxd->xstats.enqueue_failed++;
/*
 * Queues a fill operation: passes the fill opcode with the cache-control
 * flag to __idxd_write_desc(), with pattern in the source position, dst as
 * destination and length as size, and returns that call's result.
 * NOTE(review): the declaration of hdl (presumably built from dst_hdl) is
 * not visible in this view.
 */
210 __idxd_enqueue_fill(
int dev_id, uint64_t pattern,
rte_iova_t dst,
211 unsigned int length, uintptr_t dst_hdl)
216 return __idxd_write_desc(dev_id,
217 (idxd_op_fill << IDXD_CMD_OP_SHIFT) | IDXD_FLAG_CACHE_CONTROL,
218 pattern, dst, length, &hdl);
/*
 * Tail of a copy-enqueue helper: passes the memmove opcode with the
 * cache-control flag to __idxd_write_desc() along with src, dst and length,
 * and returns that call's result.
 * NOTE(review): the function name, leading parameters and the declaration
 * of hdl are not visible in this view.
 */
223 unsigned int length, uintptr_t src_hdl, uintptr_t dst_hdl)
229 return __idxd_write_desc(dev_id,
230 (idxd_op_memmove << IDXD_CMD_OP_SHIFT) | IDXD_FLAG_CACHE_CONTROL,
231 src, dst, length, &hdl);
/*
 * Queues a descriptor carrying only the nop opcode (no extra flags) via
 * __idxd_write_desc(). __idxd_perform_ops() calls it when the batch holds
 * exactly one descriptor.
 * NOTE(review): the remaining call arguments are not visible in this view.
 */
235 __idxd_enqueue_nop(
int dev_id)
238 return __idxd_write_desc(dev_id, idxd_op_nop << IDXD_CMD_OP_SHIFT,
/*
 * Queues a descriptor whose op_flags is just IDXD_FLAG_FENCE (no opcode
 * bits), with zero src/dst/size and a NULL handle pointer, and returns the
 * result of __idxd_write_desc().
 */
243 __idxd_fence(
int dev_id)
246 return __idxd_write_desc(dev_id, IDXD_FLAG_FENCE, 0, 0, 0, NULL);
/*
 * Emits a hand-encoded instruction as raw bytes, with dst bound to the "a"
 * register constraint and src to the "d" register constraint.
 * NOTE(review): presumably the body of __idxd_movdir64b(), which
 * __idxd_perform_ops() calls with the portal and the batch descriptor; the
 * bytes look like the MOVDIR64B encoding (66 0F 38 F8 /r) - confirm against
 * the Intel SDM. The function signature and the rest of the operand and
 * clobber lists are not visible in this view.
 */
252 asm volatile (
".byte 0x66, 0x0f, 0x38, 0xf8, 0x02" 254 :
"a" (dst),
"d" (src)
/*
 * Submits the batch currently being built: reserves a completion slot just
 * past the batch, builds a batch descriptor, writes it to the device portal
 * and records the completion slot in the batch index ring.
 * NOTE(review): the return statements and the declaration line of
 * batch_desc are not visible in this view.
 */
259 __idxd_perform_ops(
int dev_id)
261 struct rte_idxd_rawdev *idxd =
262 (
struct rte_idxd_rawdev *)rte_rawdevs[dev_id].dev_private;
/* Unless disabled in cfg, prefetch the completion slot of the oldest outstanding batch. */
264 if (!idxd->cfg.no_prefetch_completions)
265 rte_prefetch1(&idxd->desc_ring[idxd->batch_idx_ring[idxd->batch_idx_read]]);
/* NOTE(review): the statement guarded by the empty-batch test is not visible. */
267 if (idxd->batch_size == 0)
/*
 * A batch of exactly one descriptor gets a nop appended first.
 * NOTE(review): the statement taken when the nop enqueue does not return 1
 * is not visible.
 */
270 if (idxd->batch_size == 1)
272 if (__idxd_enqueue_nop(dev_id) != 1)
/* Completion record for the batch lives in the slot right after its last descriptor. */
276 uint16_t comp_idx = (idxd->batch_start + idxd->batch_size) & idxd->desc_ring_mask;
/* Zero the first 8 bytes of that slot; __idxd_completed_ops() polls its status for non-zero. */
277 *((uint64_t *)&idxd->desc_ring[comp_idx]) = 0;
/* The completion slot carries no user handle, so mark it invalid. */
278 idxd->hdl_ring_flags[comp_idx] = RTE_IDXD_HDL_INVALID;
/* Batch descriptor: points at the first descriptor of the batch and at the completion slot. */
281 .op_flags = (idxd_op_batch << IDXD_CMD_OP_SHIFT) |
282 IDXD_FLAG_COMPLETION_ADDR_VALID |
283 IDXD_FLAG_REQUEST_COMPLETION,
284 .desc_addr = __desc_idx_to_iova(idxd, idxd->batch_start),
285 .completion = __desc_idx_to_iova(idxd, comp_idx),
286 .size = idxd->batch_size,
290 __idxd_movdir64b(idxd->portal, &batch_desc);
291 idxd->xstats.started += idxd->batch_size;
/* Next batch starts after this batch's descriptors plus its completion slot. */
293 idxd->batch_start += idxd->batch_size + 1;
294 idxd->batch_start &= idxd->desc_ring_mask;
295 idxd->batch_size = 0;
/* Record the completion slot; the write index wraps to 0 once it exceeds max_batches. */
297 idxd->batch_idx_ring[idxd->batch_idx_write++] = comp_idx;
298 if (idxd->batch_idx_write > idxd->max_batches)
299 idxd->batch_idx_write = 0;
/*
 * Reaps finished batches and hands completed handles back to the caller.
 * It first scans the batch index ring, tagging handle slots of each finished
 * batch as failed or skipped where applicable and advancing hdls_avail.
 * It then walks handle slots from hdls_read to hdls_avail, filling
 * src_hdls/dst_hdls (unless cfg.hdls_disable) and status, and counting
 * unsuccessful operations.
 * NOTE(review): several statements are not visible in this view - the
 * declarations of comp_to_chk and comp, loop exits, closing braces, the
 * max_ops bound, the reset of n before the second phase and the return.
 */
305 __idxd_completed_ops(
int dev_id, uint8_t max_ops, uint32_t *status, uint8_t *num_unsuccessful,
306 uintptr_t *src_hdls, uintptr_t *dst_hdls)
308 struct rte_idxd_rawdev *idxd =
309 (
struct rte_idxd_rawdev *)rte_rawdevs[dev_id].dev_private;
310 unsigned short n, h_idx;
/* Phase 1: examine submitted batches that have not been reaped yet. */
312 while (idxd->batch_idx_read != idxd->batch_idx_write) {
313 uint16_t idx_to_chk = idxd->batch_idx_ring[idxd->batch_idx_read];
/*
 * NOTE(review): the statement guarded by the zero-status test is not
 * visible; presumably it leaves the loop because the batch is still pending.
 */
316 uint8_t batch_status = comp_to_chk->status;
317 if (batch_status == 0)
/* Clear the status once it has been read. */
319 comp_to_chk->status = 0;
/*
 * NOTE(review): any condition enclosing the section below is not visible.
 * The batch covers slots from hdls_avail up to its completion slot
 * idx_to_chk; batch_end is unwrapped by one ring length when needed.
 */
322 uint16_t desc_count = comp_to_chk->completed_size;
323 uint16_t batch_start = idxd->hdls_avail;
324 uint16_t batch_end = idx_to_chk;
326 if (batch_start > batch_end)
327 batch_end += idxd->desc_ring_mask + 1;
/*
 * First desc_count slots: a slot still NORMAL whose completion status is
 * non-zero is marked failed, with that status kept in the bits above 8.
 */
329 for (n = 0; n < desc_count; n++) {
330 uint16_t idx = (batch_start + n) & idxd->desc_ring_mask;
333 if (comp->status != 0 &&
334 idxd->hdl_ring_flags[idx] == RTE_IDXD_HDL_NORMAL) {
335 idxd->hdl_ring_flags[idx] = RTE_IDXD_HDL_OP_FAILED;
336 idxd->hdl_ring_flags[idx] |= (comp->status << 8);
/* Remaining slots of the batch that are still NORMAL are marked skipped. */
341 for ( ; n < batch_end - batch_start; n++) {
342 uint16_t idx = (batch_start + n) & idxd->desc_ring_mask;
343 if (idxd->hdl_ring_flags[idx] == RTE_IDXD_HDL_NORMAL)
344 idxd->hdl_ring_flags[idx] = RTE_IDXD_HDL_OP_SKIPPED;
/* Handles up to and including the completion slot become available; advance the batch read index with wrap. */
348 idxd->hdls_avail = (idx_to_chk + 1) & idxd->desc_ring_mask;
349 idxd->batch_idx_read++;
350 if (idxd->batch_idx_read > idxd->max_batches)
351 idxd->batch_idx_read = 0;
/* Phase 2: walk available handle slots; n indexes the caller's output arrays. */
355 h_idx = idxd->hdls_read;
356 while (h_idx != idxd->hdls_avail) {
357 uint16_t flag = idxd->hdl_ring_flags[h_idx];
/* Slots flagged INVALID carry no user handle and produce no output entry. */
358 if (flag != RTE_IDXD_HDL_INVALID) {
359 if (!idxd->cfg.hdls_disable) {
360 src_hdls[n] = idxd->hdl_ring[h_idx].src;
361 dst_hdls[n] = idxd->hdl_ring[h_idx].dst;
363 if (
unlikely(flag != RTE_IDXD_HDL_NORMAL)) {
/*
 * NOTE(review): the value used for the skipped case is not visible;
 * otherwise the status code stored above bit 8 is reported.
 */
365 status[n] = flag == RTE_IDXD_HDL_OP_SKIPPED ?
368 idxd->hdl_ring_flags[h_idx] >> 8;
369 if (num_unsuccessful != NULL)
370 *num_unsuccessful += 1;
/* Reset the slot for reuse. NOTE(review): the wrap statement after the index test is not visible. */
374 idxd->hdl_ring_flags[h_idx] = RTE_IDXD_HDL_NORMAL;
375 if (++h_idx > idxd->desc_ring_mask)
/* Consume any further INVALID slots up to hdls_avail, resetting each to NORMAL. */
382 while (idxd->hdl_ring_flags[h_idx] == RTE_IDXD_HDL_INVALID && h_idx != idxd->hdls_avail) {
383 idxd->hdl_ring_flags[h_idx] = RTE_IDXD_HDL_NORMAL;
384 if (++h_idx > idxd->desc_ring_mask)
/* Publish the new read position and account for the n operations reported. */
387 idxd->hdls_read = h_idx;
389 idxd->xstats.completed += n;
/*
 * Two macros defined with an empty replacement list, followed by three
 * declarations.
 * NOTE(review): the declarations have no body or terminating semicolon in
 * this view, and the last one names struct rte_eth_link, which nothing else
 * visible here refers to - confirm these lines belong in this header.
 */
#define __rte_always_inline
#define RTE_IOAT_OP_SKIPPED
static void rte_prefetch1(const volatile void *p)
static __rte_experimental void rte_prefetch0_write(const void *p)
__extension__ struct rte_eth_link __rte_aligned(8)