api/rte__rcu__qsbr_8h_source.html

/* SPDX-License-Identifier: BSD-3-Clause

 * Copyright (c) 2018-2020 Arm Limited

 */


#ifndef _RTE_RCU_QSBR_H_

#define _RTE_RCU_QSBR_H_


#include <inttypes.h>

#include <stdalign.h>

#include <stdbool.h>

#include <stdio.h>

#include <stdint.h>


#include <rte_common.h>

#include <rte_debug.h>

#include <rte_atomic.h>

#include <rte_ring.h>


#ifdef __cplusplus

extern "C" {

#endif


extern int rte_rcu_log_type;

#define RTE_LOGTYPE_RCU rte_rcu_log_type


#if RTE_LOG_DP_LEVEL >= RTE_LOG_DEBUG

#define __RTE_RCU_DP_LOG(level, ...) \

    RTE_LOG_DP_LINE_PREFIX(level, RCU, "%s(): ", __func__, __VA_ARGS__)

#else

#define __RTE_RCU_DP_LOG(level, ...)

#endif


#if defined(RTE_LIBRTE_RCU_DEBUG)

#define __RTE_RCU_IS_LOCK_CNT_ZERO(v, thread_id, level, ...) do { \

    if (v->qsbr_cnt[thread_id].lock_cnt) \

        RTE_LOG_LINE_PREFIX(level, RCU, "%s(): ", __func__, __VA_ARGS__); \

} while (0)

#else

#define __RTE_RCU_IS_LOCK_CNT_ZERO(v, thread_id, level, ...)

#endif


/* Registered thread IDs are stored as a bitmap of 64b element array.

 * Given thread id needs to be converted to index into the array and

 * the id within the array element.

 */

#define __RTE_QSBR_THRID_ARRAY_ELM_SIZE (sizeof(RTE_ATOMIC(uint64_t)) * 8)

#define __RTE_QSBR_THRID_ARRAY_SIZE(max_threads) \

    RTE_ALIGN(RTE_ALIGN_MUL_CEIL(max_threads, \

        __RTE_QSBR_THRID_ARRAY_ELM_SIZE) >> 3, RTE_CACHE_LINE_SIZE)

#define __RTE_QSBR_THRID_ARRAY_ELM(v, i) ((uint64_t __rte_atomic *) \

    ((struct rte_rcu_qsbr_cnt *)(v + 1) + v->max_threads) + i)

#define __RTE_QSBR_THRID_INDEX_SHIFT 6

#define __RTE_QSBR_THRID_MASK 0x3f

#define RTE_QSBR_THRID_INVALID 0xffffffff


/* Worker thread counter */

struct __rte_cache_aligned rte_rcu_qsbr_cnt {

    RTE_ATOMIC(uint64_t) cnt;

    RTE_ATOMIC(uint32_t) lock_cnt;

};


#define __RTE_QSBR_CNT_THR_OFFLINE 0

#define __RTE_QSBR_CNT_INIT 1

#define __RTE_QSBR_CNT_MAX ((uint64_t)~0)

#define __RTE_QSBR_TOKEN_SIZE sizeof(uint64_t)


/* RTE Quiescent State variable structure.

 * This structure has two elements that vary in size based on the

 * 'max_threads' parameter.

 * 1) Quiescent state counter array

 * 2) Register thread ID array

 */

struct __rte_cache_aligned rte_rcu_qsbr {

    alignas(RTE_CACHE_LINE_SIZE) RTE_ATOMIC(uint64_t) token;

    RTE_ATOMIC(uint64_t) acked_token;

    alignas(RTE_CACHE_LINE_SIZE) uint32_t num_elems;

    RTE_ATOMIC(uint32_t) num_threads;

    uint32_t max_threads;

    alignas(RTE_CACHE_LINE_SIZE) struct rte_rcu_qsbr_cnt qsbr_cnt[];

};


typedef void (*rte_rcu_qsbr_free_resource_t)(void *p, void *e, unsigned int n);


#define RTE_RCU_QSBR_DQ_NAMESIZE RTE_RING_NAMESIZE


#define RTE_RCU_QSBR_DQ_MT_UNSAFE 1


struct rte_rcu_qsbr_dq_parameters {

    const char *name;

    uint32_t flags;

    uint32_t size;

    uint32_t esize;

    uint32_t trigger_reclaim_limit;

    uint32_t max_reclaim_size;

    rte_rcu_qsbr_free_resource_t free_fn;

    void *p;

    struct rte_rcu_qsbr *v;

};


/* RTE defer queue structure.

 * This structure holds the defer queue. The defer queue is used to

 * hold the deleted entries from the data structure that are not

 * yet freed.

 */

struct rte_rcu_qsbr_dq;


size_t

rte_rcu_qsbr_get_memsize(uint32_t max_threads);


int

rte_rcu_qsbr_init(struct rte_rcu_qsbr *v, uint32_t max_threads);


int

rte_rcu_qsbr_thread_register(struct rte_rcu_qsbr *v, unsigned int thread_id);


int

rte_rcu_qsbr_thread_unregister(struct rte_rcu_qsbr *v, unsigned int thread_id);


static __rte_always_inline void

rte_rcu_qsbr_thread_online(struct rte_rcu_qsbr *v, unsigned int thread_id)

{

    uint64_t t;


    RTE_ASSERT(v != NULL && thread_id < v->max_threads);


    __RTE_RCU_IS_LOCK_CNT_ZERO(v, thread_id, ERR, "Lock counter %u",

                v->qsbr_cnt[thread_id].lock_cnt);


    /* Copy the current value of token.

     * The fence at the end of the function will ensure that

     * the following will not move down after the load of any shared

     * data structure.

     */

    t = rte_atomic_load_explicit(&v->token, rte_memory_order_relaxed);


    /* rte_atomic_store_explicit(cnt, rte_memory_order_relaxed) is used to ensure

     * 'cnt' (64b) is accessed atomically.

     */

    rte_atomic_store_explicit(&v->qsbr_cnt[thread_id].cnt,

        t, rte_memory_order_relaxed);


    /* The subsequent load of the data structure should not

     * move above the store. Hence a store-load barrier

     * is required.

     * If the load of the data structure moves above the store,

     * writer might not see that the reader is online, even though

     * the reader is referencing the shared data structure.

     */

    rte_atomic_thread_fence(rte_memory_order_seq_cst);

}


static __rte_always_inline void

rte_rcu_qsbr_thread_offline(struct rte_rcu_qsbr *v, unsigned int thread_id)

{

    RTE_ASSERT(v != NULL && thread_id < v->max_threads);


    __RTE_RCU_IS_LOCK_CNT_ZERO(v, thread_id, ERR, "Lock counter %u",

                v->qsbr_cnt[thread_id].lock_cnt);


    /* The reader can go offline only after the load of the

     * data structure is completed. i.e. any load of the

     * data structure can not move after this store.

     */


    rte_atomic_store_explicit(&v->qsbr_cnt[thread_id].cnt,

        __RTE_QSBR_CNT_THR_OFFLINE, rte_memory_order_release);

}


static __rte_always_inline void

rte_rcu_qsbr_lock(__rte_unused struct rte_rcu_qsbr *v,

            __rte_unused unsigned int thread_id)

{

    RTE_ASSERT(v != NULL && thread_id < v->max_threads);


#if defined(RTE_LIBRTE_RCU_DEBUG)

    /* Increment the lock counter */

    rte_atomic_fetch_add_explicit(&v->qsbr_cnt[thread_id].lock_cnt,

                1, rte_memory_order_acquire);

#endif

}


static __rte_always_inline void

rte_rcu_qsbr_unlock(__rte_unused struct rte_rcu_qsbr *v,

            __rte_unused unsigned int thread_id)

{

    RTE_ASSERT(v != NULL && thread_id < v->max_threads);


#if defined(RTE_LIBRTE_RCU_DEBUG)

    /* Decrement the lock counter */

    rte_atomic_fetch_sub_explicit(&v->qsbr_cnt[thread_id].lock_cnt,

                1, rte_memory_order_release);


    __RTE_RCU_IS_LOCK_CNT_ZERO(v, thread_id, WARNING,

                "Lock counter %u. Nested locks?",

                v->qsbr_cnt[thread_id].lock_cnt);

#endif

}


static __rte_always_inline uint64_t

rte_rcu_qsbr_start(struct rte_rcu_qsbr *v)

{

    uint64_t t;


    RTE_ASSERT(v != NULL);


    /* Release the changes to the shared data structure.

     * This store release will ensure that changes to any data

     * structure are visible to the workers before the token

     * update is visible.

     */

    t = rte_atomic_fetch_add_explicit(&v->token, 1, rte_memory_order_release) + 1;


    return t;

}


static __rte_always_inline void

rte_rcu_qsbr_quiescent(struct rte_rcu_qsbr *v, unsigned int thread_id)

{

    uint64_t t;


    RTE_ASSERT(v != NULL && thread_id < v->max_threads);


    __RTE_RCU_IS_LOCK_CNT_ZERO(v, thread_id, ERR, "Lock counter %u",

                v->qsbr_cnt[thread_id].lock_cnt);


    /* Acquire the changes to the shared data structure released

     * by rte_rcu_qsbr_start.

     * Later loads of the shared data structure should not move

     * above this load. Hence, use load-acquire.

     */

    t = rte_atomic_load_explicit(&v->token, rte_memory_order_acquire);


    /* Check if there are updates available from the writer.

     * Inform the writer that updates are visible to this reader.

     * Prior loads of the shared data structure should not move

     * beyond this store. Hence use store-release.

     */

    if (t != rte_atomic_load_explicit(&v->qsbr_cnt[thread_id].cnt, rte_memory_order_relaxed))

        rte_atomic_store_explicit(&v->qsbr_cnt[thread_id].cnt,

                     t, rte_memory_order_release);


    __RTE_RCU_DP_LOG(DEBUG, "%s: update: token = %" PRIu64 ", Thread ID = %d",

        __func__, t, thread_id);

}


/* Check the quiescent state counter for registered threads only, assuming

 * that not all threads have registered.

 */

static __rte_always_inline int

__rte_rcu_qsbr_check_selective(struct rte_rcu_qsbr *v, uint64_t t, bool wait)

{

    uint32_t i, j, id;

    uint64_t bmap;

    uint64_t c;

    RTE_ATOMIC(uint64_t) *reg_thread_id;

    uint64_t acked_token = __RTE_QSBR_CNT_MAX;


    for (i = 0, reg_thread_id = __RTE_QSBR_THRID_ARRAY_ELM(v, 0);

        i < v->num_elems;

        i++, reg_thread_id++) {

        /* Load the current registered thread bit map before

         * loading the reader thread quiescent state counters.

         */

        bmap = rte_atomic_load_explicit(reg_thread_id, rte_memory_order_acquire);

        id = i << __RTE_QSBR_THRID_INDEX_SHIFT;


        while (bmap) {

            j = rte_ctz64(bmap);

            __RTE_RCU_DP_LOG(DEBUG,

                "%s: check: token = %" PRIu64 ", wait = %d, Bit Map = 0x%" PRIx64 ", Thread ID = %d",

                __func__, t, wait, bmap, id + j);

            c = rte_atomic_load_explicit(

                    &v->qsbr_cnt[id + j].cnt,

                    rte_memory_order_acquire);

            __RTE_RCU_DP_LOG(DEBUG,

                "%s: status: token = %" PRIu64 ", wait = %d, Thread QS cnt = %" PRIu64 ", Thread ID = %d",

                __func__, t, wait, c, id+j);


            /* Counter is not checked for wrap-around condition

             * as it is a 64b counter.

             */

            if (unlikely(c !=

                __RTE_QSBR_CNT_THR_OFFLINE && c < t)) {

                /* This thread is not in quiescent state */

                if (!wait)

                    return 0;


                rte_pause();

                /* This thread might have unregistered.

                 * Re-read the bitmap.

                 */

                bmap = rte_atomic_load_explicit(reg_thread_id,

                        rte_memory_order_acquire);


                continue;

            }


            /* This thread is in quiescent state. Use the counter

             * to find the least acknowledged token among all the

             * readers.

             */

            if (c != __RTE_QSBR_CNT_THR_OFFLINE && acked_token > c)

                acked_token = c;


            bmap &= ~(1UL << j);

        }

    }


    /* All readers are checked, update least acknowledged token.

     * There might be multiple writers trying to update this. There is

     * no need to update this very accurately using compare-and-swap.

     */

    if (acked_token != __RTE_QSBR_CNT_MAX)

        rte_atomic_store_explicit(&v->acked_token, acked_token,

            rte_memory_order_relaxed);


    return 1;

}


/* Check the quiescent state counter for all threads, assuming that

 * all the threads have registered.

 */

static __rte_always_inline int

__rte_rcu_qsbr_check_all(struct rte_rcu_qsbr *v, uint64_t t, bool wait)

{

    uint32_t i;

    struct rte_rcu_qsbr_cnt *cnt;

    uint64_t c;

    uint64_t acked_token = __RTE_QSBR_CNT_MAX;


    for (i = 0, cnt = v->qsbr_cnt; i < v->max_threads; i++, cnt++) {

        __RTE_RCU_DP_LOG(DEBUG,

            "%s: check: token = %" PRIu64 ", wait = %d, Thread ID = %d",

            __func__, t, wait, i);

        while (1) {

            c = rte_atomic_load_explicit(&cnt->cnt, rte_memory_order_acquire);

            __RTE_RCU_DP_LOG(DEBUG,

                "%s: status: token = %" PRIu64 ", wait = %d, Thread QS cnt = %" PRIu64 ", Thread ID = %d",

                __func__, t, wait, c, i);


            /* Counter is not checked for wrap-around condition

             * as it is a 64b counter.

             */

            if (likely(c == __RTE_QSBR_CNT_THR_OFFLINE || c >= t))

                break;


            /* This thread is not in quiescent state */

            if (!wait)

                return 0;


            rte_pause();

        }


        /* This thread is in quiescent state. Use the counter to find

         * the least acknowledged token among all the readers.

         */

        if (likely(c != __RTE_QSBR_CNT_THR_OFFLINE && acked_token > c))

            acked_token = c;

    }


    /* All readers are checked, update least acknowledged token.

     * There might be multiple writers trying to update this. There is

     * no need to update this very accurately using compare-and-swap.

     */

    if (acked_token != __RTE_QSBR_CNT_MAX)

        rte_atomic_store_explicit(&v->acked_token, acked_token,

            rte_memory_order_relaxed);


    return 1;

}


static __rte_always_inline int

rte_rcu_qsbr_check(struct rte_rcu_qsbr *v, uint64_t t, bool wait)

{

    uint64_t acked_token;


    RTE_ASSERT(v != NULL);


    /* Check if all the readers have already acknowledged this token */

    acked_token = rte_atomic_load_explicit(&v->acked_token,

                        rte_memory_order_relaxed);

    if (likely(t <= acked_token)) {

        __RTE_RCU_DP_LOG(DEBUG,

            "%s: check: token = %" PRIu64 ", wait = %d",

            __func__, t, wait);

        __RTE_RCU_DP_LOG(DEBUG,

            "%s: status: least acked token = %" PRIu64,

            __func__, acked_token);

        return 1;

    }


    if (likely(v->num_threads == v->max_threads))

        return __rte_rcu_qsbr_check_all(v, t, wait);

    else

        return __rte_rcu_qsbr_check_selective(v, t, wait);

}


void

rte_rcu_qsbr_synchronize(struct rte_rcu_qsbr *v, unsigned int thread_id);


int

rte_rcu_qsbr_dump(FILE *f, struct rte_rcu_qsbr *v);


struct rte_rcu_qsbr_dq *

rte_rcu_qsbr_dq_create(const struct rte_rcu_qsbr_dq_parameters *params);


int

rte_rcu_qsbr_dq_enqueue(struct rte_rcu_qsbr_dq *dq, void *e);


int

rte_rcu_qsbr_dq_reclaim(struct rte_rcu_qsbr_dq *dq, unsigned int n,

    unsigned int *freed, unsigned int *pending, unsigned int *available);


int

rte_rcu_qsbr_dq_delete(struct rte_rcu_qsbr_dq *dq);


#ifdef __cplusplus

}

#endif


#endif /* _RTE_RCU_QSBR_H_ */

rte_atomic.h

rte_atomic_thread_fence
static void rte_atomic_thread_fence(rte_memory_order memorder)

rte_ctz64
static unsigned int rte_ctz64(uint64_t v)
Definition: rte_bitops.h:1062

likely
#define likely(x)
Definition: rte_branch_prediction.h:26

unlikely
#define unlikely(x)
Definition: rte_branch_prediction.h:43

rte_common.h

__rte_cache_aligned
#define __rte_cache_aligned
Definition: rte_common.h:739

__rte_unused
#define __rte_unused
Definition: rte_common.h:248

__rte_always_inline
#define __rte_always_inline
Definition: rte_common.h:490

rte_debug.h

rte_pause
static void rte_pause(void)

rte_rcu_qsbr_start
static __rte_always_inline uint64_t rte_rcu_qsbr_start(struct rte_rcu_qsbr *v)
Definition: rte_rcu_qsbr.h:449

rte_rcu_qsbr_dq_enqueue
int rte_rcu_qsbr_dq_enqueue(struct rte_rcu_qsbr_dq *dq, void *e)

rte_rcu_qsbr_unlock
static __rte_always_inline void rte_rcu_qsbr_unlock(__rte_unused struct rte_rcu_qsbr *v, __rte_unused unsigned int thread_id)
Definition: rte_rcu_qsbr.h:419

rte_rcu_qsbr_thread_register
int rte_rcu_qsbr_thread_register(struct rte_rcu_qsbr *v, unsigned int thread_id)

rte_rcu_qsbr_thread_online
static __rte_always_inline void rte_rcu_qsbr_thread_online(struct rte_rcu_qsbr *v, unsigned int thread_id)
Definition: rte_rcu_qsbr.h:296

rte_rcu_qsbr_thread_unregister
int rte_rcu_qsbr_thread_unregister(struct rte_rcu_qsbr *v, unsigned int thread_id)

rte_rcu_qsbr_lock
static __rte_always_inline void rte_rcu_qsbr_lock(__rte_unused struct rte_rcu_qsbr *v, __rte_unused unsigned int thread_id)
Definition: rte_rcu_qsbr.h:386

rte_rcu_qsbr_init
int rte_rcu_qsbr_init(struct rte_rcu_qsbr *v, uint32_t max_threads)

rte_rcu_qsbr_thread_offline
static __rte_always_inline void rte_rcu_qsbr_thread_offline(struct rte_rcu_qsbr *v, unsigned int thread_id)
Definition: rte_rcu_qsbr.h:349

rte_rcu_qsbr_dq_create
struct rte_rcu_qsbr_dq * rte_rcu_qsbr_dq_create(const struct rte_rcu_qsbr_dq_parameters *params)

rte_rcu_qsbr_dq_reclaim
int rte_rcu_qsbr_dq_reclaim(struct rte_rcu_qsbr_dq *dq, unsigned int n, unsigned int *freed, unsigned int *pending, unsigned int *available)

rte_rcu_qsbr_free_resource_t
void(* rte_rcu_qsbr_free_resource_t)(void *p, void *e, unsigned int n)
Definition: rte_rcu_qsbr.h:131

rte_rcu_qsbr_synchronize
void rte_rcu_qsbr_synchronize(struct rte_rcu_qsbr *v, unsigned int thread_id)

rte_rcu_qsbr_dump
int rte_rcu_qsbr_dump(FILE *f, struct rte_rcu_qsbr *v)

rte_rcu_qsbr_dq_delete
int rte_rcu_qsbr_dq_delete(struct rte_rcu_qsbr_dq *dq)

rte_rcu_qsbr_check
static __rte_always_inline int rte_rcu_qsbr_check(struct rte_rcu_qsbr *v, uint64_t t, bool wait)
Definition: rte_rcu_qsbr.h:665

rte_rcu_qsbr_get_memsize
size_t rte_rcu_qsbr_get_memsize(uint32_t max_threads)

rte_rcu_qsbr_quiescent
static __rte_always_inline void rte_rcu_qsbr_quiescent(struct rte_rcu_qsbr *v, unsigned int thread_id)
Definition: rte_rcu_qsbr.h:478

rte_ring.h

rte_rcu_qsbr_dq_parameters
Definition: rte_rcu_qsbr.h:148

rte_rcu_qsbr_dq_parameters::p
void * p
Definition: rte_rcu_qsbr.h:182

rte_rcu_qsbr_dq_parameters::v
struct rte_rcu_qsbr * v
Definition: rte_rcu_qsbr.h:187

rte_rcu_qsbr_dq_parameters::free_fn
rte_rcu_qsbr_free_resource_t free_fn
Definition: rte_rcu_qsbr.h:180

rte_rcu_qsbr_dq_parameters::esize
uint32_t esize
Definition: rte_rcu_qsbr.h:160

rte_rcu_qsbr_dq_parameters::flags
uint32_t flags
Definition: rte_rcu_qsbr.h:151

rte_rcu_qsbr_dq_parameters::trigger_reclaim_limit
uint32_t trigger_reclaim_limit
Definition: rte_rcu_qsbr.h:164

rte_rcu_qsbr_dq_parameters::name
const char * name
Definition: rte_rcu_qsbr.h:149

rte_rcu_qsbr_dq_parameters::size
uint32_t size
Definition: rte_rcu_qsbr.h:153

rte_rcu_qsbr_dq_parameters::max_reclaim_size
uint32_t max_reclaim_size
Definition: rte_rcu_qsbr.h:174