/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Queued spinlock defines
 *
 * This file contains macro definitions and functions shared between different
 * qspinlock slow path implementations.
 */
#ifndef __LINUX_QSPINLOCK_H
#define __LINUX_QSPINLOCK_H

#include <asm-generic/percpu.h>
#include <linux/percpu-defs.h>
#include <asm-generic/qspinlock.h>
#include <asm-generic/mcs_spinlock.h>

#define _Q_MAX_NODES	4

/*
 * The pending bit spinning loop count.
 * This heuristic is used to limit the number of lockword accesses
 * made by atomic_cond_read_relaxed when waiting for the lock to
 * transition out of the "== _Q_PENDING_VAL" state. We don't spin
 * indefinitely because there's no guarantee that we'll make forward
 * progress.
 */
#ifndef _Q_PENDING_LOOPS
#define _Q_PENDING_LOOPS	1
#endif
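
/*
 * Architectures for which spinning on the pending bit is cheap can override
 * this default by defining _Q_PENDING_LOOPS in their asm/qspinlock.h before
 * this header is pulled in (x86, for example, uses a much larger value).
 */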

/*
 * On 64-bit architectures, the mcs_spinlock structure will be 16 bytes in
 * size and four of them will fit nicely in one 64-byte cacheline. For
 * pvqspinlock, however, we need more space for extra data. To accommodate
 * that, we insert two more long words to pad it up to 32 bytes. IOW, only
 * two of them can fit in a cacheline in this case. That is OK as it is rare
 * to have more than 2 levels of slowpath nesting in actual use. We don't
 * want to penalize pvqspinlocks to optimize for a rare case in native
 * qspinlocks.
 */
struct qnode {
	struct mcs_spinlock mcs;
#ifdef CONFIG_PARAVIRT_SPINLOCKS
	long reserved[2];
#endif
};
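
/*
 * Size arithmetic for reference, assuming the generic struct mcs_spinlock
 * layout (a next pointer plus two ints): 8 + 4 + 4 = 16 bytes per node on
 * 64-bit, i.e. four qnodes per 64-byte cacheline; with
 * CONFIG_PARAVIRT_SPINLOCKS the two reserved longs above pad each qnode to
 * 32 bytes, so only two share a cacheline.
 */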

/*
 * We must be able to distinguish between no-tail and the tail at 0:0,
 * therefore increment the cpu number by one.
 */

static inline __pure u32 encode_tail(int cpu, int idx)
{
	u32 tail;

	tail  = (cpu + 1) << _Q_TAIL_CPU_OFFSET;
	tail |= idx << _Q_TAIL_IDX_OFFSET; /* assume < 4 */

	return tail;
}
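
/*
 * Worked example of the tail encoding, assuming a configuration where
 * _Q_PENDING_BITS == 8 so that _Q_TAIL_IDX_OFFSET == 16 and
 * _Q_TAIL_CPU_OFFSET == 18:
 *
 *	encode_tail(0, 0) == 1 << 18, which is non-zero, so a tail of 0
 *	unambiguously means "no queue";
 *	encode_tail(2, 1) == (3 << 18) | (1 << 16).
 *
 * decode_tail() below inverts the mapping and hands back the matching
 * per-CPU MCS node.
 */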

static inline __pure struct mcs_spinlock *decode_tail(u32 tail,
						       struct qnode __percpu *qnodes)
{
	int cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1;
	int idx = (tail &  _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;

	return per_cpu_ptr(&qnodes[idx].mcs, cpu);
}

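/*
 * grab_mcs_node - pick the MCS node for a given nesting level
 * @base: Pointer to the MCS node embedded in this CPU's first qnode
 * @idx : Nesting level, 0.._Q_MAX_NODES-1
 *
 * The per-CPU nodes are an array of struct qnode whose first member is the
 * mcs_spinlock, so stepping @idx qnodes past @base yields the node used at
 * that nesting level.
 */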
static inline __pure
struct mcs_spinlock *grab_mcs_node(struct mcs_spinlock *base, int idx)
{
	return &((struct qnode *)base + idx)->mcs;
}

#define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)

#if _Q_PENDING_BITS == 8
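/*
 * With _Q_PENDING_BITS == 8 (NR_CPUS below 16K in the generic
 * qspinlock_types.h layout) the pending bit and the tail occupy their own
 * byte and halfword, so they can be updated with plain stores and a
 * halfword xchg. Otherwise the fields share bits with their neighbours and
 * the #else branch further down has to use full-word atomic RMW operations
 * instead.
 */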
/**
 * clear_pending - clear the pending bit.
 * @lock: Pointer to queued spinlock structure
 *
 * *,1,* -> *,0,*
 */
static __always_inline void clear_pending(struct qspinlock *lock)
{
	WRITE_ONCE(lock->pending, 0);
}

/**
 * clear_pending_set_locked - take ownership and clear the pending bit.
 * @lock: Pointer to queued spinlock structure
 *
 * *,1,0 -> *,0,1
 *
 * Lock stealing is not allowed if this function is used.
 */
static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
{
	WRITE_ONCE(lock->locked_pending, _Q_LOCKED_VAL);
}

/**
 * xchg_tail - Put in the new queue tail code word & retrieve previous one
 * @lock : Pointer to queued spinlock structure
 * @tail : The new queue tail code word
 * Return: The previous queue tail code word
 *
 * xchg(lock, tail), which heads an address dependency
 *
 * p,*,* -> n,*,* ; prev = xchg(lock, node)
 */
static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
{
	/*
	 * We can use relaxed semantics since the caller ensures that the
	 * MCS node is properly initialized before updating the tail.
	 */
	return (u32)xchg_relaxed(&lock->tail,
				 tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
}

#else /* _Q_PENDING_BITS == 8 */

/**
 * clear_pending - clear the pending bit.
 * @lock: Pointer to queued spinlock structure
 *
 * *,1,* -> *,0,*
 */
static __always_inline void clear_pending(struct qspinlock *lock)
{
	atomic_andnot(_Q_PENDING_VAL, &lock->val);
}

/**
 * clear_pending_set_locked - take ownership and clear the pending bit.
 * @lock: Pointer to queued spinlock structure
 *
 * *,1,0 -> *,0,1
 */
static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
{
	atomic_add(-_Q_PENDING_VAL + _Q_LOCKED_VAL, &lock->val);
}
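
/*
 * Why a plain atomic_add() is enough above: the *,1,0 -> *,0,1 contract
 * means the pending bit is known to be set and the locked byte known to be
 * zero, so adding (-_Q_PENDING_VAL + _Q_LOCKED_VAL) clears the one and sets
 * the other without carrying into neighbouring fields.
 */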

/**
 * xchg_tail - Put in the new queue tail code word & retrieve previous one
 * @lock : Pointer to queued spinlock structure
 * @tail : The new queue tail code word
 * Return: The previous queue tail code word
 *
 * xchg(lock, tail)
 *
 * p,*,* -> n,*,* ; prev = xchg(lock, node)
 */
static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
{
	u32 old, new;

	old = atomic_read(&lock->val);
	do {
		new = (old & _Q_LOCKED_PENDING_MASK) | tail;
		/*
		 * We can use relaxed semantics since the caller ensures that
		 * the MCS node is properly initialized before updating the
		 * tail.
		 */
	} while (!atomic_try_cmpxchg_relaxed(&lock->val, &old, new));

	return old;
}
#endif /* _Q_PENDING_BITS == 8 */

/**
 * queued_fetch_set_pending_acquire - fetch the whole lock value and set pending
 * @lock : Pointer to queued spinlock structure
 * Return: The previous lock value
 *
 * *,*,* -> *,1,*
 */
#ifndef queued_fetch_set_pending_acquire
static __always_inline u32 queued_fetch_set_pending_acquire(struct qspinlock *lock)
{
	return atomic_fetch_or_acquire(_Q_PENDING_VAL, &lock->val);
}
#endif
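
/*
 * Rough sketch of how the slowpath implementations use this (the details
 * live in the .c files, not here): the caller sets the pending bit and
 * inspects the returned old value to decide whether it can simply wait for
 * the current owner or must fall back to queuing, e.g.
 *
 *	val = queued_fetch_set_pending_acquire(lock);
 *	if (val & ~_Q_LOCKED_MASK)
 *		... undo pending if we set it, then queue ...
 */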

/**
 * set_locked - Set the lock bit and own the lock
 * @lock: Pointer to queued spinlock structure
 *
 * *,*,0 -> *,0,1
 */
static __always_inline void set_locked(struct qspinlock *lock)
{
	WRITE_ONCE(lock->locked, _Q_LOCKED_VAL);
}

#endif /* __LINUX_QSPINLOCK_H */