|  | // SPDX-License-Identifier: GPL-2.0-only | 
|  |  | 
|  | /* | 
|  | * rcuref - A scalable reference count implementation for RCU managed objects | 
|  | * | 
|  | * rcuref is provided to replace open coded reference count implementations | 
|  | * based on atomic_t. It protects explicitely RCU managed objects which can | 
|  | * be visible even after the last reference has been dropped and the object | 
|  | * is heading towards destruction. | 
|  | * | 
|  | * A common usage pattern is: | 
|  | * | 
|  | * get() | 
|  | *	rcu_read_lock(); | 
|  | *	p = get_ptr(); | 
|  | *	if (p && !atomic_inc_not_zero(&p->refcnt)) | 
|  | *		p = NULL; | 
|  | *	rcu_read_unlock(); | 
|  | *	return p; | 
|  | * | 
|  | * put() | 
|  | *	if (!atomic_dec_return(&->refcnt)) { | 
|  | *		remove_ptr(p); | 
|  | *		kfree_rcu((p, rcu); | 
|  | *	} | 
|  | * | 
|  | * atomic_inc_not_zero() is implemented with a try_cmpxchg() loop which has | 
|  | * O(N^2) behaviour under contention with N concurrent operations. | 
|  | * | 
|  | * rcuref uses atomic_add_negative_relaxed() for the fast path, which scales | 
|  | * better under contention. | 
|  | * | 
|  | * Why not refcount? | 
|  | * ================= | 
|  | * | 
|  | * In principle it should be possible to make refcount use the rcuref | 
|  | * scheme, but the destruction race described below cannot be prevented | 
|  | * unless the protected object is RCU managed. | 
|  | * | 
|  | * Theory of operation | 
|  | * =================== | 
|  | * | 
|  | * rcuref uses an unsigned integer reference counter. As long as the | 
|  | * counter value is greater than or equal to RCUREF_ONEREF and not larger | 
|  | * than RCUREF_MAXREF the reference is alive: | 
|  | * | 
|  | * ONEREF   MAXREF               SATURATED             RELEASED      DEAD    NOREF | 
|  | * 0        0x7FFFFFFF 0x8000000 0xA0000000 0xBFFFFFFF 0xC0000000 0xE0000000 0xFFFFFFFF | 
|  | * <---valid --------> <-------saturation zone-------> <-----dead zone-----> | 
|  | * | 
|  | * The get() and put() operations do unconditional increments and | 
|  | * decrements. The result is checked after the operation. This optimizes | 
|  | * for the fast path. | 
|  | * | 
|  | * If the reference count is saturated or dead, then the increments and | 
|  | * decrements are not harmful as the reference count still stays in the | 
|  | * respective zones and is always set back to STATURATED resp. DEAD. The | 
|  | * zones have room for 2^28 racing operations in each direction, which | 
|  | * makes it practically impossible to escape the zones. | 
|  | * | 
|  | * Once the last reference is dropped the reference count becomes | 
|  | * RCUREF_NOREF which forces rcuref_put() into the slowpath operation. The | 
|  | * slowpath then tries to set the reference count from RCUREF_NOREF to | 
|  | * RCUREF_DEAD via a cmpxchg(). This opens a small window where a | 
|  | * concurrent rcuref_get() can acquire the reference count and bring it | 
|  | * back to RCUREF_ONEREF or even drop the reference again and mark it DEAD. | 
|  | * | 
|  | * If the cmpxchg() succeeds then a concurrent rcuref_get() will result in | 
|  | * DEAD + 1, which is inside the dead zone. If that happens the reference | 
|  | * count is put back to DEAD. | 
|  | * | 
|  | * The actual race is possible due to the unconditional increment and | 
|  | * decrements in rcuref_get() and rcuref_put(): | 
|  | * | 
|  | *	T1				T2 | 
|  | *	get()				put() | 
|  | *					if (atomic_add_negative(-1, &ref->refcnt)) | 
|  | *		succeeds->			atomic_cmpxchg(&ref->refcnt, NOREF, DEAD); | 
|  | * | 
|  | *	atomic_add_negative(1, &ref->refcnt);	<- Elevates refcount to DEAD + 1 | 
|  | * | 
|  | * As the result of T1's add is negative, the get() goes into the slow path | 
|  | * and observes refcnt being in the dead zone which makes the operation fail. | 
|  | * | 
|  | * Possible critical states: | 
|  | * | 
|  | *	Context Counter	References	Operation | 
|  | *	T1	0	1		init() | 
|  | *	T2	1	2		get() | 
|  | *	T1	0	1		put() | 
|  | *	T2     -1	0		put() tries to mark dead | 
|  | *	T1	0	1		get() | 
|  | *	T2	0	1		put() mark dead fails | 
|  | *	T1     -1	0		put() tries to mark dead | 
|  | *	T1    DEAD	0		put() mark dead succeeds | 
|  | *	T2    DEAD+1	0		get() fails and puts it back to DEAD | 
|  | * | 
|  | * Of course there are more complex scenarios, but the above illustrates | 
|  | * the working principle. The rest is left to the imagination of the | 
|  | * reader. | 
|  | * | 
|  | * Deconstruction race | 
|  | * =================== | 
|  | * | 
|  | * The release operation must be protected by prohibiting a grace period in | 
|  | * order to prevent a possible use after free: | 
|  | * | 
|  | *	T1				T2 | 
|  | *	put()				get() | 
|  | *	// ref->refcnt = ONEREF | 
|  | *	if (!atomic_add_negative(-1, &ref->refcnt)) | 
|  | *		return false;				<- Not taken | 
|  | * | 
|  | *	// ref->refcnt == NOREF | 
|  | *	--> preemption | 
|  | *					// Elevates ref->refcnt to ONEREF | 
|  | *					if (!atomic_add_negative(1, &ref->refcnt)) | 
|  | *						return true;			<- taken | 
|  | * | 
|  | *					if (put(&p->ref)) { <-- Succeeds | 
|  | *						remove_pointer(p); | 
|  | *						kfree_rcu(p, rcu); | 
|  | *					} | 
|  | * | 
|  | *		RCU grace period ends, object is freed | 
|  | * | 
|  | *	atomic_cmpxchg(&ref->refcnt, NOREF, DEAD);	<- UAF | 
|  | * | 
|  | * This is prevented by disabling preemption around the put() operation as | 
|  | * that's in most kernel configurations cheaper than a rcu_read_lock() / | 
|  | * rcu_read_unlock() pair and in many cases even a NOOP. In any case it | 
|  | * prevents the grace period which keeps the object alive until all put() | 
|  | * operations complete. | 
|  | * | 
|  | * Saturation protection | 
|  | * ===================== | 
|  | * | 
|  | * The reference count has a saturation limit RCUREF_MAXREF (INT_MAX). | 
|  | * Once this is exceedded the reference count becomes stale by setting it | 
|  | * to RCUREF_SATURATED, which will cause a memory leak, but it prevents | 
|  | * wrap arounds which obviously cause worse problems than a memory | 
|  | * leak. When saturation is reached a warning is emitted. | 
|  | * | 
|  | * Race conditions | 
|  | * =============== | 
|  | * | 
|  | * All reference count increment/decrement operations are unconditional and | 
|  | * only verified after the fact. This optimizes for the good case and takes | 
|  | * the occasional race vs. a dead or already saturated refcount into | 
|  | * account. The saturation and dead zones are large enough to accomodate | 
|  | * for that. | 
|  | * | 
|  | * Memory ordering | 
|  | * =============== | 
|  | * | 
|  | * Memory ordering rules are slightly relaxed wrt regular atomic_t functions | 
|  | * and provide only what is strictly required for refcounts. | 
|  | * | 
|  | * The increments are fully relaxed; these will not provide ordering. The | 
|  | * rationale is that whatever is used to obtain the object to increase the | 
|  | * reference count on will provide the ordering. For locked data | 
|  | * structures, its the lock acquire, for RCU/lockless data structures its | 
|  | * the dependent load. | 
|  | * | 
|  | * rcuref_get() provides a control dependency ordering future stores which | 
|  | * ensures that the object is not modified when acquiring a reference | 
|  | * fails. | 
|  | * | 
|  | * rcuref_put() provides release order, i.e. all prior loads and stores | 
|  | * will be issued before. It also provides a control dependency ordering | 
|  | * against the subsequent destruction of the object. | 
|  | * | 
|  | * If rcuref_put() successfully dropped the last reference and marked the | 
|  | * object DEAD it also provides acquire ordering. | 
|  | */ | 
|  |  | 
|  | #include <linux/export.h> | 
|  | #include <linux/rcuref.h> | 
|  |  | 
|  | /** | 
|  | * rcuref_get_slowpath - Slowpath of rcuref_get() | 
|  | * @ref:	Pointer to the reference count | 
|  | * | 
|  | * Invoked when the reference count is outside of the valid zone. | 
|  | * | 
|  | * Return: | 
|  | *	False if the reference count was already marked dead | 
|  | * | 
|  | *	True if the reference count is saturated, which prevents the | 
|  | *	object from being deconstructed ever. | 
|  | */ | 
|  | bool rcuref_get_slowpath(rcuref_t *ref) | 
|  | { | 
|  | unsigned int cnt = atomic_read(&ref->refcnt); | 
|  |  | 
|  | /* | 
|  | * If the reference count was already marked dead, undo the | 
|  | * increment so it stays in the middle of the dead zone and return | 
|  | * fail. | 
|  | */ | 
|  | if (cnt >= RCUREF_RELEASED) { | 
|  | atomic_set(&ref->refcnt, RCUREF_DEAD); | 
|  | return false; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * If it was saturated, warn and mark it so. In case the increment | 
|  | * was already on a saturated value restore the saturation | 
|  | * marker. This keeps it in the middle of the saturation zone and | 
|  | * prevents the reference count from overflowing. This leaks the | 
|  | * object memory, but prevents the obvious reference count overflow | 
|  | * damage. | 
|  | */ | 
|  | if (WARN_ONCE(cnt > RCUREF_MAXREF, "rcuref saturated - leaking memory")) | 
|  | atomic_set(&ref->refcnt, RCUREF_SATURATED); | 
|  | return true; | 
|  | } | 
|  | EXPORT_SYMBOL_GPL(rcuref_get_slowpath); | 
|  |  | 
|  | /** | 
|  | * rcuref_put_slowpath - Slowpath of __rcuref_put() | 
|  | * @ref:	Pointer to the reference count | 
|  | * | 
|  | * Invoked when the reference count is outside of the valid zone. | 
|  | * | 
|  | * Return: | 
|  | *	True if this was the last reference with no future references | 
|  | *	possible. This signals the caller that it can safely schedule the | 
|  | *	object, which is protected by the reference counter, for | 
|  | *	deconstruction. | 
|  | * | 
|  | *	False if there are still active references or the put() raced | 
|  | *	with a concurrent get()/put() pair. Caller is not allowed to | 
|  | *	deconstruct the protected object. | 
|  | */ | 
|  | bool rcuref_put_slowpath(rcuref_t *ref) | 
|  | { | 
|  | unsigned int cnt = atomic_read(&ref->refcnt); | 
|  |  | 
|  | /* Did this drop the last reference? */ | 
|  | if (likely(cnt == RCUREF_NOREF)) { | 
|  | /* | 
|  | * Carefully try to set the reference count to RCUREF_DEAD. | 
|  | * | 
|  | * This can fail if a concurrent get() operation has | 
|  | * elevated it again or the corresponding put() even marked | 
|  | * it dead already. Both are valid situations and do not | 
|  | * require a retry. If this fails the caller is not | 
|  | * allowed to deconstruct the object. | 
|  | */ | 
|  | if (!atomic_try_cmpxchg_release(&ref->refcnt, &cnt, RCUREF_DEAD)) | 
|  | return false; | 
|  |  | 
|  | /* | 
|  | * The caller can safely schedule the object for | 
|  | * deconstruction. Provide acquire ordering. | 
|  | */ | 
|  | smp_acquire__after_ctrl_dep(); | 
|  | return true; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * If the reference count was already in the dead zone, then this | 
|  | * put() operation is imbalanced. Warn, put the reference count back to | 
|  | * DEAD and tell the caller to not deconstruct the object. | 
|  | */ | 
|  | if (WARN_ONCE(cnt >= RCUREF_RELEASED, "rcuref - imbalanced put()")) { | 
|  | atomic_set(&ref->refcnt, RCUREF_DEAD); | 
|  | return false; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * This is a put() operation on a saturated refcount. Restore the | 
|  | * mean saturation value and tell the caller to not deconstruct the | 
|  | * object. | 
|  | */ | 
|  | if (cnt > RCUREF_MAXREF) | 
|  | atomic_set(&ref->refcnt, RCUREF_SATURATED); | 
|  | return false; | 
|  | } | 
|  | EXPORT_SYMBOL_GPL(rcuref_put_slowpath); |