/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_MMAP_LOCK_H
#define _LINUX_MMAP_LOCK_H

/* Avoid a dependency loop by declaring here. */
extern int rcuwait_wake_up(struct rcuwait *w);

#include <linux/lockdep.h>
#include <linux/mm_types.h>
#include <linux/mmdebug.h>
#include <linux/rwsem.h>
#include <linux/tracepoint-defs.h>
#include <linux/types.h>
#include <linux/cleanup.h>
#include <linux/sched/mm.h>

#define MMAP_LOCK_INITIALIZER(name) \
	.mmap_lock = __RWSEM_INITIALIZER((name).mmap_lock),

DECLARE_TRACEPOINT(mmap_lock_start_locking);
DECLARE_TRACEPOINT(mmap_lock_acquire_returned);
DECLARE_TRACEPOINT(mmap_lock_released);

#ifdef CONFIG_TRACING

void __mmap_lock_do_trace_start_locking(struct mm_struct *mm, bool write);
void __mmap_lock_do_trace_acquire_returned(struct mm_struct *mm, bool write,
					   bool success);
void __mmap_lock_do_trace_released(struct mm_struct *mm, bool write);

static inline void __mmap_lock_trace_start_locking(struct mm_struct *mm,
						   bool write)
{
	if (tracepoint_enabled(mmap_lock_start_locking))
		__mmap_lock_do_trace_start_locking(mm, write);
}

static inline void __mmap_lock_trace_acquire_returned(struct mm_struct *mm,
						      bool write, bool success)
{
	if (tracepoint_enabled(mmap_lock_acquire_returned))
		__mmap_lock_do_trace_acquire_returned(mm, write, success);
}

static inline void __mmap_lock_trace_released(struct mm_struct *mm, bool write)
{
	if (tracepoint_enabled(mmap_lock_released))
		__mmap_lock_do_trace_released(mm, write);
}

#else /* !CONFIG_TRACING */

static inline void __mmap_lock_trace_start_locking(struct mm_struct *mm,
						   bool write)
{
}

static inline void __mmap_lock_trace_acquire_returned(struct mm_struct *mm,
						      bool write, bool success)
{
}

static inline void __mmap_lock_trace_released(struct mm_struct *mm, bool write)
{
}

#endif /* CONFIG_TRACING */

static inline void mmap_assert_locked(const struct mm_struct *mm)
{
	rwsem_assert_held(&mm->mmap_lock);
}

static inline void mmap_assert_write_locked(const struct mm_struct *mm)
{
	rwsem_assert_held_write(&mm->mmap_lock);
}

#ifdef CONFIG_PER_VMA_LOCK

static inline void mm_lock_seqcount_init(struct mm_struct *mm)
{
	seqcount_init(&mm->mm_lock_seq);
}

static inline void mm_lock_seqcount_begin(struct mm_struct *mm)
{
	do_raw_write_seqcount_begin(&mm->mm_lock_seq);
}

static inline void mm_lock_seqcount_end(struct mm_struct *mm)
{
	ASSERT_EXCLUSIVE_WRITER(mm->mm_lock_seq);
	do_raw_write_seqcount_end(&mm->mm_lock_seq);
}

static inline bool mmap_lock_speculate_try_begin(struct mm_struct *mm, unsigned int *seq)
{
	/*
	 * Since mmap_lock is a sleeping lock, and waiting for it to become
	 * unlocked is more or less equivalent to taking it ourselves, don't
	 * bother with the speculative path if mmap_lock is already
	 * write-locked; take the slow path, which takes the lock, instead.
	 */
	return raw_seqcount_try_begin(&mm->mm_lock_seq, *seq);
}

static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int seq)
{
	return read_seqcount_retry(&mm->mm_lock_seq, seq);
}
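
/*
 * Illustrative sketch of the speculation pattern the two helpers above are
 * meant for (hypothetical caller, not part of this header): sample the
 * sequence count, perform the lockless reads, then validate that no
 * mmap_lock writer ran in the meantime, falling back to mmap_lock otherwise.
 *
 *	unsigned int seq;
 *
 *	if (mmap_lock_speculate_try_begin(mm, &seq)) {
 *		... lockless reads of mm state ...
 *		if (!mmap_lock_speculate_retry(mm, seq))
 *			return;		(speculation succeeded)
 *	}
 *	mmap_read_lock(mm);
 *	... do the same work under the lock ...
 *	mmap_read_unlock(mm);
 */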

static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	static struct lock_class_key lockdep_key;

	lockdep_init_map(&vma->vmlock_dep_map, "vm_lock", &lockdep_key, 0);
#endif
	if (reset_refcnt)
		refcount_set(&vma->vm_refcnt, 0);
	vma->vm_lock_seq = UINT_MAX;
}

static inline bool is_vma_writer_only(int refcnt)
{
	/*
	 * With a writer and no readers, refcnt is VMA_LOCK_OFFSET if the vma
	 * is detached and (VMA_LOCK_OFFSET + 1) if it is attached. Waiting on
	 * a detached vma happens only in vma_mark_detached() and is a rare
	 * case, therefore most of the time there will be no unnecessary wakeup.
	 */
	return refcnt & VMA_LOCK_OFFSET && refcnt <= VMA_LOCK_OFFSET + 1;
}
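
/*
 * For reference, an illustrative decoding of vm_refcnt values under this
 * scheme (reconstructed from the comments in this file; not an exhaustive
 * or authoritative table):
 *
 *	0				detached, unlocked
 *	1				attached, unlocked
 *	1 + N				attached, N readers
 *	VMA_LOCK_OFFSET			detached, writer waiting for readers
 *	VMA_LOCK_OFFSET + 1		attached, write-locked, no readers
 */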

static inline void vma_refcount_put(struct vm_area_struct *vma)
{
	/* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */
	struct mm_struct *mm = vma->vm_mm;
	int oldcnt;

	rwsem_release(&vma->vmlock_dep_map, _RET_IP_);
	if (!__refcount_dec_and_test(&vma->vm_refcnt, &oldcnt)) {
		if (is_vma_writer_only(oldcnt - 1))
			rcuwait_wake_up(&mm->vma_writer_wait);
	}
}

/*
 * Try to read-lock a vma. The function is allowed to occasionally yield a
 * false locked result to avoid performance overhead, in which case we fall
 * back to using mmap_lock. The function should never yield a false unlocked
 * result. A false locked result is possible if mm_lock_seq overflows or if
 * the vma gets reused and attached to a different mm before we lock it.
 * Returns the vma on success, NULL on failure to lock and ERR_PTR(-EAGAIN)
 * if the vma got detached.
 *
 * WARNING! The vma passed to this function cannot be used if the function
 * fails to lock it because in certain cases the RCU lock is dropped and then
 * reacquired. Once the RCU lock is dropped the vma can be concurrently freed.
 */
static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm,
						    struct vm_area_struct *vma)
{
	int oldcnt;

	/*
	 * Check before locking. A race might cause false locked result.
	 * We can use READ_ONCE() for the mm_lock_seq here, and don't need
	 * ACQUIRE semantics, because this is just a lockless check whose result
	 * we don't rely on for anything - the mm_lock_seq read against which we
	 * need ordering is below.
	 */
	if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(mm->mm_lock_seq.sequence))
		return NULL;

	/*
	 * If VMA_LOCK_OFFSET is set, __refcount_inc_not_zero_limited_acquire()
	 * will fail because VMA_REF_LIMIT is less than VMA_LOCK_OFFSET.
	 * Acquire fence is required here to avoid reordering against later
	 * vm_lock_seq check and checks inside lock_vma_under_rcu().
	 */
	if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt,
							      VMA_REF_LIMIT))) {
		/* return EAGAIN if vma got detached from under us */
		return oldcnt ? NULL : ERR_PTR(-EAGAIN);
	}

	rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_);

	/*
	 * If vma got attached to another mm from under us, that mm is not
	 * stable and can be freed in the narrow window after vma->vm_refcnt
	 * is dropped and before rcuwait_wake_up(mm) is called. Grab it before
	 * releasing vma->vm_refcnt.
	 */
	if (unlikely(vma->vm_mm != mm)) {
		/* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */
		struct mm_struct *other_mm = vma->vm_mm;

		/*
		 * __mmdrop() is a heavy operation and we don't need RCU
		 * protection here. Release RCU lock during these operations.
		 * We reinstate the RCU read lock as the caller expects it to
		 * be held when this function returns even on error.
		 */
		rcu_read_unlock();
		mmgrab(other_mm);
		vma_refcount_put(vma);
		mmdrop(other_mm);
		rcu_read_lock();
		return NULL;
	}

	/*
	 * Overflow of vm_lock_seq/mm_lock_seq might produce false locked result.
	 * False unlocked result is impossible because we modify and check
	 * vma->vm_lock_seq under vma->vm_refcnt protection and mm->mm_lock_seq
	 * modification invalidates all existing locks.
	 *
	 * We must use ACQUIRE semantics for the mm_lock_seq so that if we are
	 * racing with vma_end_write_all(), we only start reading from the VMA
	 * after it has been unlocked.
	 * This pairs with RELEASE semantics in vma_end_write_all().
	 */
	if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&mm->mm_lock_seq))) {
		vma_refcount_put(vma);
		return NULL;
	}

	return vma;
}
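
/*
 * Illustrative sketch of how a caller is expected to handle the three
 * possible outcomes (hypothetical code, loosely modeled on
 * lock_vma_under_rcu(); not part of this header). The lookup runs under
 * rcu_read_lock() and the vma must be revalidated after locking because it
 * may have been reused for a different range or mm:
 *
 *	rcu_read_lock();
 *	vma = ... look up the vma covering address ...;
 *	vma = vma_start_read(mm, vma);
 *	if (IS_ERR_OR_NULL(vma)) {
 *		rcu_read_unlock();
 *		... NULL: fall back to taking mmap_lock ...
 *		... ERR_PTR(-EAGAIN): the vma was detached, retry the lookup ...
 *	}
 *	... recheck vma->vm_start/vm_end against address ...
 *	rcu_read_unlock();
 *	... use the vma, then drop the lock with vma_end_read(vma) ...
 */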

/*
 * Use only while holding mmap read lock which guarantees that locking will not
 * fail (nobody can concurrently write-lock the vma). vma_start_read() should
 * not be used in such cases because it might fail due to mm_lock_seq overflow.
 * This functionality is used to obtain vma read lock and drop the mmap read lock.
 */
static inline bool vma_start_read_locked_nested(struct vm_area_struct *vma, int subclass)
{
	int oldcnt;

	mmap_assert_locked(vma->vm_mm);
	if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt,
							      VMA_REF_LIMIT)))
		return false;

	rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_);
	return true;
}

/*
 * Use only while holding mmap read lock which guarantees that locking will not
 * fail (nobody can concurrently write-lock the vma). vma_start_read() should
 * not be used in such cases because it might fail due to mm_lock_seq overflow.
 * This functionality is used to obtain vma read lock and drop the mmap read lock.
 */
static inline bool vma_start_read_locked(struct vm_area_struct *vma)
{
	return vma_start_read_locked_nested(vma, 0);
}
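
/*
 * Illustrative sketch of the intended pattern (hypothetical caller, not part
 * of this header): take the per-VMA read lock while mmap_lock is held, then
 * drop mmap_lock and keep operating on the vma under its own lock.
 *
 *	mmap_read_lock(mm);
 *	vma = find_vma(mm, addr);
 *	if (vma && vma_start_read_locked(vma)) {
 *		mmap_read_unlock(mm);
 *		... work on the vma ...
 *		vma_end_read(vma);
 *	} else {
 *		... fall back to doing the work under mmap_lock ...
 *		mmap_read_unlock(mm);
 *	}
 */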

static inline void vma_end_read(struct vm_area_struct *vma)
{
	vma_refcount_put(vma);
}

/* WARNING! Can only be used if mmap_lock is expected to be write-locked */
static inline bool __is_vma_write_locked(struct vm_area_struct *vma, unsigned int *mm_lock_seq)
{
	mmap_assert_write_locked(vma->vm_mm);

	/*
	 * The current task is holding mmap_write_lock, so neither
	 * vma->vm_lock_seq nor mm->mm_lock_seq can be concurrently modified.
	 */
	*mm_lock_seq = vma->vm_mm->mm_lock_seq.sequence;
	return (vma->vm_lock_seq == *mm_lock_seq);
}

void __vma_start_write(struct vm_area_struct *vma, unsigned int mm_lock_seq);

/*
 * Begin writing to a VMA.
 * Exclude concurrent readers under the per-VMA lock until the currently
 * write-locked mmap_lock is dropped or downgraded.
 */
static inline void vma_start_write(struct vm_area_struct *vma)
{
	unsigned int mm_lock_seq;

	if (__is_vma_write_locked(vma, &mm_lock_seq))
		return;

	__vma_start_write(vma, mm_lock_seq);
}
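
/*
 * Illustrative sketch (hypothetical caller, not part of this header): a VMA
 * must be write-locked before it is modified, while mmap_lock is held for
 * writing. Note that there is no vma_end_write(); the per-VMA write locks
 * are dropped collectively by vma_end_write_all() when mmap_lock is released
 * or downgraded (see below).
 *
 *	mmap_write_lock(mm);
 *	vma = find_vma(mm, addr);
 *	vma_start_write(vma);
 *	... modify the vma (flags, ranges, ...) ...
 *	mmap_write_unlock(mm);		(also releases the VMA write lock)
 */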

static inline void vma_assert_write_locked(struct vm_area_struct *vma)
{
	unsigned int mm_lock_seq;

	VM_BUG_ON_VMA(!__is_vma_write_locked(vma, &mm_lock_seq), vma);
}

static inline void vma_assert_locked(struct vm_area_struct *vma)
{
	unsigned int mm_lock_seq;

	VM_BUG_ON_VMA(refcount_read(&vma->vm_refcnt) <= 1 &&
		      !__is_vma_write_locked(vma, &mm_lock_seq), vma);
}

/*
 * WARNING: to avoid racing with vma_mark_attached()/vma_mark_detached(), these
 * assertions should be made either under mmap_write_lock or when the object
 * has been isolated under mmap_write_lock, ensuring no competing writers.
 */
static inline void vma_assert_attached(struct vm_area_struct *vma)
{
	WARN_ON_ONCE(!refcount_read(&vma->vm_refcnt));
}

static inline void vma_assert_detached(struct vm_area_struct *vma)
{
	WARN_ON_ONCE(refcount_read(&vma->vm_refcnt));
}

static inline void vma_mark_attached(struct vm_area_struct *vma)
{
	vma_assert_write_locked(vma);
	vma_assert_detached(vma);
	refcount_set_release(&vma->vm_refcnt, 1);
}

void vma_mark_detached(struct vm_area_struct *vma);

struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
					  unsigned long address);
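
/*
 * Illustrative usage sketch (hypothetical caller, modeled on the page fault
 * path; not part of this header): try the per-VMA lock first and fall back
 * to mmap_lock when it is unavailable. lock_vma_under_rcu() takes and drops
 * the RCU read lock internally.
 *
 *	vma = lock_vma_under_rcu(mm, address);
 *	if (vma) {
 *		... handle the fault under the per-VMA read lock ...
 *		vma_end_read(vma);
 *	} else {
 *		mmap_read_lock(mm);
 *		vma = find_vma(mm, address);
 *		... handle the fault under mmap_lock ...
 *		mmap_read_unlock(mm);
 *	}
 */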

#else /* CONFIG_PER_VMA_LOCK */

static inline void mm_lock_seqcount_init(struct mm_struct *mm) {}
static inline void mm_lock_seqcount_begin(struct mm_struct *mm) {}
static inline void mm_lock_seqcount_end(struct mm_struct *mm) {}

static inline bool mmap_lock_speculate_try_begin(struct mm_struct *mm, unsigned int *seq)
{
	return false;
}

static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int seq)
{
	return true;
}

static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt) {}
static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm,
						    struct vm_area_struct *vma)
{ return NULL; }
static inline void vma_end_read(struct vm_area_struct *vma) {}
static inline void vma_start_write(struct vm_area_struct *vma) {}
static inline void vma_assert_write_locked(struct vm_area_struct *vma)
{ mmap_assert_write_locked(vma->vm_mm); }
static inline void vma_assert_attached(struct vm_area_struct *vma) {}
static inline void vma_assert_detached(struct vm_area_struct *vma) {}
static inline void vma_mark_attached(struct vm_area_struct *vma) {}
static inline void vma_mark_detached(struct vm_area_struct *vma) {}

static inline struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
							unsigned long address)
{
	return NULL;
}

static inline void vma_assert_locked(struct vm_area_struct *vma)
{
	mmap_assert_locked(vma->vm_mm);
}

#endif /* CONFIG_PER_VMA_LOCK */

static inline void mmap_write_lock(struct mm_struct *mm)
{
	__mmap_lock_trace_start_locking(mm, true);
	down_write(&mm->mmap_lock);
	mm_lock_seqcount_begin(mm);
	__mmap_lock_trace_acquire_returned(mm, true, true);
}

static inline void mmap_write_lock_nested(struct mm_struct *mm, int subclass)
{
	__mmap_lock_trace_start_locking(mm, true);
	down_write_nested(&mm->mmap_lock, subclass);
	mm_lock_seqcount_begin(mm);
	__mmap_lock_trace_acquire_returned(mm, true, true);
}

static inline int mmap_write_lock_killable(struct mm_struct *mm)
{
	int ret;

	__mmap_lock_trace_start_locking(mm, true);
	ret = down_write_killable(&mm->mmap_lock);
	if (!ret)
		mm_lock_seqcount_begin(mm);
	__mmap_lock_trace_acquire_returned(mm, true, ret == 0);
	return ret;
}
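
/*
 * Illustrative sketch (hypothetical caller, not part of this header): the
 * killable variant returns non-zero when a fatal signal interrupts the wait,
 * and the error should be propagated rather than ignored.
 *
 *	if (mmap_write_lock_killable(mm))
 *		return -EINTR;
 *	... modify the address space ...
 *	mmap_write_unlock(mm);
 */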

/*
 * Drop all currently-held per-VMA locks.
 * This is called from the mmap_lock implementation directly before releasing
 * a write-locked mmap_lock (or downgrading it to read-locked).
 * This should normally NOT be called manually from other places.
 * If you want to call this manually anyway, keep in mind that this will release
 * *all* VMA write locks, including ones from further up the stack.
 */
static inline void vma_end_write_all(struct mm_struct *mm)
{
	mmap_assert_write_locked(mm);
	mm_lock_seqcount_end(mm);
}

static inline void mmap_write_unlock(struct mm_struct *mm)
{
	__mmap_lock_trace_released(mm, true);
	vma_end_write_all(mm);
	up_write(&mm->mmap_lock);
}

static inline void mmap_write_downgrade(struct mm_struct *mm)
{
	__mmap_lock_trace_acquire_returned(mm, false, true);
	vma_end_write_all(mm);
	downgrade_write(&mm->mmap_lock);
}

static inline void mmap_read_lock(struct mm_struct *mm)
{
	__mmap_lock_trace_start_locking(mm, false);
	down_read(&mm->mmap_lock);
	__mmap_lock_trace_acquire_returned(mm, false, true);
}

static inline int mmap_read_lock_killable(struct mm_struct *mm)
{
	int ret;

	__mmap_lock_trace_start_locking(mm, false);
	ret = down_read_killable(&mm->mmap_lock);
	__mmap_lock_trace_acquire_returned(mm, false, ret == 0);
	return ret;
}

static inline bool mmap_read_trylock(struct mm_struct *mm)
{
	bool ret;

	__mmap_lock_trace_start_locking(mm, false);
	ret = down_read_trylock(&mm->mmap_lock) != 0;
	__mmap_lock_trace_acquire_returned(mm, false, ret);
	return ret;
}

static inline void mmap_read_unlock(struct mm_struct *mm)
{
	__mmap_lock_trace_released(mm, false);
	up_read(&mm->mmap_lock);
}

DEFINE_GUARD(mmap_read_lock, struct mm_struct *,
	     mmap_read_lock(_T), mmap_read_unlock(_T))
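
/*
 * The guard definition above enables scope-based mmap read locking via
 * <linux/cleanup.h>. Illustrative sketch (hypothetical caller, not part of
 * this header): the lock is taken when the guard is declared and released
 * automatically on every return path out of the scope.
 *
 *	static int inspect_mm(struct mm_struct *mm)
 *	{
 *		guard(mmap_read_lock)(mm);
 *
 *		... walk VMAs, may return early ...
 *		return 0;
 *	}
 *
 * or, for a narrower critical section:
 *
 *	scoped_guard(mmap_read_lock, mm) {
 *		... read-only access to the address space ...
 *	}
 */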

static inline void mmap_read_unlock_non_owner(struct mm_struct *mm)
{
	__mmap_lock_trace_released(mm, false);
	up_read_non_owner(&mm->mmap_lock);
}

static inline int mmap_lock_is_contended(struct mm_struct *mm)
{
	return rwsem_is_contended(&mm->mmap_lock);
}
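
/*
 * Illustrative sketch (hypothetical caller, not part of this header):
 * long-running operations can poll for contention and briefly drop the lock
 * so that waiting writers are not starved. The walk must be revalidated
 * after relocking because the address space may have changed.
 *
 *	mmap_read_lock(mm);
 *	for (each vma of interest) {
 *		...
 *		if (mmap_lock_is_contended(mm)) {
 *			mmap_read_unlock(mm);
 *			cond_resched();
 *			mmap_read_lock(mm);
 *			... revalidate the walk ...
 *		}
 *	}
 *	mmap_read_unlock(mm);
 */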

#endif /* _LINUX_MMAP_LOCK_H */