|  | // SPDX-License-Identifier: GPL-2.0-or-later | 
|  | /* | 
|  | * Copyright (c) 2021-2024 Oracle.  All Rights Reserved. | 
|  | * Author: Darrick J. Wong <djwong@kernel.org> | 
|  | */ | 
|  | #include "xfs.h" | 
|  | #include "xfs_fs.h" | 
|  | #include "xfs_shared.h" | 
|  | #include "xfs_format.h" | 
|  | #include "xfs_trans_resv.h" | 
|  | #include "xfs_mount.h" | 
|  | #include "xfs_log_format.h" | 
|  | #include "xfs_trans.h" | 
|  | #include "xfs_inode.h" | 
|  | #include "xfs_btree.h" | 
|  | #include "xfs_ialloc.h" | 
|  | #include "xfs_ialloc_btree.h" | 
|  | #include "xfs_ag.h" | 
|  | #include "xfs_error.h" | 
|  | #include "xfs_bit.h" | 
|  | #include "xfs_icache.h" | 
|  | #include "scrub/scrub.h" | 
|  | #include "scrub/iscan.h" | 
|  | #include "scrub/common.h" | 
|  | #include "scrub/trace.h" | 
|  |  | 
|  | /* | 
|  | * Live File Scan | 
|  | * ============== | 
|  | * | 
|  | * Live file scans walk every inode in a live filesystem.  This is more or | 
|  | * less like a regular iwalk, except that when we're advancing the scan cursor, | 
|  | * we must ensure that inodes cannot be added or deleted anywhere between the | 
|  | * old cursor value and the new cursor value.  If we're advancing the cursor | 
|  | * by one inode, the caller must hold that inode; if we're finding the next | 
|  | * inode to scan, we must grab the AGI and hold it until we've updated the | 
|  | * scan cursor. | 
|  | * | 
|  | * Callers are expected to use this code to scan all files in the filesystem to | 
|  | * construct a new metadata index of some kind.  The scan races against other | 
|  | * live updates, which means there must be a provision to update the new index | 
 * when updates are made to inodes that have already been scanned.  The iscan
 * lock can be used in live update hook code to stop the scan and protect this
 * data structure.
|  | * | 
|  | * To keep the new index up to date with other metadata updates being made to | 
|  | * the live filesystem, it is assumed that the caller will add hooks as needed | 
|  | * to be notified when a metadata update occurs.  The inode scanner must tell | 
 * the hook code when an inode has been visited with xchk_iscan_mark_visited.
|  | * Hook functions can use xchk_iscan_want_live_update to decide if the | 
|  | * scanner's observations must be updated. | 
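 *
 * A rough usage sketch (illustrative only): the timeout values and
 * xchk_example_record() below are stand-ins for whatever a real scrubber
 * passes and does with each inode, not lifted from any particular caller.
 *
 *	struct xchk_iscan	iscan = { };
 *	struct xfs_inode	*ip;
 *	int			ret;
 *
 *	xchk_iscan_start(sc, 5000, 100, &iscan);
 *	while ((ret = xchk_iscan_iter(&iscan, &ip)) == 1) {
 *		xfs_ilock(ip, XFS_ILOCK_SHARED);
 *		ret = xchk_example_record(sc, ip);
 *		if (!ret)
 *			xchk_iscan_mark_visited(&iscan, ip);
 *		xfs_iunlock(ip, XFS_ILOCK_SHARED);
 *		xchk_irele(sc, ip);
 *		if (ret)
 *			break;
 *	}
 *	xchk_iscan_iter_finish(&iscan);
 *	xchk_iscan_teardown(&iscan);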
|  | */ | 
|  |  | 
|  | /* | 
|  | * If the inobt record @rec covers @iscan->skip_ino, mark the inode free so | 
|  | * that the scan ignores that inode. | 
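 *
 * For example, if @rec starts at agino 128 and @iscan->skip_ino maps to
 * agino 131 in this AG, the xfs_inobt_maskn() call below sets bit 3 of
 * ir_free, so the scan treats that inode as unallocated.  (Numbers are
 * illustrative only.)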
|  | */ | 
|  | STATIC void | 
|  | xchk_iscan_mask_skipino( | 
|  | struct xchk_iscan	*iscan, | 
|  | struct xfs_perag	*pag, | 
|  | struct xfs_inobt_rec_incore	*rec, | 
|  | xfs_agino_t		lastrecino) | 
|  | { | 
|  | struct xfs_scrub	*sc = iscan->sc; | 
|  | struct xfs_mount	*mp = sc->mp; | 
|  | xfs_agnumber_t		skip_agno = XFS_INO_TO_AGNO(mp, iscan->skip_ino); | 
	xfs_agino_t		skip_agino = XFS_INO_TO_AGINO(mp, iscan->skip_ino);
|  |  | 
|  | if (pag->pag_agno != skip_agno) | 
|  | return; | 
|  | if (skip_agino < rec->ir_startino) | 
|  | return; | 
|  | if (skip_agino > lastrecino) | 
|  | return; | 
|  |  | 
|  | rec->ir_free |= xfs_inobt_maskn(skip_agino - rec->ir_startino, 1); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Set *cursor to the next allocated inode after whatever it's set to now. | 
|  | * If there are no more inodes in this AG, cursor is set to NULLAGINO. | 
|  | */ | 
|  | STATIC int | 
|  | xchk_iscan_find_next( | 
|  | struct xchk_iscan	*iscan, | 
|  | struct xfs_buf		*agi_bp, | 
|  | struct xfs_perag	*pag, | 
|  | xfs_inofree_t		*allocmaskp, | 
|  | xfs_agino_t		*cursor, | 
|  | uint8_t			*nr_inodesp) | 
|  | { | 
|  | struct xfs_scrub	*sc = iscan->sc; | 
|  | struct xfs_inobt_rec_incore	rec; | 
|  | struct xfs_btree_cur	*cur; | 
|  | struct xfs_mount	*mp = sc->mp; | 
|  | struct xfs_trans	*tp = sc->tp; | 
|  | xfs_agnumber_t		agno = pag->pag_agno; | 
|  | xfs_agino_t		lastino = NULLAGINO; | 
|  | xfs_agino_t		first, last; | 
|  | xfs_agino_t		agino = *cursor; | 
|  | int			has_rec; | 
|  | int			error; | 
|  |  | 
|  | /* If the cursor is beyond the end of this AG, move to the next one. */ | 
|  | xfs_agino_range(mp, agno, &first, &last); | 
|  | if (agino > last) { | 
|  | *cursor = NULLAGINO; | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Look up the inode chunk for the current cursor position.  If there | 
|  | * is no chunk here, we want the next one. | 
|  | */ | 
|  | cur = xfs_inobt_init_cursor(pag, tp, agi_bp); | 
|  | error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &has_rec); | 
|  | if (!error && !has_rec) | 
|  | error = xfs_btree_increment(cur, 0, &has_rec); | 
|  | for (; !error; error = xfs_btree_increment(cur, 0, &has_rec)) { | 
|  | xfs_inofree_t	allocmask; | 
|  |  | 
|  | /* | 
|  | * If we've run out of inobt records in this AG, move the | 
|  | * cursor on to the next AG and exit.  The caller can try | 
|  | * again with the next AG. | 
|  | */ | 
|  | if (!has_rec) { | 
|  | *cursor = NULLAGINO; | 
|  | break; | 
|  | } | 
|  |  | 
|  | error = xfs_inobt_get_rec(cur, &rec, &has_rec); | 
|  | if (error) | 
|  | break; | 
|  | if (!has_rec) { | 
|  | error = -EFSCORRUPTED; | 
|  | break; | 
|  | } | 
|  |  | 
|  | /* Make sure that we always move forward. */ | 
|  | if (lastino != NULLAGINO && | 
|  | XFS_IS_CORRUPT(mp, lastino >= rec.ir_startino)) { | 
|  | error = -EFSCORRUPTED; | 
|  | break; | 
|  | } | 
|  | lastino = rec.ir_startino + XFS_INODES_PER_CHUNK - 1; | 
|  |  | 
|  | /* | 
|  | * If this record only covers inodes that come before the | 
|  | * cursor, advance to the next record. | 
|  | */ | 
|  | if (rec.ir_startino + XFS_INODES_PER_CHUNK <= agino) | 
|  | continue; | 
|  |  | 
|  | if (iscan->skip_ino) | 
|  | xchk_iscan_mask_skipino(iscan, pag, &rec, lastino); | 
|  |  | 
|  | /* | 
|  | * If the incoming lookup put us in the middle of an inobt | 
|  | * record, mark it and the previous inodes "free" so that the | 
|  | * search for allocated inodes will start at the cursor. | 
|  | * We don't care about ir_freecount here. | 
|  | */ | 
|  | if (agino >= rec.ir_startino) | 
|  | rec.ir_free |= xfs_inobt_maskn(0, | 
|  | agino + 1 - rec.ir_startino); | 
|  |  | 
|  | /* | 
|  | * If there are allocated inodes in this chunk, find them | 
|  | * and update the scan cursor. | 
|  | */ | 
|  | allocmask = ~rec.ir_free; | 
|  | if (hweight64(allocmask) > 0) { | 
|  | int	next = xfs_lowbit64(allocmask); | 
|  |  | 
|  | ASSERT(next >= 0); | 
|  | *cursor = rec.ir_startino + next; | 
|  | *allocmaskp = allocmask >> next; | 
|  | *nr_inodesp = XFS_INODES_PER_CHUNK - next; | 
|  | break; | 
|  | } | 
|  | } | 
|  |  | 
|  | xfs_btree_del_cursor(cur, error); | 
|  | return error; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Advance both the scan and the visited cursors. | 
|  | * | 
|  | * The inumber address space for a given filesystem is sparse, which means that | 
|  | * the scan cursor can jump a long ways in a single iter() call.  There are no | 
|  | * inodes in these sparse areas, so we must move the visited cursor forward at | 
|  | * the same time so that the scan user can receive live updates for inodes that | 
|  | * may get created once we release the AGI buffer. | 
|  | */ | 
|  | static inline void | 
|  | xchk_iscan_move_cursor( | 
|  | struct xchk_iscan	*iscan, | 
|  | xfs_agnumber_t		agno, | 
|  | xfs_agino_t		agino) | 
|  | { | 
|  | struct xfs_scrub	*sc = iscan->sc; | 
|  | struct xfs_mount	*mp = sc->mp; | 
|  | xfs_ino_t		cursor, visited; | 
|  |  | 
|  | BUILD_BUG_ON(XFS_MAXINUMBER == NULLFSINO); | 
|  |  | 
|  | /* | 
|  | * Special-case ino == 0 here so that we never set visited_ino to | 
|  | * NULLFSINO when wrapping around EOFS, for that will let through all | 
|  | * live updates. | 
|  | */ | 
|  | cursor = XFS_AGINO_TO_INO(mp, agno, agino); | 
|  | if (cursor == 0) | 
|  | visited = XFS_MAXINUMBER; | 
|  | else | 
|  | visited = cursor - 1; | 
|  |  | 
|  | mutex_lock(&iscan->lock); | 
|  | iscan->cursor_ino = cursor; | 
|  | iscan->__visited_ino = visited; | 
|  | trace_xchk_iscan_move_cursor(iscan); | 
|  | mutex_unlock(&iscan->lock); | 
|  | } | 
|  |  | 
/*
 * Mark the inode scan finished.  Setting both cursors to NULLFSINO tells the
 * _want_live_update predicate to pass every subsequent live update to the
 * caller, and signals that the scan cursor no longer points anywhere.
 */
|  | static inline void | 
|  | xchk_iscan_finish( | 
|  | struct xchk_iscan	*iscan) | 
|  | { | 
|  | mutex_lock(&iscan->lock); | 
|  | iscan->cursor_ino = NULLFSINO; | 
|  |  | 
|  | /* All live updates will be applied from now on */ | 
|  | iscan->__visited_ino = NULLFSINO; | 
|  |  | 
|  | mutex_unlock(&iscan->lock); | 
|  | } | 
|  |  | 
|  | /* Mark an inode scan finished before we actually scan anything. */ | 
|  | void | 
|  | xchk_iscan_finish_early( | 
|  | struct xchk_iscan	*iscan) | 
|  | { | 
|  | ASSERT(iscan->cursor_ino == iscan->scan_start_ino); | 
|  | ASSERT(iscan->__visited_ino == iscan->scan_start_ino); | 
|  |  | 
|  | xchk_iscan_finish(iscan); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Grab the AGI to advance the inode scan.  Returns 0 if *agi_bpp is now set, | 
|  | * -ECANCELED if the live scan aborted, -EBUSY if the AGI could not be grabbed, | 
|  | * or the usual negative errno. | 
|  | */ | 
|  | STATIC int | 
|  | xchk_iscan_read_agi( | 
|  | struct xchk_iscan	*iscan, | 
|  | struct xfs_perag	*pag, | 
|  | struct xfs_buf		**agi_bpp) | 
|  | { | 
|  | struct xfs_scrub	*sc = iscan->sc; | 
|  | unsigned long		relax; | 
|  | int			ret; | 
|  |  | 
|  | if (!xchk_iscan_agi_needs_trylock(iscan)) | 
|  | return xfs_ialloc_read_agi(pag, sc->tp, 0, agi_bpp); | 
|  |  | 
|  | relax = msecs_to_jiffies(iscan->iget_retry_delay); | 
|  | do { | 
|  | ret = xfs_ialloc_read_agi(pag, sc->tp, XFS_IALLOC_FLAG_TRYLOCK, | 
|  | agi_bpp); | 
|  | if (ret != -EAGAIN) | 
|  | return ret; | 
|  | if (!iscan->iget_timeout || | 
|  | time_is_before_jiffies(iscan->__iget_deadline)) | 
|  | return -EBUSY; | 
|  |  | 
|  | trace_xchk_iscan_agi_retry_wait(iscan); | 
|  | } while (!schedule_timeout_killable(relax) && | 
|  | !xchk_iscan_aborted(iscan)); | 
|  | return -ECANCELED; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Advance ino to the next inode that the inobt thinks is allocated, being | 
|  | * careful to jump to the next AG if we've reached the right end of this AG's | 
|  | * inode btree.  Advancing ino effectively means that we've pushed the inode | 
|  | * scan forward, so set the iscan cursor to (ino - 1) so that our live update | 
|  | * predicates will track inode allocations in that part of the inode number | 
|  | * key space once we release the AGI buffer. | 
|  | * | 
|  | * Returns 1 if there's a new inode to examine, 0 if we've run out of inodes, | 
|  | * -ECANCELED if the live scan aborted, or the usual negative errno. | 
|  | */ | 
|  | STATIC int | 
|  | xchk_iscan_advance( | 
|  | struct xchk_iscan	*iscan, | 
|  | struct xfs_perag	**pagp, | 
|  | struct xfs_buf		**agi_bpp, | 
|  | xfs_inofree_t		*allocmaskp, | 
|  | uint8_t			*nr_inodesp) | 
|  | { | 
|  | struct xfs_scrub	*sc = iscan->sc; | 
|  | struct xfs_mount	*mp = sc->mp; | 
|  | struct xfs_buf		*agi_bp; | 
|  | struct xfs_perag	*pag; | 
|  | xfs_agnumber_t		agno; | 
|  | xfs_agino_t		agino; | 
|  | int			ret; | 
|  |  | 
|  | ASSERT(iscan->cursor_ino >= iscan->__visited_ino); | 
|  |  | 
|  | do { | 
|  | if (xchk_iscan_aborted(iscan)) | 
|  | return -ECANCELED; | 
|  |  | 
|  | agno = XFS_INO_TO_AGNO(mp, iscan->cursor_ino); | 
|  | pag = xfs_perag_get(mp, agno); | 
|  | if (!pag) | 
|  | return -ECANCELED; | 
|  |  | 
|  | ret = xchk_iscan_read_agi(iscan, pag, &agi_bp); | 
|  | if (ret) | 
|  | goto out_pag; | 
|  |  | 
|  | agino = XFS_INO_TO_AGINO(mp, iscan->cursor_ino); | 
|  | ret = xchk_iscan_find_next(iscan, agi_bp, pag, allocmaskp, | 
|  | &agino, nr_inodesp); | 
|  | if (ret) | 
|  | goto out_buf; | 
|  |  | 
|  | if (agino != NULLAGINO) { | 
|  | /* | 
|  | * Found the next inode in this AG, so return it along | 
|  | * with the AGI buffer and the perag structure to | 
|  | * ensure it cannot go away. | 
|  | */ | 
|  | xchk_iscan_move_cursor(iscan, agno, agino); | 
|  | *agi_bpp = agi_bp; | 
|  | *pagp = pag; | 
|  | return 1; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Did not find any more inodes in this AG, move on to the next | 
|  | * AG. | 
|  | */ | 
|  | agno = (agno + 1) % mp->m_sb.sb_agcount; | 
|  | xchk_iscan_move_cursor(iscan, agno, 0); | 
|  | xfs_trans_brelse(sc->tp, agi_bp); | 
|  | xfs_perag_put(pag); | 
|  |  | 
|  | trace_xchk_iscan_advance_ag(iscan); | 
|  | } while (iscan->cursor_ino != iscan->scan_start_ino); | 
|  |  | 
|  | xchk_iscan_finish(iscan); | 
|  | return 0; | 
|  |  | 
|  | out_buf: | 
|  | xfs_trans_brelse(sc->tp, agi_bp); | 
|  | out_pag: | 
|  | xfs_perag_put(pag); | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Grabbing the inode failed, so we need to back up the scan and ask the caller | 
|  | * to try to _advance the scan again.  Returns -EBUSY if we've run out of retry | 
|  | * opportunities, -ECANCELED if the process has a fatal signal pending, or | 
|  | * -EAGAIN if we should try again. | 
|  | */ | 
|  | STATIC int | 
|  | xchk_iscan_iget_retry( | 
|  | struct xchk_iscan	*iscan, | 
|  | bool			wait) | 
|  | { | 
|  | ASSERT(iscan->cursor_ino == iscan->__visited_ino + 1); | 
|  |  | 
|  | if (!iscan->iget_timeout || | 
|  | time_is_before_jiffies(iscan->__iget_deadline)) | 
|  | return -EBUSY; | 
|  |  | 
|  | if (wait) { | 
|  | unsigned long	relax; | 
|  |  | 
|  | /* | 
|  | * Sleep for a period of time to let the rest of the system | 
|  | * catch up.  If we return early, someone sent a kill signal to | 
|  | * the calling process. | 
|  | */ | 
|  | relax = msecs_to_jiffies(iscan->iget_retry_delay); | 
|  | trace_xchk_iscan_iget_retry_wait(iscan); | 
|  |  | 
|  | if (schedule_timeout_killable(relax) || | 
|  | xchk_iscan_aborted(iscan)) | 
|  | return -ECANCELED; | 
|  | } | 
|  |  | 
|  | iscan->cursor_ino--; | 
|  | return -EAGAIN; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * For an inode scan, we hold the AGI and want to try to grab a batch of | 
|  | * inodes.  Holding the AGI prevents inodegc from clearing freed inodes, | 
|  | * so we must use noretry here.  For every inode after the first one in the | 
 * batch, we don't want to wait either, so we use noretry there too.  Finally,
 * use dontcache to avoid polluting the cache.
|  | */ | 
|  | #define ISCAN_IGET_FLAGS	(XFS_IGET_NORETRY | XFS_IGET_DONTCACHE) | 
|  |  | 
|  | /* | 
|  | * Grab an inode as part of an inode scan.  While scanning this inode, the | 
|  | * caller must ensure that no other threads can modify the inode until a call | 
|  | * to xchk_iscan_visit succeeds. | 
|  | * | 
 * Returns the number of incore inodes grabbed; -EAGAIN if the caller should
 * call xchk_iscan_advance again; -EBUSY if we couldn't grab an inode;
|  | * -ECANCELED if there's a fatal signal pending; or some other negative errno. | 
|  | */ | 
|  | STATIC int | 
|  | xchk_iscan_iget( | 
|  | struct xchk_iscan	*iscan, | 
|  | struct xfs_perag	*pag, | 
|  | struct xfs_buf		*agi_bp, | 
|  | xfs_inofree_t		allocmask, | 
|  | uint8_t			nr_inodes) | 
|  | { | 
|  | struct xfs_scrub	*sc = iscan->sc; | 
|  | struct xfs_mount	*mp = sc->mp; | 
|  | xfs_ino_t		ino = iscan->cursor_ino; | 
|  | unsigned int		idx = 0; | 
|  | unsigned int		i; | 
|  | int			error; | 
|  |  | 
|  | ASSERT(iscan->__inodes[0] == NULL); | 
|  |  | 
|  | /* Fill the first slot in the inode array. */ | 
|  | error = xfs_iget(sc->mp, sc->tp, ino, ISCAN_IGET_FLAGS, 0, | 
|  | &iscan->__inodes[idx]); | 
|  |  | 
|  | trace_xchk_iscan_iget(iscan, error); | 
|  |  | 
|  | if (error == -ENOENT || error == -EAGAIN) { | 
|  | xfs_trans_brelse(sc->tp, agi_bp); | 
|  | xfs_perag_put(pag); | 
|  |  | 
|  | /* | 
|  | * It's possible that this inode has lost all of its links but | 
|  | * hasn't yet been inactivated.  If we don't have a transaction | 
|  | * or it's not writable, flush the inodegc workers and wait. | 
|  | * If we have a non-empty transaction, we must not block on | 
|  | * inodegc, which allocates its own transactions. | 
|  | */ | 
|  | if (sc->tp && !(sc->tp->t_flags & XFS_TRANS_NO_WRITECOUNT)) | 
|  | xfs_inodegc_push(mp); | 
|  | else | 
|  | xfs_inodegc_flush(mp); | 
|  | return xchk_iscan_iget_retry(iscan, true); | 
|  | } | 
|  |  | 
|  | if (error == -EINVAL) { | 
|  | xfs_trans_brelse(sc->tp, agi_bp); | 
|  | xfs_perag_put(pag); | 
|  |  | 
|  | /* | 
|  | * We thought the inode was allocated, but the inode btree | 
|  | * lookup failed, which means that it was freed since the last | 
|  | * time we advanced the cursor.  Back up and try again.  This | 
	 * should never happen since we still hold the AGI buffer from the
|  | * inobt check, but we need to be careful about infinite loops. | 
|  | */ | 
|  | return xchk_iscan_iget_retry(iscan, false); | 
|  | } | 
|  |  | 
|  | if (error) { | 
|  | xfs_trans_brelse(sc->tp, agi_bp); | 
|  | xfs_perag_put(pag); | 
|  | return error; | 
|  | } | 
|  | idx++; | 
|  | ino++; | 
|  | allocmask >>= 1; | 
|  |  | 
|  | /* | 
	 * Now that we've filled the first slot in __inodes, try to fill the
	 * rest of the batch with consecutively ordered inodes to reduce the
	 * number of _iter calls.  Make a bitmap of unallocated inodes from the
|  | * zeroes in the inuse bitmap; these inodes will not be scanned, but | 
|  | * the _want_live_update predicate will pass through all live updates. | 
|  | * | 
|  | * If we can't iget an allocated inode, stop and return what we have. | 
|  | */ | 
|  | mutex_lock(&iscan->lock); | 
|  | iscan->__batch_ino = ino - 1; | 
|  | iscan->__skipped_inomask = 0; | 
|  | mutex_unlock(&iscan->lock); | 
|  |  | 
|  | for (i = 1; i < nr_inodes; i++, ino++, allocmask >>= 1) { | 
|  | if (!(allocmask & 1)) { | 
|  | ASSERT(!(iscan->__skipped_inomask & (1ULL << i))); | 
|  |  | 
|  | mutex_lock(&iscan->lock); | 
|  | iscan->cursor_ino = ino; | 
|  | iscan->__skipped_inomask |= (1ULL << i); | 
|  | mutex_unlock(&iscan->lock); | 
|  | continue; | 
|  | } | 
|  |  | 
|  | ASSERT(iscan->__inodes[idx] == NULL); | 
|  |  | 
|  | error = xfs_iget(sc->mp, sc->tp, ino, ISCAN_IGET_FLAGS, 0, | 
|  | &iscan->__inodes[idx]); | 
|  | if (error) | 
|  | break; | 
|  |  | 
|  | mutex_lock(&iscan->lock); | 
|  | iscan->cursor_ino = ino; | 
|  | mutex_unlock(&iscan->lock); | 
|  | idx++; | 
|  | } | 
|  |  | 
|  | trace_xchk_iscan_iget_batch(sc->mp, iscan, nr_inodes, idx); | 
|  | xfs_trans_brelse(sc->tp, agi_bp); | 
|  | xfs_perag_put(pag); | 
|  | return idx; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Advance the visit cursor to reflect skipped inodes beyond whatever we | 
|  | * scanned. | 
|  | */ | 
|  | STATIC void | 
|  | xchk_iscan_finish_batch( | 
|  | struct xchk_iscan	*iscan) | 
|  | { | 
|  | xfs_ino_t		highest_skipped; | 
|  |  | 
|  | mutex_lock(&iscan->lock); | 
|  |  | 
|  | if (iscan->__batch_ino != NULLFSINO) { | 
|  | highest_skipped = iscan->__batch_ino + | 
|  | xfs_highbit64(iscan->__skipped_inomask); | 
|  | iscan->__visited_ino = max(iscan->__visited_ino, | 
|  | highest_skipped); | 
|  |  | 
|  | trace_xchk_iscan_skip(iscan); | 
|  | } | 
|  |  | 
|  | iscan->__batch_ino = NULLFSINO; | 
|  | iscan->__skipped_inomask = 0; | 
|  |  | 
|  | mutex_unlock(&iscan->lock); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Advance the inode scan cursor to the next allocated inode and return up to | 
|  | * 64 consecutive allocated inodes starting with the cursor position. | 
|  | */ | 
|  | STATIC int | 
|  | xchk_iscan_iter_batch( | 
|  | struct xchk_iscan	*iscan) | 
|  | { | 
|  | struct xfs_scrub	*sc = iscan->sc; | 
|  | int			ret; | 
|  |  | 
|  | xchk_iscan_finish_batch(iscan); | 
|  |  | 
|  | if (iscan->iget_timeout) | 
|  | iscan->__iget_deadline = jiffies + | 
|  | msecs_to_jiffies(iscan->iget_timeout); | 
|  |  | 
|  | do { | 
|  | struct xfs_buf	*agi_bp = NULL; | 
|  | struct xfs_perag *pag = NULL; | 
|  | xfs_inofree_t	allocmask = 0; | 
|  | uint8_t		nr_inodes = 0; | 
|  |  | 
|  | ret = xchk_iscan_advance(iscan, &pag, &agi_bp, &allocmask, | 
|  | &nr_inodes); | 
|  | if (ret != 1) | 
|  | return ret; | 
|  |  | 
|  | if (xchk_iscan_aborted(iscan)) { | 
|  | xfs_trans_brelse(sc->tp, agi_bp); | 
|  | xfs_perag_put(pag); | 
|  | ret = -ECANCELED; | 
|  | break; | 
|  | } | 
|  |  | 
|  | ret = xchk_iscan_iget(iscan, pag, agi_bp, allocmask, nr_inodes); | 
|  | } while (ret == -EAGAIN); | 
|  |  | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Advance the inode scan cursor to the next allocated inode and return the | 
|  | * incore inode structure associated with it. | 
|  | * | 
|  | * Returns 1 if there's a new inode to examine, 0 if we've run out of inodes, | 
|  | * -ECANCELED if the live scan aborted, -EBUSY if the incore inode could not be | 
|  | * grabbed, or the usual negative errno. | 
|  | * | 
|  | * If the function returns -EBUSY and the caller can handle skipping an inode, | 
|  | * it may call this function again to continue the scan with the next allocated | 
|  | * inode. | 
|  | */ | 
|  | int | 
|  | xchk_iscan_iter( | 
|  | struct xchk_iscan	*iscan, | 
|  | struct xfs_inode	**ipp) | 
|  | { | 
|  | unsigned int		i; | 
|  | int			error; | 
|  |  | 
|  | /* Find a cached inode, or go get another batch. */ | 
|  | for (i = 0; i < XFS_INODES_PER_CHUNK; i++) { | 
|  | if (iscan->__inodes[i]) | 
|  | goto foundit; | 
|  | } | 
|  |  | 
|  | error = xchk_iscan_iter_batch(iscan); | 
|  | if (error <= 0) | 
|  | return error; | 
|  |  | 
|  | ASSERT(iscan->__inodes[0] != NULL); | 
|  | i = 0; | 
|  |  | 
|  | foundit: | 
|  | /* Give the caller our reference. */ | 
|  | *ipp = iscan->__inodes[i]; | 
|  | iscan->__inodes[i] = NULL; | 
|  | return 1; | 
|  | } | 
|  |  | 
/* Clean up an xchk_iscan_iter call by dropping any inodes that we still hold. */
|  | void | 
|  | xchk_iscan_iter_finish( | 
|  | struct xchk_iscan	*iscan) | 
|  | { | 
|  | struct xfs_scrub	*sc = iscan->sc; | 
|  | unsigned int		i; | 
|  |  | 
|  | for (i = 0; i < XFS_INODES_PER_CHUNK; i++) { | 
|  | if (iscan->__inodes[i]) { | 
|  | xchk_irele(sc, iscan->__inodes[i]); | 
|  | iscan->__inodes[i] = NULL; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | /* Mark this inode scan finished and release resources. */ | 
|  | void | 
|  | xchk_iscan_teardown( | 
|  | struct xchk_iscan	*iscan) | 
|  | { | 
|  | xchk_iscan_iter_finish(iscan); | 
|  | xchk_iscan_finish(iscan); | 
|  | mutex_destroy(&iscan->lock); | 
|  | } | 
|  |  | 
|  | /* Pick an AG from which to start a scan. */ | 
|  | static inline xfs_ino_t | 
|  | xchk_iscan_rotor( | 
|  | struct xfs_mount	*mp) | 
|  | { | 
|  | static atomic_t		agi_rotor; | 
|  | unsigned int		r = atomic_inc_return(&agi_rotor) - 1; | 
|  |  | 
|  | /* | 
|  | * Rotoring *backwards* through the AGs, so we add one here before | 
|  | * subtracting from the agcount to arrive at an AG number. | 
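	 *
	 * For example, with sb_agcount == 4, successive rotor values 0, 1, 2
	 * and 3 become r = 1, 2, 3 and 4, so consecutive scans start at AG 3,
	 * 2, 1 and 0 before wrapping around.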
|  | */ | 
|  | r = (r % mp->m_sb.sb_agcount) + 1; | 
|  |  | 
|  | return XFS_AGINO_TO_INO(mp, mp->m_sb.sb_agcount - r, 0); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Set ourselves up to start an inode scan.  If the @iget_timeout and | 
|  | * @iget_retry_delay parameters are set, the scan will try to iget each inode | 
|  | * for @iget_timeout milliseconds.  If an iget call indicates that the inode is | 
|  | * waiting to be inactivated, the CPU will relax for @iget_retry_delay | 
|  | * milliseconds after pushing the inactivation workers. | 
|  | */ | 
|  | void | 
|  | xchk_iscan_start( | 
|  | struct xfs_scrub	*sc, | 
|  | unsigned int		iget_timeout, | 
|  | unsigned int		iget_retry_delay, | 
|  | struct xchk_iscan	*iscan) | 
|  | { | 
|  | xfs_ino_t		start_ino; | 
|  |  | 
|  | start_ino = xchk_iscan_rotor(sc->mp); | 
|  |  | 
|  | iscan->__batch_ino = NULLFSINO; | 
|  | iscan->__skipped_inomask = 0; | 
|  |  | 
|  | iscan->sc = sc; | 
|  | clear_bit(XCHK_ISCAN_OPSTATE_ABORTED, &iscan->__opstate); | 
|  | iscan->iget_timeout = iget_timeout; | 
|  | iscan->iget_retry_delay = iget_retry_delay; | 
|  | iscan->__visited_ino = start_ino; | 
|  | iscan->cursor_ino = start_ino; | 
|  | iscan->scan_start_ino = start_ino; | 
|  | mutex_init(&iscan->lock); | 
|  | memset(iscan->__inodes, 0, sizeof(iscan->__inodes)); | 
|  |  | 
|  | trace_xchk_iscan_start(iscan, start_ino); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Mark this inode as having been visited.  Callers must hold a sufficiently | 
|  | * exclusive lock on the inode to prevent concurrent modifications. | 
|  | */ | 
|  | void | 
|  | xchk_iscan_mark_visited( | 
|  | struct xchk_iscan	*iscan, | 
|  | struct xfs_inode	*ip) | 
|  | { | 
|  | mutex_lock(&iscan->lock); | 
|  | iscan->__visited_ino = ip->i_ino; | 
|  | trace_xchk_iscan_visit(iscan); | 
|  | mutex_unlock(&iscan->lock); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Did we skip this inode because it wasn't allocated when we loaded the batch? | 
|  | * If so, it is newly allocated and will not be scanned.  All live updates to | 
|  | * this inode must be passed to the caller to maintain scan correctness. | 
|  | */ | 
|  | static inline bool | 
|  | xchk_iscan_skipped( | 
|  | const struct xchk_iscan	*iscan, | 
|  | xfs_ino_t		ino) | 
|  | { | 
|  | if (iscan->__batch_ino == NULLFSINO) | 
|  | return false; | 
|  | if (ino < iscan->__batch_ino) | 
|  | return false; | 
|  | if (ino >= iscan->__batch_ino + XFS_INODES_PER_CHUNK) | 
|  | return false; | 
|  |  | 
|  | return iscan->__skipped_inomask & (1ULL << (ino - iscan->__batch_ino)); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Do we need a live update for this inode?  This is true if the scanner thread | 
|  | * has visited this inode and the scan hasn't been aborted due to errors. | 
|  | * Callers must hold a sufficiently exclusive lock on the inode to prevent | 
|  | * scanners from reading any inode metadata. | 
|  | */ | 
|  | bool | 
|  | xchk_iscan_want_live_update( | 
|  | struct xchk_iscan	*iscan, | 
|  | xfs_ino_t		ino) | 
|  | { | 
|  | bool			ret = false; | 
|  |  | 
|  | if (xchk_iscan_aborted(iscan)) | 
|  | return false; | 
|  |  | 
|  | mutex_lock(&iscan->lock); | 
|  |  | 
|  | trace_xchk_iscan_want_live_update(iscan, ino); | 
|  |  | 
|  | /* Scan is finished, caller should receive all updates. */ | 
|  | if (iscan->__visited_ino == NULLFSINO) { | 
|  | ret = true; | 
|  | goto unlock; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * No inodes have been visited yet, so the visited cursor points at the | 
|  | * start of the scan range.  The caller should not receive any updates. | 
|  | */ | 
|  | if (iscan->scan_start_ino == iscan->__visited_ino) { | 
|  | ret = false; | 
|  | goto unlock; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * This inode was not allocated at the time of the iscan batch. | 
|  | * The caller should receive all updates. | 
|  | */ | 
|  | if (xchk_iscan_skipped(iscan, ino)) { | 
|  | ret = true; | 
|  | goto unlock; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * The visited cursor hasn't yet wrapped around the end of the FS.  If | 
|  | * @ino is inside the starred range, the caller should receive updates: | 
|  | * | 
|  | * 0 ------------ S ************ V ------------ EOFS | 
|  | */ | 
|  | if (iscan->scan_start_ino <= iscan->__visited_ino) { | 
|  | if (ino >= iscan->scan_start_ino && | 
|  | ino <= iscan->__visited_ino) | 
|  | ret = true; | 
|  |  | 
|  | goto unlock; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * The visited cursor wrapped around the end of the FS.  If @ino is | 
|  | * inside the starred range, the caller should receive updates: | 
|  | * | 
|  | * 0 ************ V ------------ S ************ EOFS | 
|  | */ | 
|  | if (ino >= iscan->scan_start_ino || ino <= iscan->__visited_ino) | 
|  | ret = true; | 
|  |  | 
|  | unlock: | 
|  | mutex_unlock(&iscan->lock); | 
|  | return ret; | 
|  | } |
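
/*
 * Illustrative only: the core decision that a live update hook makes to keep
 * a caller's new index in sync with this scan.  Here @foo, foo->lock,
 * foo->iscan, and xchk_foo_update_index() are hypothetical stand-ins for a
 * scrubber's private data and update helper; real hooks are wired up by each
 * caller through the xfs_hooks machinery.
 *
 *	mutex_lock(&foo->lock);
 *	if (xchk_iscan_want_live_update(&foo->iscan, ip->i_ino))
 *		error = xchk_foo_update_index(foo, ip);
 *	mutex_unlock(&foo->lock);
 */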