| // SPDX-License-Identifier: GPL-2.0-or-later |
| /* |
| * Copyright (C) 2020-2023 Oracle. All Rights Reserved. |
| * Author: Darrick J. Wong <djwong@kernel.org> |
| */ |
| #include "xfs.h" |
| #include "xfs_fs.h" |
| #include "xfs_shared.h" |
| #include "xfs_format.h" |
| #include "xfs_trans_resv.h" |
| #include "xfs_mount.h" |
| #include "xfs_btree.h" |
| #include "xfs_log_format.h" |
| #include "xfs_trans.h" |
| #include "xfs_rtalloc.h" |
| #include "xfs_inode.h" |
| #include "xfs_bit.h" |
| #include "xfs_bmap.h" |
| #include "xfs_bmap_btree.h" |
| #include "xfs_rmap.h" |
| #include "xfs_rtrmap_btree.h" |
| #include "xfs_exchmaps.h" |
| #include "xfs_rtbitmap.h" |
| #include "xfs_rtgroup.h" |
| #include "xfs_extent_busy.h" |
| #include "xfs_refcount.h" |
| #include "scrub/scrub.h" |
| #include "scrub/common.h" |
| #include "scrub/trace.h" |
| #include "scrub/repair.h" |
| #include "scrub/xfile.h" |
| #include "scrub/tempfile.h" |
| #include "scrub/tempexch.h" |
| #include "scrub/reap.h" |
| #include "scrub/rtbitmap.h" |
| |
| /* rt bitmap content repairs */ |
| |
| /* Set up to repair the realtime bitmap for this group. */ |
| int |
| xrep_setup_rtbitmap( |
| struct xfs_scrub *sc, |
| struct xchk_rtbitmap *rtb) |
| { |
| struct xfs_mount *mp = sc->mp; |
| char *descr; |
| unsigned long long blocks = mp->m_sb.sb_rbmblocks; |
| int error; |
| |
| error = xrep_tempfile_create(sc, S_IFREG); |
| if (error) |
| return error; |
| |
| /* Create an xfile to hold our reconstructed bitmap. */ |
| descr = xchk_xfile_rtgroup_descr(sc, "bitmap file"); |
| error = xfile_create(descr, blocks * mp->m_sb.sb_blocksize, &sc->xfile); |
| kfree(descr); |
| if (error) |
| return error; |
| |
| /* |
| * Reserve enough blocks to write out a completely new bitmap file, |
| * plus twice as many blocks as we would need if we can only allocate |
| * one block per data fork mapping. This should cover the |
| * preallocation of the temporary file and exchanging the extent |
| * mappings. |
| * |
| * We cannot use xfs_exchmaps_estimate because we have not yet |
| * constructed the replacement bitmap and therefore do not know how |
| * many extents it will use. By the time we do, we will have a dirty |
| * transaction (which we cannot drop because we cannot drop the |
| * rtbitmap ILOCK) and cannot ask for more reservation. |
| */ |
| blocks += xfs_bmbt_calc_size(mp, blocks) * 2; |
| if (blocks > UINT_MAX) |
| return -EOPNOTSUPP; |
| |
| rtb->resblks += blocks; |
| return 0; |
| } |
| |
| static inline xrep_wordoff_t |
| rtx_to_wordoff( |
| struct xfs_mount *mp, |
| xfs_rtxnum_t rtx) |
| { |
| return rtx >> XFS_NBWORDLOG; |
| } |
| |
| static inline xrep_wordcnt_t |
| rtxlen_to_wordcnt( |
| xfs_rtxlen_t rtxlen) |
| { |
| return rtxlen >> XFS_NBWORDLOG; |
| } |
| |
| /* Helper functions to record rtwords in an xfile. */ |
| |
| static inline int |
| xfbmp_load( |
| struct xchk_rtbitmap *rtb, |
| xrep_wordoff_t wordoff, |
| xfs_rtword_t *word) |
| { |
| union xfs_rtword_raw urk; |
| int error; |
| |
| ASSERT(xfs_has_rtgroups(rtb->sc->mp)); |
| |
| error = xfile_load(rtb->sc->xfile, &urk, |
| sizeof(union xfs_rtword_raw), |
| wordoff << XFS_WORDLOG); |
| if (error) |
| return error; |
| |
| *word = be32_to_cpu(urk.rtg); |
| return 0; |
| } |
| |
| static inline int |
| xfbmp_store( |
| struct xchk_rtbitmap *rtb, |
| xrep_wordoff_t wordoff, |
| const xfs_rtword_t word) |
| { |
| union xfs_rtword_raw urk; |
| |
| ASSERT(xfs_has_rtgroups(rtb->sc->mp)); |
| |
| urk.rtg = cpu_to_be32(word); |
| return xfile_store(rtb->sc->xfile, &urk, |
| sizeof(union xfs_rtword_raw), |
| wordoff << XFS_WORDLOG); |
| } |
| |
| static inline int |
| xfbmp_copyin( |
| struct xchk_rtbitmap *rtb, |
| xrep_wordoff_t wordoff, |
| const union xfs_rtword_raw *word, |
| xrep_wordcnt_t nr_words) |
| { |
| return xfile_store(rtb->sc->xfile, word, nr_words << XFS_WORDLOG, |
| wordoff << XFS_WORDLOG); |
| } |
| |
| static inline int |
| xfbmp_copyout( |
| struct xchk_rtbitmap *rtb, |
| xrep_wordoff_t wordoff, |
| union xfs_rtword_raw *word, |
| xrep_wordcnt_t nr_words) |
| { |
| return xfile_load(rtb->sc->xfile, word, nr_words << XFS_WORDLOG, |
| wordoff << XFS_WORDLOG); |
| } |
| |
| /* Perform a logical OR operation on an rtword in the incore bitmap. */ |
| static int |
| xrep_rtbitmap_or( |
| struct xchk_rtbitmap *rtb, |
| xrep_wordoff_t wordoff, |
| xfs_rtword_t mask) |
| { |
| xfs_rtword_t word; |
| int error; |
| |
| error = xfbmp_load(rtb, wordoff, &word); |
| if (error) |
| return error; |
| |
| trace_xrep_rtbitmap_or(rtb->sc->mp, wordoff, mask, word); |
| |
| return xfbmp_store(rtb, wordoff, word | mask); |
| } |
| |
| /* |
| * Mark as free every rt extent between the next rt block we expected to see |
| * in the rtrmap records and the given rt block. |
| */ |
| STATIC int |
| xrep_rtbitmap_mark_free( |
| struct xchk_rtbitmap *rtb, |
| xfs_rgblock_t rgbno) |
| { |
| struct xfs_mount *mp = rtb->sc->mp; |
| struct xchk_rt *sr = &rtb->sc->sr; |
| struct xfs_rtgroup *rtg = sr->rtg; |
| xfs_rtxnum_t startrtx; |
| xfs_rtxnum_t nextrtx; |
| xrep_wordoff_t wordoff, nextwordoff; |
| unsigned int bit; |
| unsigned int bufwsize; |
| xfs_extlen_t mod; |
| xfs_rtword_t mask; |
| enum xbtree_recpacking outcome; |
| int error; |
| |
| if (!xfs_verify_rgbext(rtg, rtb->next_rgbno, rgbno - rtb->next_rgbno)) |
| return -EFSCORRUPTED; |
| |
| /* |
| * Convert rt blocks to rt extents The block range we find must be |
| * aligned to an rtextent boundary on both ends. |
| */ |
| startrtx = xfs_rgbno_to_rtx(mp, rtb->next_rgbno); |
| mod = xfs_rgbno_to_rtxoff(mp, rtb->next_rgbno); |
| if (mod) |
| return -EFSCORRUPTED; |
| |
| nextrtx = xfs_rgbno_to_rtx(mp, rgbno - 1) + 1; |
| mod = xfs_rgbno_to_rtxoff(mp, rgbno - 1); |
| if (mod != mp->m_sb.sb_rextsize - 1) |
| return -EFSCORRUPTED; |
| |
| /* Must not be shared or CoW staging. */ |
| if (sr->refc_cur) { |
| error = xfs_refcount_has_records(sr->refc_cur, |
| XFS_REFC_DOMAIN_SHARED, rtb->next_rgbno, |
| rgbno - rtb->next_rgbno, &outcome); |
| if (error) |
| return error; |
| if (outcome != XBTREE_RECPACKING_EMPTY) |
| return -EFSCORRUPTED; |
| |
| error = xfs_refcount_has_records(sr->refc_cur, |
| XFS_REFC_DOMAIN_COW, rtb->next_rgbno, |
| rgbno - rtb->next_rgbno, &outcome); |
| if (error) |
| return error; |
| if (outcome != XBTREE_RECPACKING_EMPTY) |
| return -EFSCORRUPTED; |
| } |
| |
| trace_xrep_rtbitmap_record_free(mp, startrtx, nextrtx - 1); |
| |
| /* Set bits as needed to round startrtx up to the nearest word. */ |
| bit = startrtx & XREP_RTBMP_WORDMASK; |
| if (bit) { |
| xfs_rtblock_t len = nextrtx - startrtx; |
| unsigned int lastbit; |
| |
| lastbit = min(bit + len, XFS_NBWORD); |
| mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit; |
| |
| error = xrep_rtbitmap_or(rtb, rtx_to_wordoff(mp, startrtx), |
| mask); |
| if (error || lastbit - bit == len) |
| return error; |
| startrtx += XFS_NBWORD - bit; |
| } |
| |
| /* Set bits as needed to round nextrtx down to the nearest word. */ |
| bit = nextrtx & XREP_RTBMP_WORDMASK; |
| if (bit) { |
| mask = ((xfs_rtword_t)1 << bit) - 1; |
| |
| error = xrep_rtbitmap_or(rtb, rtx_to_wordoff(mp, nextrtx), |
| mask); |
| if (error || startrtx + bit == nextrtx) |
| return error; |
| nextrtx -= bit; |
| } |
| |
| trace_xrep_rtbitmap_record_free_bulk(mp, startrtx, nextrtx - 1); |
| |
| /* Set all the words in between, up to a whole fs block at once. */ |
| wordoff = rtx_to_wordoff(mp, startrtx); |
| nextwordoff = rtx_to_wordoff(mp, nextrtx); |
| bufwsize = mp->m_sb.sb_blocksize >> XFS_WORDLOG; |
| |
| while (wordoff < nextwordoff) { |
| xrep_wordoff_t rem; |
| xrep_wordcnt_t wordcnt; |
| |
| wordcnt = min_t(xrep_wordcnt_t, nextwordoff - wordoff, |
| bufwsize); |
| |
| /* |
| * Try to keep us aligned to the rtwords buffer to reduce the |
| * number of xfile writes. |
| */ |
| rem = wordoff & (bufwsize - 1); |
| if (rem) |
| wordcnt = min_t(xrep_wordcnt_t, wordcnt, |
| bufwsize - rem); |
| |
| error = xfbmp_copyin(rtb, wordoff, rtb->words, wordcnt); |
| if (error) |
| return error; |
| |
| wordoff += wordcnt; |
| } |
| |
| return 0; |
| } |
| |
| /* Set free space in the rtbitmap based on rtrmapbt records. */ |
| STATIC int |
| xrep_rtbitmap_walk_rtrmap( |
| struct xfs_btree_cur *cur, |
| const struct xfs_rmap_irec *rec, |
| void *priv) |
| { |
| struct xchk_rtbitmap *rtb = priv; |
| int error = 0; |
| |
| if (xchk_should_terminate(rtb->sc, &error)) |
| return error; |
| |
| if (rtb->next_rgbno < rec->rm_startblock) { |
| error = xrep_rtbitmap_mark_free(rtb, rec->rm_startblock); |
| if (error) |
| return error; |
| } |
| |
| rtb->next_rgbno = max(rtb->next_rgbno, |
| rec->rm_startblock + rec->rm_blockcount); |
| return 0; |
| } |
| |
| /* |
| * Walk the rtrmapbt to find all the gaps between records, and mark the gaps |
| * in the realtime bitmap that we're computing. |
| */ |
| STATIC int |
| xrep_rtbitmap_find_freespace( |
| struct xchk_rtbitmap *rtb) |
| { |
| struct xfs_scrub *sc = rtb->sc; |
| struct xfs_mount *mp = sc->mp; |
| struct xfs_rtgroup *rtg = sc->sr.rtg; |
| uint64_t blockcount; |
| int error; |
| |
| /* Prepare a buffer of ones so that we can accelerate bulk setting. */ |
| memset(rtb->words, 0xFF, mp->m_sb.sb_blocksize); |
| |
| xrep_rtgroup_btcur_init(sc, &sc->sr); |
| error = xfs_rmap_query_all(sc->sr.rmap_cur, xrep_rtbitmap_walk_rtrmap, |
| rtb); |
| if (error) |
| goto out; |
| |
| /* |
| * Mark as free every possible rt extent from the last one we saw to |
| * the end of the rt group. |
| */ |
| blockcount = rtg->rtg_extents * mp->m_sb.sb_rextsize; |
| if (rtb->next_rgbno < blockcount) { |
| error = xrep_rtbitmap_mark_free(rtb, blockcount); |
| if (error) |
| goto out; |
| } |
| |
| out: |
| xchk_rtgroup_btcur_free(&sc->sr); |
| return error; |
| } |
| |
| static int |
| xrep_rtbitmap_prep_buf( |
| struct xfs_scrub *sc, |
| struct xfs_buf *bp, |
| void *data) |
| { |
| struct xchk_rtbitmap *rtb = data; |
| struct xfs_mount *mp = sc->mp; |
| union xfs_rtword_raw *ondisk; |
| int error; |
| |
| rtb->args.mp = sc->mp; |
| rtb->args.tp = sc->tp; |
| rtb->args.rbmbp = bp; |
| ondisk = xfs_rbmblock_wordptr(&rtb->args, 0); |
| rtb->args.rbmbp = NULL; |
| |
| error = xfbmp_copyout(rtb, rtb->prep_wordoff, ondisk, |
| mp->m_blockwsize); |
| if (error) |
| return error; |
| |
| if (xfs_has_rtgroups(sc->mp)) { |
| struct xfs_rtbuf_blkinfo *hdr = bp->b_addr; |
| |
| hdr->rt_magic = cpu_to_be32(XFS_RTBITMAP_MAGIC); |
| hdr->rt_owner = cpu_to_be64(sc->ip->i_ino); |
| hdr->rt_blkno = cpu_to_be64(xfs_buf_daddr(bp)); |
| hdr->rt_lsn = 0; |
| uuid_copy(&hdr->rt_uuid, &sc->mp->m_sb.sb_meta_uuid); |
| bp->b_ops = &xfs_rtbitmap_buf_ops; |
| } else { |
| bp->b_ops = &xfs_rtbuf_ops; |
| } |
| |
| rtb->prep_wordoff += mp->m_blockwsize; |
| xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_RTBITMAP_BUF); |
| return 0; |
| } |
| |
| /* |
| * Make sure that the given range of the data fork of the realtime file is |
| * mapped to written blocks. The caller must ensure that the inode is joined |
| * to the transaction. |
| */ |
| STATIC int |
| xrep_rtbitmap_data_mappings( |
| struct xfs_scrub *sc, |
| xfs_filblks_t len) |
| { |
| struct xfs_bmbt_irec map; |
| xfs_fileoff_t off = 0; |
| int error; |
| |
| ASSERT(sc->ip != NULL); |
| |
| while (off < len) { |
| int nmaps = 1; |
| |
| /* |
| * If we have a real extent mapping this block then we're |
| * in ok shape. |
| */ |
| error = xfs_bmapi_read(sc->ip, off, len - off, &map, &nmaps, |
| XFS_DATA_FORK); |
| if (error) |
| return error; |
| if (nmaps == 0) { |
| ASSERT(nmaps != 0); |
| return -EFSCORRUPTED; |
| } |
| |
| /* |
| * Written extents are ok. Holes are not filled because we |
| * do not know the freespace information. |
| */ |
| if (xfs_bmap_is_written_extent(&map) || |
| map.br_startblock == HOLESTARTBLOCK) { |
| off = map.br_startoff + map.br_blockcount; |
| continue; |
| } |
| |
| /* |
| * If we find a delalloc reservation then something is very |
| * very wrong. Bail out. |
| */ |
| if (map.br_startblock == DELAYSTARTBLOCK) |
| return -EFSCORRUPTED; |
| |
| /* Make sure we're really converting an unwritten extent. */ |
| if (map.br_state != XFS_EXT_UNWRITTEN) { |
| ASSERT(map.br_state == XFS_EXT_UNWRITTEN); |
| return -EFSCORRUPTED; |
| } |
| |
| /* Make sure this block has a real zeroed extent mapped. */ |
| nmaps = 1; |
| error = xfs_bmapi_write(sc->tp, sc->ip, map.br_startoff, |
| map.br_blockcount, |
| XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO, |
| 0, &map, &nmaps); |
| if (error) |
| return error; |
| |
| /* Commit new extent and all deferred work. */ |
| error = xrep_defer_finish(sc); |
| if (error) |
| return error; |
| |
| off = map.br_startoff + map.br_blockcount; |
| } |
| |
| return 0; |
| } |
| |
| /* Fix broken rt volume geometry. */ |
| STATIC int |
| xrep_rtbitmap_geometry( |
| struct xfs_scrub *sc, |
| struct xchk_rtbitmap *rtb) |
| { |
| struct xfs_mount *mp = sc->mp; |
| struct xfs_trans *tp = sc->tp; |
| |
| /* Superblock fields */ |
| if (mp->m_sb.sb_rextents != rtb->rextents) |
| xfs_trans_mod_sb(sc->tp, XFS_TRANS_SB_REXTENTS, |
| rtb->rextents - mp->m_sb.sb_rextents); |
| |
| if (mp->m_sb.sb_rbmblocks != rtb->rbmblocks) |
| xfs_trans_mod_sb(tp, XFS_TRANS_SB_RBMBLOCKS, |
| rtb->rbmblocks - mp->m_sb.sb_rbmblocks); |
| |
| if (mp->m_sb.sb_rextslog != rtb->rextslog) |
| xfs_trans_mod_sb(tp, XFS_TRANS_SB_REXTSLOG, |
| rtb->rextslog - mp->m_sb.sb_rextslog); |
| |
| /* Fix broken isize */ |
| sc->ip->i_disk_size = roundup_64(sc->ip->i_disk_size, |
| mp->m_sb.sb_blocksize); |
| |
| if (sc->ip->i_disk_size < XFS_FSB_TO_B(mp, rtb->rbmblocks)) |
| sc->ip->i_disk_size = XFS_FSB_TO_B(mp, rtb->rbmblocks); |
| |
| xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE); |
| return xrep_roll_trans(sc); |
| } |
| |
| /* Repair the realtime bitmap file metadata. */ |
| int |
| xrep_rtbitmap( |
| struct xfs_scrub *sc) |
| { |
| struct xchk_rtbitmap *rtb = sc->buf; |
| struct xfs_mount *mp = sc->mp; |
| struct xfs_group *xg = rtg_group(sc->sr.rtg); |
| unsigned long long blocks = 0; |
| unsigned int busy_gen; |
| int error; |
| |
| /* We require the realtime rmapbt to rebuild anything. */ |
| if (!xfs_has_rtrmapbt(sc->mp)) |
| return -EOPNOTSUPP; |
| /* We require atomic file exchange range to rebuild anything. */ |
| if (!xfs_has_exchange_range(sc->mp)) |
| return -EOPNOTSUPP; |
| |
| /* Impossibly large rtbitmap means we can't touch the filesystem. */ |
| if (rtb->rbmblocks > U32_MAX) |
| return 0; |
| |
| /* |
| * If the size of the rt bitmap file is larger than what we reserved, |
| * figure out if we need to adjust the block reservation in the |
| * transaction. |
| */ |
| blocks = xfs_bmbt_calc_size(mp, rtb->rbmblocks); |
| if (blocks > UINT_MAX) |
| return -EOPNOTSUPP; |
| if (blocks > rtb->resblks) { |
| error = xfs_trans_reserve_more(sc->tp, blocks, 0); |
| if (error) |
| return error; |
| |
| rtb->resblks += blocks; |
| } |
| |
| /* Fix inode core and forks. */ |
| error = xrep_metadata_inode_forks(sc); |
| if (error) |
| return error; |
| |
| xfs_trans_ijoin(sc->tp, sc->ip, 0); |
| |
| /* Ensure no unwritten extents. */ |
| error = xrep_rtbitmap_data_mappings(sc, rtb->rbmblocks); |
| if (error) |
| return error; |
| |
| /* |
| * Fix inconsistent bitmap geometry. This function returns with a |
| * clean scrub transaction. |
| */ |
| error = xrep_rtbitmap_geometry(sc, rtb); |
| if (error) |
| return error; |
| |
| /* |
| * Make sure the busy extent list is clear because we can't put extents |
| * on there twice. |
| */ |
| if (!xfs_extent_busy_list_empty(xg, &busy_gen)) { |
| error = xfs_extent_busy_flush(sc->tp, xg, busy_gen, 0); |
| if (error) |
| return error; |
| } |
| |
| /* |
| * Generate the new rtbitmap data. We don't need the rtbmp information |
| * once this call is finished. |
| */ |
| error = xrep_rtbitmap_find_freespace(rtb); |
| if (error) |
| return error; |
| |
| /* |
| * Try to take ILOCK_EXCL of the temporary file. We had better be the |
| * only ones holding onto this inode, but we can't block while holding |
| * the rtbitmap file's ILOCK_EXCL. |
| */ |
| while (!xrep_tempfile_ilock_nowait(sc)) { |
| if (xchk_should_terminate(sc, &error)) |
| return error; |
| delay(1); |
| } |
| |
| /* |
| * Make sure we have space allocated for the part of the bitmap |
| * file that corresponds to this group. We already joined sc->ip. |
| */ |
| xfs_trans_ijoin(sc->tp, sc->tempip, 0); |
| error = xrep_tempfile_prealloc(sc, 0, rtb->rbmblocks); |
| if (error) |
| return error; |
| |
| /* Last chance to abort before we start committing fixes. */ |
| if (xchk_should_terminate(sc, &error)) |
| return error; |
| |
| /* Copy the bitmap file that we generated. */ |
| error = xrep_tempfile_copyin(sc, 0, rtb->rbmblocks, |
| xrep_rtbitmap_prep_buf, rtb); |
| if (error) |
| return error; |
| error = xrep_tempfile_set_isize(sc, |
| XFS_FSB_TO_B(sc->mp, sc->mp->m_sb.sb_rbmblocks)); |
| if (error) |
| return error; |
| |
| /* |
| * Now exchange the data fork contents. We're done with the temporary |
| * buffer, so we can reuse it for the tempfile exchmaps information. |
| */ |
| error = xrep_tempexch_trans_reserve(sc, XFS_DATA_FORK, 0, |
| rtb->rbmblocks, &rtb->tempexch); |
| if (error) |
| return error; |
| |
| error = xrep_tempexch_contents(sc, &rtb->tempexch); |
| if (error) |
| return error; |
| |
| /* Free the old rtbitmap blocks if they're not in use. */ |
| return xrep_reap_ifork(sc, sc->tempip, XFS_DATA_FORK); |
| } |