Commit 510a28e1 authored by Darrick J. Wong's avatar Darrick J. Wong
Browse files

xfs: don't allocate scrub contexts on the stack



Convert the on-stack scrub context, btree scrub context, and da btree
scrub context into a heap allocation so that we reduce stack usage and
gain the ability to handle tall btrees without issue.

Specifically, this saves us ~208 bytes for the dabtree scrub, ~464 bytes
for the btree scrub, and ~200 bytes for the main scrub context.

Signed-off-by: default avatarDarrick J. Wong <djwong@kernel.org>
Reviewed-by: default avatarChandan Babu R <chandan.babu@oracle.com>
Reviewed-by: default avatarChristoph Hellwig <hch@lst.de>
parent ae127f08
Loading
Loading
Loading
Loading
+31 −23
Original line number Diff line number Diff line
@@ -627,15 +627,8 @@ xchk_btree(
	const struct xfs_owner_info	*oinfo,
	void				*private)
{
	struct xchk_btree		bs = {
		.cur			= cur,
		.scrub_rec		= scrub_fn,
		.oinfo			= oinfo,
		.firstrec		= true,
		.private		= private,
		.sc			= sc,
	};
	union xfs_btree_ptr		ptr;
	struct xchk_btree		*bs;
	union xfs_btree_ptr		*pp;
	union xfs_btree_rec		*recp;
	struct xfs_btree_block		*block;
@@ -646,10 +639,24 @@ xchk_btree(
	int				i;
	int				error = 0;

	/*
	 * Allocate the btree scrub context from the heap, because this
	 * structure can get rather large.
	 */
	bs = kmem_zalloc(sizeof(struct xchk_btree), KM_NOFS | KM_MAYFAIL);
	if (!bs)
		return -ENOMEM;
	bs->cur = cur;
	bs->scrub_rec = scrub_fn;
	bs->oinfo = oinfo;
	bs->firstrec = true;
	bs->private = private;
	bs->sc = sc;

	/* Initialize scrub state */
	for (i = 0; i < XFS_BTREE_MAXLEVELS; i++)
		bs.firstkey[i] = true;
	INIT_LIST_HEAD(&bs.to_check);
		bs->firstkey[i] = true;
	INIT_LIST_HEAD(&bs->to_check);

	/* Don't try to check a tree with a height we can't handle. */
	if (cur->bc_nlevels > XFS_BTREE_MAXLEVELS) {
@@ -663,9 +670,9 @@ xchk_btree(
	 */
	level = cur->bc_nlevels - 1;
	cur->bc_ops->init_ptr_from_cur(cur, &ptr);
	if (!xchk_btree_ptr_ok(&bs, cur->bc_nlevels, &ptr))
	if (!xchk_btree_ptr_ok(bs, cur->bc_nlevels, &ptr))
		goto out;
	error = xchk_btree_get_block(&bs, level, &ptr, &block, &bp);
	error = xchk_btree_get_block(bs, level, &ptr, &block, &bp);
	if (error || !block)
		goto out;

@@ -678,7 +685,7 @@ xchk_btree(
			/* End of leaf, pop back towards the root. */
			if (cur->bc_ptrs[level] >
			    be16_to_cpu(block->bb_numrecs)) {
				xchk_btree_block_keys(&bs, level, block);
				xchk_btree_block_keys(bs, level, block);
				if (level < cur->bc_nlevels - 1)
					cur->bc_ptrs[level + 1]++;
				level++;
@@ -686,11 +693,11 @@ xchk_btree(
			}

			/* Records in order for scrub? */
			xchk_btree_rec(&bs);
			xchk_btree_rec(bs);

			/* Call out to the record checker. */
			recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block);
			error = bs.scrub_rec(&bs, recp);
			error = bs->scrub_rec(bs, recp);
			if (error)
				break;
			if (xchk_should_terminate(sc, &error) ||
@@ -703,7 +710,7 @@ xchk_btree(

		/* End of node, pop back towards the root. */
		if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) {
			xchk_btree_block_keys(&bs, level, block);
			xchk_btree_block_keys(bs, level, block);
			if (level < cur->bc_nlevels - 1)
				cur->bc_ptrs[level + 1]++;
			level++;
@@ -711,16 +718,16 @@ xchk_btree(
		}

		/* Keys in order for scrub? */
		xchk_btree_key(&bs, level);
		xchk_btree_key(bs, level);

		/* Drill another level deeper. */
		pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block);
		if (!xchk_btree_ptr_ok(&bs, level, pp)) {
		if (!xchk_btree_ptr_ok(bs, level, pp)) {
			cur->bc_ptrs[level]++;
			continue;
		}
		level--;
		error = xchk_btree_get_block(&bs, level, pp, &block, &bp);
		error = xchk_btree_get_block(bs, level, pp, &block, &bp);
		if (error || !block)
			goto out;

@@ -729,13 +736,14 @@ xchk_btree(

out:
	/* Process deferred owner checks on btree blocks. */
	list_for_each_entry_safe(co, n, &bs.to_check, list) {
		if (!error && bs.cur)
			error = xchk_btree_check_block_owner(&bs,
					co->level, co->daddr);
	list_for_each_entry_safe(co, n, &bs->to_check, list) {
		if (!error && bs->cur)
			error = xchk_btree_check_block_owner(bs, co->level,
					co->daddr);
		list_del(&co->list);
		kmem_free(co);
	}
	kmem_free(bs);

	return error;
}
+33 −29
Original line number Diff line number Diff line
@@ -473,7 +473,7 @@ xchk_da_btree(
	xchk_da_btree_rec_fn		scrub_fn,
	void				*private)
{
	struct xchk_da_btree		ds = {};
	struct xchk_da_btree		*ds;
	struct xfs_mount		*mp = sc->mp;
	struct xfs_da_state_blk		*blks;
	struct xfs_da_node_entry	*key;
@@ -486,32 +486,35 @@ xchk_da_btree(
		return 0;

	/* Set up initial da state. */
	ds.dargs.dp = sc->ip;
	ds.dargs.whichfork = whichfork;
	ds.dargs.trans = sc->tp;
	ds.dargs.op_flags = XFS_DA_OP_OKNOENT;
	ds.state = xfs_da_state_alloc(&ds.dargs);
	ds.sc = sc;
	ds.private = private;
	ds = kmem_zalloc(sizeof(struct xchk_da_btree), KM_NOFS | KM_MAYFAIL);
	if (!ds)
		return -ENOMEM;
	ds->dargs.dp = sc->ip;
	ds->dargs.whichfork = whichfork;
	ds->dargs.trans = sc->tp;
	ds->dargs.op_flags = XFS_DA_OP_OKNOENT;
	ds->state = xfs_da_state_alloc(&ds->dargs);
	ds->sc = sc;
	ds->private = private;
	if (whichfork == XFS_ATTR_FORK) {
		ds.dargs.geo = mp->m_attr_geo;
		ds.lowest = 0;
		ds.highest = 0;
		ds->dargs.geo = mp->m_attr_geo;
		ds->lowest = 0;
		ds->highest = 0;
	} else {
		ds.dargs.geo = mp->m_dir_geo;
		ds.lowest = ds.dargs.geo->leafblk;
		ds.highest = ds.dargs.geo->freeblk;
		ds->dargs.geo = mp->m_dir_geo;
		ds->lowest = ds->dargs.geo->leafblk;
		ds->highest = ds->dargs.geo->freeblk;
	}
	blkno = ds.lowest;
	blkno = ds->lowest;
	level = 0;

	/* Find the root of the da tree, if present. */
	blks = ds.state->path.blk;
	error = xchk_da_btree_block(&ds, level, blkno);
	blks = ds->state->path.blk;
	error = xchk_da_btree_block(ds, level, blkno);
	if (error)
		goto out_state;
	/*
	 * We didn't find a block at ds.lowest, which means that there's
	 * We didn't find a block at ds->lowest, which means that there's
	 * no LEAF1/LEAFN tree (at least not where it's supposed to be),
	 * so jump out now.
	 */
@@ -523,16 +526,16 @@ xchk_da_btree(
		/* Handle leaf block. */
		if (blks[level].magic != XFS_DA_NODE_MAGIC) {
			/* End of leaf, pop back towards the root. */
			if (blks[level].index >= ds.maxrecs[level]) {
			if (blks[level].index >= ds->maxrecs[level]) {
				if (level > 0)
					blks[level - 1].index++;
				ds.tree_level++;
				ds->tree_level++;
				level--;
				continue;
			}

			/* Dispatch record scrubbing. */
			error = scrub_fn(&ds, level);
			error = scrub_fn(ds, level);
			if (error)
				break;
			if (xchk_should_terminate(sc, &error) ||
@@ -545,17 +548,17 @@ xchk_da_btree(


		/* End of node, pop back towards the root. */
		if (blks[level].index >= ds.maxrecs[level]) {
		if (blks[level].index >= ds->maxrecs[level]) {
			if (level > 0)
				blks[level - 1].index++;
			ds.tree_level++;
			ds->tree_level++;
			level--;
			continue;
		}

		/* Hashes in order for scrub? */
		key = xchk_da_btree_node_entry(&ds, level);
		error = xchk_da_btree_hash(&ds, level, &key->hashval);
		key = xchk_da_btree_node_entry(ds, level);
		error = xchk_da_btree_hash(ds, level, &key->hashval);
		if (error)
			goto out;

@@ -564,11 +567,11 @@ xchk_da_btree(
		level++;
		if (level >= XFS_DA_NODE_MAXDEPTH) {
			/* Too deep! */
			xchk_da_set_corrupt(&ds, level - 1);
			xchk_da_set_corrupt(ds, level - 1);
			break;
		}
		ds.tree_level--;
		error = xchk_da_btree_block(&ds, level, blkno);
		ds->tree_level--;
		error = xchk_da_btree_block(ds, level, blkno);
		if (error)
			goto out;
		if (blks[level].bp == NULL)
@@ -587,6 +590,7 @@ xchk_da_btree(
	}

out_state:
	xfs_da_state_free(ds.state);
	xfs_da_state_free(ds->state);
	kmem_free(ds);
	return error;
}
+35 −29
Original line number Diff line number Diff line
@@ -461,15 +461,10 @@ xfs_scrub_metadata(
	struct file			*file,
	struct xfs_scrub_metadata	*sm)
{
	struct xfs_scrub		sc = {
		.file			= file,
		.sm			= sm,
	};
	struct xfs_scrub		*sc;
	struct xfs_mount		*mp = XFS_I(file_inode(file))->i_mount;
	int				error = 0;

	sc.mp = mp;

	BUILD_BUG_ON(sizeof(meta_scrub_ops) !=
		(sizeof(struct xchk_meta_ops) * XFS_SCRUB_TYPE_NR));

@@ -489,51 +484,60 @@ xfs_scrub_metadata(

	xchk_experimental_warning(mp);

	sc.ops = &meta_scrub_ops[sm->sm_type];
	sc.sick_mask = xchk_health_mask_for_scrub_type(sm->sm_type);
	sc = kmem_zalloc(sizeof(struct xfs_scrub), KM_NOFS | KM_MAYFAIL);
	if (!sc) {
		error = -ENOMEM;
		goto out;
	}

	sc->mp = mp;
	sc->file = file;
	sc->sm = sm;
	sc->ops = &meta_scrub_ops[sm->sm_type];
	sc->sick_mask = xchk_health_mask_for_scrub_type(sm->sm_type);
retry_op:
	/*
	 * When repairs are allowed, prevent freezing or readonly remount while
	 * scrub is running with a real transaction.
	 */
	if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) {
		error = mnt_want_write_file(sc.file);
		error = mnt_want_write_file(sc->file);
		if (error)
			goto out;
			goto out_sc;
	}

	/* Set up for the operation. */
	error = sc.ops->setup(&sc);
	error = sc->ops->setup(sc);
	if (error)
		goto out_teardown;

	/* Scrub for errors. */
	error = sc.ops->scrub(&sc);
	if (!(sc.flags & XCHK_TRY_HARDER) && error == -EDEADLOCK) {
	error = sc->ops->scrub(sc);
	if (!(sc->flags & XCHK_TRY_HARDER) && error == -EDEADLOCK) {
		/*
		 * Scrubbers return -EDEADLOCK to mean 'try harder'.
		 * Tear down everything we hold, then set up again with
		 * preparation for worst-case scenarios.
		 */
		error = xchk_teardown(&sc, 0);
		error = xchk_teardown(sc, 0);
		if (error)
			goto out;
		sc.flags |= XCHK_TRY_HARDER;
			goto out_sc;
		sc->flags |= XCHK_TRY_HARDER;
		goto retry_op;
	} else if (error || (sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE))
		goto out_teardown;

	xchk_update_health(&sc);
	xchk_update_health(sc);

	if ((sc.sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) &&
	    !(sc.flags & XREP_ALREADY_FIXED)) {
	if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) &&
	    !(sc->flags & XREP_ALREADY_FIXED)) {
		bool needs_fix;

		/* Let debug users force us into the repair routines. */
		if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR))
			sc.sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
			sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;

		needs_fix = (sc.sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
		needs_fix = (sc->sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
						 XFS_SCRUB_OFLAG_XCORRUPT |
						 XFS_SCRUB_OFLAG_PREEN));
		/*
@@ -541,7 +545,7 @@ xfs_scrub_metadata(
		 * report that back to userspace.
		 */
		if (!needs_fix) {
			sc.sm->sm_flags |= XFS_SCRUB_OFLAG_NO_REPAIR_NEEDED;
			sc->sm->sm_flags |= XFS_SCRUB_OFLAG_NO_REPAIR_NEEDED;
			goto out_nofix;
		}

@@ -549,26 +553,28 @@ xfs_scrub_metadata(
		 * If it's broken, userspace wants us to fix it, and we haven't
		 * already tried to fix it, then attempt a repair.
		 */
		error = xrep_attempt(&sc);
		error = xrep_attempt(sc);
		if (error == -EAGAIN) {
			/*
			 * Either the repair function succeeded or it couldn't
			 * get all the resources it needs; either way, we go
			 * back to the beginning and call the scrub function.
			 */
			error = xchk_teardown(&sc, 0);
			error = xchk_teardown(sc, 0);
			if (error) {
				xrep_failure(mp);
				goto out;
				goto out_sc;
			}
			goto retry_op;
		}
	}

out_nofix:
	xchk_postmortem(&sc);
	xchk_postmortem(sc);
out_teardown:
	error = xchk_teardown(&sc, error);
	error = xchk_teardown(sc, error);
out_sc:
	kmem_free(sc);
out:
	trace_xchk_done(XFS_I(file_inode(file)), sm, error);
	if (error == -EFSCORRUPTED || error == -EFSBADCRC) {