Commit e9d4e1ee authored by Yu Zhao's avatar Yu Zhao Committed by Andrew Morton
Browse files

mm: multi-gen LRU: clarify scan_control flags

Among the flags in scan_control:
1. sc->may_swap, which indicates swap constraint due to memsw.max, is
   supported as usual.
2. sc->proactive, which indicates reclaim by memory.reclaim, may not
   opportunistically skip the aging path, since it is considered less
   latency sensitive.
3. !(sc->gfp_mask & __GFP_IO), which indicates IO constraint, lowers
   swappiness to prioritize file LRU, since clean file folios are more
   likely to exist.
4. sc->may_writepage and sc->may_unmap, which indicates opportunistic
   reclaim, are rejected, since unmapped clean folios are already
   prioritized. Scanning for more of them is likely futile and can
   cause high reclaim latency when there is a large number of memcgs.

The rest are handled by the existing code.

Link: https://lkml.kernel.org/r/20221222041905.2431096-8-yuzhao@google.com


Signed-off-by: default avatarYu Zhao <yuzhao@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Michael Larabel <Michael@MichaelLarabel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
parent e4dde56c
Loading
Loading
Loading
Loading
+28 −28
Original line number Diff line number Diff line
@@ -3210,6 +3210,9 @@ static int get_swappiness(struct lruvec *lruvec, struct scan_control *sc)
	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
	struct pglist_data *pgdat = lruvec_pgdat(lruvec);

	if (!sc->may_swap)
		return 0;

	if (!can_demote(pgdat->node_id, sc) &&
	    mem_cgroup_get_nr_swap_pages(memcg) < MIN_LRU_BATCH)
		return 0;
@@ -4236,7 +4239,7 @@ static void walk_mm(struct lruvec *lruvec, struct mm_struct *mm, struct lru_gen_
	} while (err == -EAGAIN);
}

static struct lru_gen_mm_walk *set_mm_walk(struct pglist_data *pgdat)
static struct lru_gen_mm_walk *set_mm_walk(struct pglist_data *pgdat, bool force_alloc)
{
	struct lru_gen_mm_walk *walk = current->reclaim_state->mm_walk;

@@ -4244,7 +4247,7 @@ static struct lru_gen_mm_walk *set_mm_walk(struct pglist_data *pgdat)
		VM_WARN_ON_ONCE(walk);

		walk = &pgdat->mm_walk;
	} else if (!pgdat && !walk) {
	} else if (!walk && force_alloc) {
		VM_WARN_ON_ONCE(current_is_kswapd());

		walk = kzalloc(sizeof(*walk), __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN);
@@ -4430,7 +4433,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
		goto done;
	}

	walk = set_mm_walk(NULL);
	walk = set_mm_walk(NULL, true);
	if (!walk) {
		success = iterate_mm_list_nowalk(lruvec, max_seq);
		goto done;
@@ -4499,8 +4502,6 @@ static bool lruvec_is_reclaimable(struct lruvec *lruvec, struct scan_control *sc
	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
	DEFINE_MIN_SEQ(lruvec);

	VM_WARN_ON_ONCE(sc->memcg_low_reclaim);

	/* see the comment on lru_gen_folio */
	gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]);
	birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
@@ -4756,12 +4757,8 @@ static bool isolate_folio(struct lruvec *lruvec, struct folio *folio, struct sca
{
	bool success;

	/* unmapping inhibited */
	if (!sc->may_unmap && folio_mapped(folio))
		return false;

	/* swapping inhibited */
	if (!(sc->may_writepage && (sc->gfp_mask & __GFP_IO)) &&
	if (!(sc->gfp_mask & __GFP_IO) &&
	    (folio_test_dirty(folio) ||
	     (folio_test_anon(folio) && !folio_test_swapcache(folio))))
		return false;
@@ -4858,9 +4855,8 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
	__count_vm_events(PGSCAN_ANON + type, isolated);

	/*
	 * There might not be eligible pages due to reclaim_idx, may_unmap and
	 * may_writepage. Check the remaining to prevent livelock if it's not
	 * making progress.
	 * There might not be eligible folios due to reclaim_idx. Check the
	 * remaining to prevent livelock if it's not making progress.
	 */
	return isolated || !remaining ? scanned : 0;
}
@@ -5120,9 +5116,7 @@ static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, bool
	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
	DEFINE_MAX_SEQ(lruvec);

	if (mem_cgroup_below_min(sc->target_mem_cgroup, memcg) ||
	    (mem_cgroup_below_low(sc->target_mem_cgroup, memcg) &&
	     !sc->memcg_low_reclaim))
	if (mem_cgroup_below_min(sc->target_mem_cgroup, memcg))
		return 0;

	if (!should_run_aging(lruvec, max_seq, sc, can_swap, &nr_to_scan))
@@ -5150,17 +5144,14 @@ static bool try_to_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
	long nr_to_scan;
	unsigned long scanned = 0;
	unsigned long nr_to_reclaim = get_nr_to_reclaim(sc);
	int swappiness = get_swappiness(lruvec, sc);

	/* clean file folios are more likely to exist */
	if (swappiness && !(sc->gfp_mask & __GFP_IO))
		swappiness = 1;

	while (true) {
		int delta;
		int swappiness;

		if (sc->may_swap)
			swappiness = get_swappiness(lruvec, sc);
		else if (!cgroup_reclaim(sc) && get_swappiness(lruvec, sc))
			swappiness = 1;
		else
			swappiness = 0;

		nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness);
		if (nr_to_scan <= 0)
@@ -5291,12 +5282,13 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc
	struct blk_plug plug;

	VM_WARN_ON_ONCE(global_reclaim(sc));
	VM_WARN_ON_ONCE(!sc->may_writepage || !sc->may_unmap);

	lru_add_drain();

	blk_start_plug(&plug);

	set_mm_walk(lruvec_pgdat(lruvec));
	set_mm_walk(NULL, sc->proactive);

	if (try_to_shrink_lruvec(lruvec, sc))
		lru_gen_rotate_memcg(lruvec, MEMCG_LRU_YOUNG);
@@ -5352,11 +5344,19 @@ static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control *

	VM_WARN_ON_ONCE(!global_reclaim(sc));

	/*
	 * Unmapped clean folios are already prioritized. Scanning for more of
	 * them is likely futile and can cause high reclaim latency when there
	 * is a large number of memcgs.
	 */
	if (!sc->may_writepage || !sc->may_unmap)
		goto done;

	lru_add_drain();

	blk_start_plug(&plug);

	set_mm_walk(pgdat);
	set_mm_walk(pgdat, sc->proactive);

	set_initial_priority(pgdat, sc);

@@ -5374,7 +5374,7 @@ static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control *
	clear_mm_walk();

	blk_finish_plug(&plug);

done:
	/* kswapd should never fail */
	pgdat->kswapd_failures = 0;
}
@@ -5943,7 +5943,7 @@ static ssize_t lru_gen_seq_write(struct file *file, const char __user *src,
	set_task_reclaim_state(current, &sc.reclaim_state);
	flags = memalloc_noreclaim_save();
	blk_start_plug(&plug);
	if (!set_mm_walk(NULL)) {
	if (!set_mm_walk(NULL, true)) {
		err = -ENOMEM;
		goto done;
	}