Commit a3d5dc90 authored by Yang Yang's avatar Yang Yang Committed by Linus Torvalds
Browse files

delayacct: support swapin delay accounting for swapping without blkio

Currently delayacct accounts swapin delay only for swapping that cause
blkio.  If we use zram for swapping, tools/accounting/getdelays can't
get any SWAP delay.

It's useful to get zram swapin delay information, for example to adjust
compress algorithm or /proc/sys/vm/swappiness.

Reference to PSI, it accounts any kind of swapping by doing its work in
swap_readpage(), no matter whether swapping causes blkio.  Let delayacct
do the similar work.

Link: https://lkml.kernel.org/r/20211112083813.8559-1-yang.yang29@zte.com.cn


Signed-off-by: default avatarYang Yang <yang.yang29@zte.com.cn>
Reported-by: default avatarZeal Robot <zealci@zte.com.cn>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent e83a4472
Loading
Loading
Loading
Loading
+22 −22
Original line number Diff line number Diff line
@@ -9,14 +9,6 @@

#include <uapi/linux/taskstats.h>

/*
 * Per-task flags relevant to delay accounting
 * maintained privately to avoid exhausting similar flags in sched.h:PF_*
 * Used to set current->delays->flags
 */
#define DELAYACCT_PF_SWAPIN	0x00000001	/* I am doing a swapin */
#define DELAYACCT_PF_BLKIO	0x00000002	/* I am waiting on IO */

#ifdef CONFIG_TASK_DELAY_ACCT
struct task_delay_info {
	raw_spinlock_t	lock;
@@ -37,13 +29,13 @@ struct task_delay_info {
	 * associated with the operation is added to XXX_delay.
	 * XXX_delay contains the accumulated delay time in nanoseconds.
	 */
	u64 blkio_start;	/* Shared by blkio, swapin */
	u64 blkio_start;
	u64 blkio_delay;	/* wait for sync block io completion */
	u64 swapin_delay;	/* wait for swapin block io completion */
	u64 swapin_start;
	u64 swapin_delay;	/* wait for swapin */
	u32 blkio_count;	/* total count of the number of sync block */
				/* io operations performed */
	u32 swapin_count;	/* total count of the number of swapin block */
				/* io operations performed */
	u32 swapin_count;	/* total count of swapin */

	u64 freepages_start;
	u64 freepages_delay;	/* wait for memory reclaim */
@@ -79,14 +71,8 @@ extern void __delayacct_freepages_start(void);
extern void __delayacct_freepages_end(void);
extern void __delayacct_thrashing_start(void);
extern void __delayacct_thrashing_end(void);

static inline int delayacct_is_task_waiting_on_io(struct task_struct *p)
{
	if (p->delays)
		return (p->delays->flags & DELAYACCT_PF_BLKIO);
	else
		return 0;
}
extern void __delayacct_swapin_start(void);
extern void __delayacct_swapin_end(void);

static inline void delayacct_set_flag(struct task_struct *p, int flag)
{
@@ -123,7 +109,6 @@ static inline void delayacct_blkio_start(void)
	if (!static_branch_unlikely(&delayacct_key))
		return;

	delayacct_set_flag(current, DELAYACCT_PF_BLKIO);
	if (current->delays)
		__delayacct_blkio_start();
}
@@ -135,7 +120,6 @@ static inline void delayacct_blkio_end(struct task_struct *p)

	if (p->delays)
		__delayacct_blkio_end(p);
	delayacct_clear_flag(p, DELAYACCT_PF_BLKIO);
}

static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk)
@@ -169,6 +153,18 @@ static inline void delayacct_thrashing_end(void)
		__delayacct_thrashing_end();
}

static inline void delayacct_swapin_start(void)
{
	if (current->delays)
		__delayacct_swapin_start();
}

static inline void delayacct_swapin_end(void)
{
	if (current->delays)
		__delayacct_swapin_end();
}

#else
static inline void delayacct_set_flag(struct task_struct *p, int flag)
{}
@@ -199,6 +195,10 @@ static inline void delayacct_thrashing_start(void)
{}
static inline void delayacct_thrashing_end(void)
{}
static inline void delayacct_swapin_start(void)
{}
static inline void delayacct_swapin_end(void)
{}

#endif /* CONFIG_TASK_DELAY_ACCT */

+18 −15
Original line number Diff line number Diff line
@@ -100,19 +100,10 @@ void __delayacct_blkio_start(void)
 */
void __delayacct_blkio_end(struct task_struct *p)
{
	struct task_delay_info *delays = p->delays;
	u64 *total;
	u32 *count;

	if (p->delays->flags & DELAYACCT_PF_SWAPIN) {
		total = &delays->swapin_delay;
		count = &delays->swapin_count;
	} else {
		total = &delays->blkio_delay;
		count = &delays->blkio_count;
	}

	delayacct_end(&delays->lock, &delays->blkio_start, total, count);
	delayacct_end(&p->delays->lock,
		      &p->delays->blkio_start,
		      &p->delays->blkio_delay,
		      &p->delays->blkio_count);
}

int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
@@ -179,8 +170,7 @@ __u64 __delayacct_blkio_ticks(struct task_struct *tsk)
	unsigned long flags;

	raw_spin_lock_irqsave(&tsk->delays->lock, flags);
	ret = nsec_to_clock_t(tsk->delays->blkio_delay +
				tsk->delays->swapin_delay);
	ret = nsec_to_clock_t(tsk->delays->blkio_delay);
	raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);
	return ret;
}
@@ -210,3 +200,16 @@ void __delayacct_thrashing_end(void)
		      &current->delays->thrashing_delay,
		      &current->delays->thrashing_count);
}

void __delayacct_swapin_start(void)
{
	current->delays->swapin_start = local_clock();
}

void __delayacct_swapin_end(void)
{
	delayacct_end(&current->delays->lock,
		      &current->delays->swapin_start,
		      &current->delays->swapin_delay,
		      &current->delays->swapin_count);
}
+0 −4
Original line number Diff line number Diff line
@@ -3507,7 +3507,6 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
	if (unlikely(!si))
		goto out;

	delayacct_set_flag(current, DELAYACCT_PF_SWAPIN);
	page = lookup_swap_cache(entry, vma, vmf->address);
	swapcache = page;

@@ -3555,7 +3554,6 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
					vmf->address, &vmf->ptl);
			if (likely(pte_same(*vmf->pte, vmf->orig_pte)))
				ret = VM_FAULT_OOM;
			delayacct_clear_flag(current, DELAYACCT_PF_SWAPIN);
			goto unlock;
		}

@@ -3569,13 +3567,11 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
		 * owner processes (which may be unknown at hwpoison time)
		 */
		ret = VM_FAULT_HWPOISON;
		delayacct_clear_flag(current, DELAYACCT_PF_SWAPIN);
		goto out_release;
	}

	locked = lock_page_or_retry(page, vma->vm_mm, vmf->flags);

	delayacct_clear_flag(current, DELAYACCT_PF_SWAPIN);
	if (!locked) {
		ret |= VM_FAULT_RETRY;
		goto out_release;
+3 −0
Original line number Diff line number Diff line
@@ -25,6 +25,7 @@
#include <linux/psi.h>
#include <linux/uio.h>
#include <linux/sched/task.h>
#include <linux/delayacct.h>

void end_swap_bio_write(struct bio *bio)
{
@@ -370,6 +371,7 @@ int swap_readpage(struct page *page, bool synchronous)
	 * significant part of overall IO time.
	 */
	psi_memstall_enter(&pflags);
	delayacct_swapin_start();

	if (frontswap_load(page) == 0) {
		SetPageUptodate(page);
@@ -432,6 +434,7 @@ int swap_readpage(struct page *page, bool synchronous)

out:
	psi_memstall_leave(&pflags);
	delayacct_swapin_end();
	return ret;
}