Commit 7f1b8e0d authored by Borislav Petkov
Browse files

x86/mce: Remove the tolerance level control



This is pretty much unused and not really useful. What is more, all
relevant MCA hardware has recoverable machine checks support so there's
no real need to tweak MCA tolerance levels in order to *maybe* extend
machine lifetime.

So rip it out.

Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lore.kernel.org/r/YcDq8PxvKtTENl/e@zn.tnic
parent 8ca97812
Loading
Loading
Loading
Loading
+37 −0
Original line number Diff line number Diff line
What:		/sys/devices/system/machinecheck/machinecheckX/tolerant
Contact:	Borislav Petkov <bp@suse.de>
Date:		Dec, 2021
Description:
		Unused and obsolete since the advent of recoverable machine
		checks (see the last sentence below), which have been present
		since 2010 (Nehalem).

		Original description:

		The entries appear for each CPU, but they are truly shared
		between all CPUs.

		Tolerance level. When a machine check exception occurs for a
		non corrected machine check the kernel can take different
		actions.

		Since machine check exceptions can happen any time it is
		sometimes risky for the kernel to kill a process because it
		defies normal kernel locking rules. The tolerance level
		configures how hard the kernel tries to recover even at some
		risk of deadlock. Higher tolerant values trade potentially
		better uptime with the risk of a crash or even corruption
		(for tolerant >= 3).

		==  ===========================================================
		 0  always panic on uncorrected errors, log corrected errors
		 1  panic or SIGBUS on uncorrected errors, log corrected errors
		 2  SIGBUS or log uncorrected errors, log corrected errors
		 3  never panic or SIGBUS, log all errors (for testing only)
		==  ===========================================================

		Default: 1

		Note this only makes a difference if the CPU allows recovery
		from a machine check exception. Current x86 CPUs generally
		do not.
+0 −32
Original line number Diff line number Diff line
@@ -53,38 +53,6 @@ Description:
		(but some corrected errors might be still reported
		in other ways)

What:		/sys/devices/system/machinecheck/machinecheckX/tolerant
Contact:	Andi Kleen <ak@linux.intel.com>
Date:		Feb, 2007
Description:
		The entries appear for each CPU, but they are truly shared
		between all CPUs.

		Tolerance level. When a machine check exception occurs for a
		non corrected machine check the kernel can take different
		actions.

		Since machine check exceptions can happen any time it is
		sometimes risky for the kernel to kill a process because it
		defies normal kernel locking rules. The tolerance level
		configures how hard the kernel tries to recover even at some
		risk of	deadlock. Higher tolerant values trade potentially
		better uptime with the risk of a crash or even corruption
		(for tolerant >= 3).

		==  ===========================================================
		 0  always panic on uncorrected errors, log corrected errors
		 1  panic or SIGBUS on uncorrected errors, log corrected errors
		 2  SIGBUS or log uncorrected errors, log corrected errors
		 3  never panic or SIGBUS, log all errors (for testing only)
		==  ===========================================================

		Default: 1

		Note this only makes a difference if the CPU allows recovery
		from a machine check exception. Current x86 CPUs generally
		do not.

What:		/sys/devices/system/machinecheck/machinecheckX/trigger
Contact:	Andi Kleen <ak@linux.intel.com>
Date:		Feb, 2007
+0 −2
Original line number Diff line number Diff line
@@ -60,8 +60,6 @@ There are two (actually three) modes memory failure recovery can be in:

vm.memory_failure_recovery sysctl set to zero:
	All memory failures cause a panic. Do not attempt recovery.
	(on x86 this can be also affected by the tolerant level of the
	MCE subsystem)

early kill
	(can be controlled globally and per process)
+1 −8
Original line number Diff line number Diff line
@@ -47,14 +47,7 @@ Please see Documentation/x86/x86_64/machinecheck.rst for sysfs runtime tunables.
		in a reboot. On Intel systems it is enabled by default.
   mce=nobootlog
		Disable boot machine check logging.
   mce=tolerancelevel[,monarchtimeout] (number,number)
		tolerance levels:
		0: always panic on uncorrected errors, log corrected errors
		1: panic or SIGBUS on uncorrected errors, log corrected errors
		2: SIGBUS or log uncorrected errors, log corrected errors
		3: never panic or SIGBUS, log all errors (for testing only)
		Default is 1
		Can be also set using sysfs which is preferable.
   mce=monarchtimeout (number)
		monarchtimeout:
		Sets the time in us to wait for other CPUs on machine checks. 0
		to disable.
+20 −33
Original line number Diff line number Diff line
@@ -86,14 +86,6 @@ struct mce_vendor_flags mce_flags __read_mostly;

struct mca_config mca_cfg __read_mostly = {
	.bootlog  = -1,
	/*
	 * Tolerant levels:
	 * 0: always panic on uncorrected errors, log corrected errors
	 * 1: panic or SIGBUS on uncorrected errors, log corrected errors
	 * 2: SIGBUS or log uncorrected errors (if possible), log corr. errors
	 * 3: never panic or SIGBUS, log all errors (for testing only)
	 */
	.tolerant = 1,
	.monarch_timeout = -1
};

@@ -753,7 +745,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
			goto clear_it;

		mce_read_aux(&m, i);
		m.severity = mce_severity(&m, NULL, mca_cfg.tolerant, NULL, false);
		m.severity = mce_severity(&m, NULL, NULL, false);
		/*
		 * Don't get the IP here because it's unlikely to
		 * have anything to do with the actual error location.
@@ -887,7 +879,7 @@ static __always_inline int mce_no_way_out(struct mce *m, char **msg, unsigned lo
			quirk_sandybridge_ifu(i, m, regs);

		m->bank = i;
		if (mce_severity(m, regs, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
		if (mce_severity(m, regs, &tmp, true) >= MCE_PANIC_SEVERITY) {
			mce_read_aux(m, i);
			*msg = tmp;
			return 1;
@@ -935,12 +927,11 @@ static noinstr int mce_timed_out(u64 *t, const char *msg)
	if (!mca_cfg.monarch_timeout)
		goto out;
	if ((s64)*t < SPINUNIT) {
		if (mca_cfg.tolerant <= 1) {
		if (cpumask_and(&mce_missing_cpus, cpu_online_mask, &mce_missing_cpus))
			pr_emerg("CPUs not responding to MCE broadcast (may include false positives): %*pbl\n",
				 cpumask_pr_args(&mce_missing_cpus));
		mce_panic(msg, NULL, NULL);
		}

		ret = 1;
		goto out;
	}
@@ -1004,9 +995,9 @@ static void mce_reign(void)
	 * This dumps all the mces in the log buffer and stops the
	 * other CPUs.
	 */
	if (m && global_worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) {
	if (m && global_worst >= MCE_PANIC_SEVERITY) {
		/* call mce_severity() to get "msg" for panic */
		mce_severity(m, NULL, mca_cfg.tolerant, &msg, true);
		mce_severity(m, NULL, &msg, true);
		mce_panic("Fatal machine check", m, msg);
	}

@@ -1020,7 +1011,7 @@ static void mce_reign(void)
	 * No machine check event found. Must be some external
	 * source or one CPU is hung. Panic.
	 */
	if (global_worst <= MCE_KEEP_SEVERITY && mca_cfg.tolerant < 3)
	if (global_worst <= MCE_KEEP_SEVERITY)
		mce_panic("Fatal machine check from unknown source", NULL, NULL);

	/*
@@ -1267,7 +1258,7 @@ __mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *final,
		/* Set taint even when machine check was not enabled. */
		taint++;

		severity = mce_severity(m, regs, cfg->tolerant, NULL, true);
		severity = mce_severity(m, regs, NULL, true);

		/*
		 * When machine check was for corrected/deferred handler don't
@@ -1425,7 +1416,6 @@ noinstr void do_machine_check(struct pt_regs *regs)
	int worst = 0, order, no_way_out, kill_current_task, lmce, taint = 0;
	DECLARE_BITMAP(valid_banks, MAX_NR_BANKS) = { 0 };
	DECLARE_BITMAP(toclear, MAX_NR_BANKS) = { 0 };
	struct mca_config *cfg = &mca_cfg;
	struct mce m, *final;
	char *msg = NULL;

@@ -1447,7 +1437,7 @@ noinstr void do_machine_check(struct pt_regs *regs)

	/*
	 * If no_way_out gets set, there is no safe way to recover from this
	 * MCE.  If mca_cfg.tolerant is cranked up, we'll try anyway.
	 * MCE.
	 */
	no_way_out = 0;

@@ -1481,7 +1471,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
	 * severity is MCE_AR_SEVERITY we have other options.
	 */
	if (!(m.mcgstatus & MCG_STATUS_RIPV))
		kill_current_task = (cfg->tolerant == 3) ? 0 : 1;
		kill_current_task = 1;
	/*
	 * Check if this MCE is signaled to only this logical processor,
	 * on Intel, Zhaoxin only.
@@ -1498,7 +1488,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
	 * to see it will clear it.
	 */
	if (lmce) {
		if (no_way_out && cfg->tolerant < 3)
		if (no_way_out)
			mce_panic("Fatal local machine check", &m, msg);
	} else {
		order = mce_start(&no_way_out);
@@ -1518,7 +1508,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
			if (!no_way_out)
				no_way_out = worst >= MCE_PANIC_SEVERITY;

			if (no_way_out && cfg->tolerant < 3)
			if (no_way_out)
				mce_panic("Fatal machine check on current CPU", &m, msg);
		}
	} else {
@@ -1530,8 +1520,8 @@ noinstr void do_machine_check(struct pt_regs *regs)
		 * fatal error. We call "mce_severity()" again to
		 * make sure we have the right "msg".
		 */
		if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) {
			mce_severity(&m, regs, cfg->tolerant, &msg, true);
		if (worst >= MCE_PANIC_SEVERITY) {
			mce_severity(&m, regs, &msg, true);
			mce_panic("Local fatal machine check!", &m, msg);
		}
	}
@@ -2267,10 +2257,9 @@ static int __init mcheck_enable(char *str)
		cfg->bios_cmci_threshold = 1;
	else if (!strcmp(str, "recovery"))
		cfg->recovery = 1;
	else if (isdigit(str[0])) {
		if (get_option(&str, &cfg->tolerant) == 2)
	else if (isdigit(str[0]))
		get_option(&str, &(cfg->monarch_timeout));
	} else {
	else {
		pr_info("mce argument %s ignored. Please use /sys\n", str);
		return 0;
	}
@@ -2520,7 +2509,6 @@ static ssize_t store_int_with_restart(struct device *s,
	return ret;
}

static DEVICE_INT_ATTR(tolerant, 0644, mca_cfg.tolerant);
static DEVICE_INT_ATTR(monarch_timeout, 0644, mca_cfg.monarch_timeout);
static DEVICE_BOOL_ATTR(dont_log_ce, 0644, mca_cfg.dont_log_ce);
static DEVICE_BOOL_ATTR(print_all, 0644, mca_cfg.print_all);
@@ -2541,7 +2529,6 @@ static struct dev_ext_attribute dev_attr_cmci_disabled = {
};

static struct device_attribute *mce_device_attrs[] = {
	&dev_attr_tolerant.attr,
	&dev_attr_check_interval.attr,
#ifdef CONFIG_X86_MCELOG_LEGACY
	&dev_attr_trigger,
Loading