Commit fbb3bdf5 authored by Heiko Carstens's avatar Heiko Carstens Committed by Alexander Gordeev
Browse files

s390/nmi: Print additional information



In case of an unrecoverable machine check, only the machine check interrupt
code is printed to the console before the machine is stopped. This sometimes
makes root cause analysis hard.

Print additional machine check information to make analysis easier.
The output now looks like this:

Unrecoverable machine check, code: 00400F5F4C3B0000
6.16.0-rc2-11605-g987a9431e53a-dirty
HW: IBM 3931 A01 704 (z/VM 7.4.0)
PSW: 0706C00180000000 000003FFE0F0462E PFX: 0000000000070000
LBA: 000003FFE0F0462A EDC: 0000000000000000 FSA: 0000000000000000
CRS:
0080000014966A12 0000000087CB41C7 0000000000BFF140 0000000000000000
000000000000FFFF 0000000000BFF140 0000000071000000 0000000087CB41C7
0000000000008000 0000000000000000 0000000000000000 0000000000000000
0000000000000000 00000000024C0007 00000000DB000000 0000000000BFF000
GPRS:
FFFFFFFF00000000 000003FFE0F0462E E10EA4F489F897A6 0000000000000000
7FFFFFF2C0413C4C 000003FFE19B7010 0000000000000000 0000000000000000
0000000000000000 00000001F76B3380 000003FFE15D4050 0000000000000005
0000000000000000 0000000000070000 000003FFE0F0586C 0000037FE00B7DA0
System stopped

Reviewed-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
parent 819275e1
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -41,6 +41,8 @@ struct parmarea {
	char command_line[COMMAND_LINE_SIZE];		/* 0x10480 */
};

/*
 * Hardware description string, formatted by setup_arch_string() as
 * "HW: <machine> (<hypervisor>)".
 */
extern char arch_hw_string[128];

extern struct parmarea parmarea;

extern unsigned int zlib_dfltcc_support;
+3 −0
Original line number Diff line number Diff line
@@ -105,6 +105,8 @@ static inline void strim_all(char *str)
	}
}

/*
 * Hardware description string, "HW: <machine> (<hypervisor>)". Written by
 * setup_arch_string() and printed by the machine check handler's
 * nmi_print_info().
 */
char arch_hw_string[128];

static noinline __init void setup_arch_string(void)
{
	struct sysinfo_1_1_1 *mach = (struct sysinfo_1_1_1 *)&sysinfo_page;
@@ -131,6 +133,7 @@ static noinline __init void setup_arch_string(void)
			machine_is_vm() ? "z/VM" :
			machine_is_kvm() ? "KVM" : "unknown");
	}
	sprintf(arch_hw_string, "HW: %s (%s)", mstr, hvstr);
	dump_stack_set_arch_desc("%s (%s)", mstr, hvstr);
}

+70 −5
Original line number Diff line number Diff line
@@ -9,6 +9,7 @@
 */

#include <linux/kernel_stat.h>
#include <linux/utsname.h>
#include <linux/cpufeature.h>
#include <linux/init.h>
#include <linux/errno.h>
@@ -115,18 +116,82 @@ static __always_inline char *u64_to_hex(char *dest, u64 val)
	return dest;
}

/*
 * Print diagnostic information about an unrecoverable machine check.
 *
 * Every line is assembled by hand in a local buffer with nmi_puts() and
 * u64_to_hex() and then handed to sclp_emergency_printk(). The output is,
 * in this order:
 *
 *   - the machine check interruption code
 *   - the kernel release string from init_utsname()
 *   - the hardware description in arch_hw_string
 *   - the machine check old PSW (mask and address) and the lowcore address,
 *     labelled "PFX"
 *   - the lowcore fields last_break_save_area ("LBA"),
 *     external_damage_code ("EDC") and failing_storage_address ("FSA")
 *   - the 16 control registers and the 16 general purpose registers from
 *     the lowcore save areas, four per line
 *   - a final "System stopped" line
 *
 * NOTE(review): nmi_puts() and u64_to_hex() are assumed to return a pointer
 * to the end of what they wrote so that calls can be chained - confirm
 * against their definitions.
 */
static notrace void nmi_print_info(void)
{
	struct lowcore *lc = get_lowcore();
	/*
	 * nmi_puts() is passed no buffer length, so the buffer must be able to
	 * hold the longest possible line. That is arch_hw_string, which may
	 * use its whole array, followed by "\n" and the terminating NUL. A
	 * buffer smaller than arch_hw_string would be overrun on the stack by
	 * a long hardware description.
	 */
	char message[sizeof(arch_hw_string) + 2];
	char *ptr;
	int i;

	ptr = nmi_puts(message, "Unrecoverable machine check, code: ");
	ptr = u64_to_hex(ptr, lc->mcck_interruption_code);
	ptr = nmi_puts(ptr, "\n");
	sclp_emergency_printk(message);

	ptr = nmi_puts(message, init_utsname()->release);
	ptr = nmi_puts(ptr, "\n");
	sclp_emergency_printk(message);

	ptr = nmi_puts(message, arch_hw_string);
	ptr = nmi_puts(ptr, "\n");
	sclp_emergency_printk(message);

	ptr = nmi_puts(message, "PSW: ");
	ptr = u64_to_hex(ptr, lc->mcck_old_psw.mask);
	ptr = nmi_puts(ptr, " ");
	ptr = u64_to_hex(ptr, lc->mcck_old_psw.addr);
	ptr = nmi_puts(ptr, " PFX: ");
	ptr = u64_to_hex(ptr, (u64)get_lowcore());
	ptr = nmi_puts(ptr, "\n");
	sclp_emergency_printk(message);

	ptr = nmi_puts(message, "LBA: ");
	ptr = u64_to_hex(ptr, lc->last_break_save_area);
	ptr = nmi_puts(ptr, " EDC: ");
	ptr = u64_to_hex(ptr, lc->external_damage_code);
	ptr = nmi_puts(ptr, " FSA: ");
	ptr = u64_to_hex(ptr, lc->failing_storage_address);
	ptr = nmi_puts(ptr, "\n");
	sclp_emergency_printk(message);

	ptr = nmi_puts(message, "CRS:\n");
	sclp_emergency_printk(message);
	ptr = message;
	for (i = 0; i < 16; i++) {
		ptr = u64_to_hex(ptr, lc->cregs_save_area[i].val);
		ptr = nmi_puts(ptr, " ");
		/* Emit the line after every fourth register and start over. */
		if ((i + 1) % 4 == 0) {
			ptr = nmi_puts(ptr, "\n");
			sclp_emergency_printk(message);
			ptr = message;
		}
	}

	ptr = nmi_puts(message, "GPRS:\n");
	sclp_emergency_printk(message);
	ptr = message;
	for (i = 0; i < 16; i++) {
		ptr = u64_to_hex(ptr, lc->gpregs_save_area[i]);
		ptr = nmi_puts(ptr, " ");
		/* Emit the line after every fourth register and start over. */
		if ((i + 1) % 4 == 0) {
			ptr = nmi_puts(ptr, "\n");
			sclp_emergency_printk(message);
			ptr = message;
		}
	}

	ptr = nmi_puts(message, "System stopped\n");
	sclp_emergency_printk(message);
}

static notrace void s390_handle_damage(void)
{
	struct lowcore *lc = get_lowcore();
	union ctlreg0 cr0, cr0_new;
	char message[100];
	psw_t psw_save;
	char *ptr;

	smp_emergency_stop();
	diag_amode31_ops.diag308_reset();
	ptr = nmi_puts(message, "System stopped due to unrecoverable machine check, code: 0x");
	u64_to_hex(ptr, lc->mcck_interruption_code);

	/*
	 * Disable low address protection and make machine check new PSW a
@@ -140,7 +205,7 @@ static notrace void s390_handle_damage(void)
	psw_bits(lc->mcck_new_psw).io = 0;
	psw_bits(lc->mcck_new_psw).ext = 0;
	psw_bits(lc->mcck_new_psw).wait = 1;
	sclp_emergency_printk(message);
	nmi_print_info();

	/*
	 * Restore machine check new PSW and control register 0 to original