|
|
09c1d0 |
From: Prarit Bhargava <prarit@redhat.com>
|
|
|
09c1d0 |
|
|
|
09c1d0 |
Subject: mcelog: Deduce channel number for Haswell/Broadwell/Skylake systems
|
|
|
09c1d0 |
|
|
|
09c1d0 |
commit 6ed93e30f83519b0ab71f8ecd156b8ff0b2912b6
|
|
|
09c1d0 |
Author: Tony Luck <tony.luck@intel.com>
|
|
|
09c1d0 |
Date: Mon Sep 24 11:14:45 2018 -0700
|
|
|
09c1d0 |
|
|
|
09c1d0 |
mcelog: Deduce channel number for Haswell/Broadwell/Skylake systems
|
|
|
09c1d0 |
|
|
|
09c1d0 |
Ivy Bridge was the last system that gave us enough information
|
|
|
09c1d0 |
to figure out the exact DIMM that is the source of a memory error.
|
|
|
09c1d0 |
We gave up on DIMM logging at that point.
|
|
|
09c1d0 |
|
|
|
09c1d0 |
But we can still figure out the socket, memory controller and channel.
|
|
|
09c1d0 |
|
|
|
09c1d0 |
Signed-off-by: Tony Luck <tony.luck@intel.com>
|
|
|
09c1d0 |
Signed-off-by: Andi Kleen <ak@linux.intel.com>
|
|
|
09c1d0 |
|
|
|
09c1d0 |
diff --git a/haswell.c b/haswell.c
|
|
|
09c1d0 |
index 892ebc7248e808248798f21506b54faca147db9b..4eccbeb21a281467495e024b376d81be96b2183e 100644
|
|
|
09c1d0 |
--- a/haswell.c
|
|
|
09c1d0 |
+++ b/haswell.c
|
|
|
09c1d0 |
@@ -148,3 +148,45 @@ void hsw_decode_model(int cputype, int bank, u64 status, u64 misc)
|
|
|
09c1d0 |
break;
|
|
|
09c1d0 |
}
|
|
|
09c1d0 |
}
|
|
|
09c1d0 |
+
|
|
|
09c1d0 |
+/*
|
|
|
09c1d0 |
+ * There isn't enough information to identify the DIMM. But
|
|
|
09c1d0 |
+ * we can derive the channel from the bank number.
|
|
|
09c1d0 |
+ * There can be two memory controllers. We number the channels
|
|
|
09c1d0 |
+ * on the second controller: 4, 5, 6, 7
|
|
|
09c1d0 |
+ */
|
|
|
09c1d0 |
+void haswell_memerr_misc(struct mce *m, int *channel, int *dimm)
|
|
|
09c1d0 |
+{
|
|
|
09c1d0 |
+ u64 status = m->status;
|
|
|
09c1d0 |
+ unsigned chan;
|
|
|
09c1d0 |
+
|
|
|
09c1d0 |
+ /* Check this is a memory error */
|
|
|
09c1d0 |
+ if (!test_prefix(7, status & 0xefff))
|
|
|
09c1d0 |
+ return;
|
|
|
09c1d0 |
+
|
|
|
09c1d0 |
+ chan = EXTRACT(status, 0, 3);
|
|
|
09c1d0 |
+ if (chan == 0xf)
|
|
|
09c1d0 |
+ return;
|
|
|
09c1d0 |
+
|
|
|
09c1d0 |
+ switch (m->bank) {
|
|
|
09c1d0 |
+ case 7:
|
|
|
09c1d0 |
+ /* Home agent 0 */
|
|
|
09c1d0 |
+ break;
|
|
|
09c1d0 |
+ case 8:
|
|
|
09c1d0 |
+ /* Home agent 1 */
|
|
|
09c1d0 |
+ chan += 4;
|
|
|
09c1d0 |
+ break;
|
|
|
09c1d0 |
+ case 9: case 10: case 11: case 12:
|
|
|
09c1d0 |
+ /* Memory controller 0 */
|
|
|
09c1d0 |
+ chan = m->bank - 9;
|
|
|
09c1d0 |
+ break;
|
|
|
09c1d0 |
+ case 13: case 14: case 15: case 16:
|
|
|
09c1d0 |
+ /* Memory controller 1 */
|
|
|
09c1d0 |
+ chan = (m->bank - 13) + 4;
|
|
|
09c1d0 |
+ break;
|
|
|
09c1d0 |
+ default:
|
|
|
09c1d0 |
+ return;
|
|
|
09c1d0 |
+ }
|
|
|
09c1d0 |
+
|
|
|
09c1d0 |
+ channel[0] = chan;
|
|
|
09c1d0 |
+}
|
|
|
09c1d0 |
diff --git a/haswell.h b/haswell.h
|
|
|
09c1d0 |
index ba3fb1c3c985aec0ac1a0a271dca3c3afd18874c..712c8eb66d50a1bf63a7dbd67382fe775b59d69b 100644
|
|
|
09c1d0 |
--- a/haswell.h
|
|
|
09c1d0 |
+++ b/haswell.h
|
|
|
09c1d0 |
@@ -1,2 +1,3 @@
|
|
|
09c1d0 |
void hsw_decode_model(int cputype, int bank, u64 status, u64 misc);
|
|
|
09c1d0 |
void haswell_ep_memerr_misc(struct mce *m, int *channel, int *dimm);
|
|
|
09c1d0 |
+void haswell_memerr_misc(struct mce *m, int *channel, int *dimm);
|
|
|
09c1d0 |
diff --git a/intel.c b/intel.c
|
|
|
09c1d0 |
index 20d2acdc12daa1128d72471d53639aebf82f4854..b655c4162f8980d5d826640fa4375c7ba6b1e97d 100644
|
|
|
09c1d0 |
--- a/intel.c
|
|
|
09c1d0 |
+++ b/intel.c
|
|
|
09c1d0 |
@@ -25,6 +25,7 @@
|
|
|
09c1d0 |
#include "sandy-bridge.h"
|
|
|
09c1d0 |
#include "ivy-bridge.h"
|
|
|
09c1d0 |
#include "haswell.h"
|
|
|
09c1d0 |
+#include "skylake_xeon.h"
|
|
|
09c1d0 |
|
|
|
09c1d0 |
int memory_error_support;
|
|
|
09c1d0 |
|
|
|
09c1d0 |
@@ -140,6 +141,13 @@ static int intel_memory_error(struct mce *m, unsigned recordlen)
|
|
|
09c1d0 |
case CPU_IVY_BRIDGE_EPEX:
|
|
|
09c1d0 |
ivy_bridge_ep_memerr_misc(m, channel, dimm);
|
|
|
09c1d0 |
break;
|
|
|
09c1d0 |
+ case CPU_HASWELL_EPEX:
|
|
|
09c1d0 |
+ case CPU_BROADWELL_EPEX:
|
|
|
09c1d0 |
+ haswell_memerr_misc(m, channel, dimm);
|
|
|
09c1d0 |
+ break;
|
|
|
09c1d0 |
+ case CPU_SKYLAKE_XEON:
|
|
|
09c1d0 |
+ skylake_memerr_misc(m, channel, dimm);
|
|
|
09c1d0 |
+ break;
|
|
|
09c1d0 |
default:
|
|
|
09c1d0 |
break;
|
|
|
09c1d0 |
}
|
|
|
09c1d0 |
diff --git a/skylake_xeon.c b/skylake_xeon.c
|
|
|
09c1d0 |
index 16c6181987f0126d377b64a8f5d4a96a01bfa1c4..b02f8acd806e2a64ed1653f44349fd3e9abf374e 100644
|
|
|
09c1d0 |
--- a/skylake_xeon.c
|
|
|
09c1d0 |
+++ b/skylake_xeon.c
|
|
|
09c1d0 |
@@ -228,3 +228,45 @@ int skylake_s_ce_type(int bank, u64 status, u64 misc)
|
|
|
09c1d0 |
|
|
|
09c1d0 |
return 0;
|
|
|
09c1d0 |
}
|
|
|
09c1d0 |
+
|
|
|
09c1d0 |
+/*
|
|
|
09c1d0 |
+ * There isn't enough information to identify the DIMM. But
|
|
|
09c1d0 |
+ * we can derive the channel from the bank number.
|
|
|
09c1d0 |
+ * There can be two memory controllers. We number the channels
|
|
|
09c1d0 |
+ * on the second controller: 3, 4, 5
|
|
|
09c1d0 |
+ */
|
|
|
09c1d0 |
+void skylake_memerr_misc(struct mce *m, int *channel, int *dimm)
|
|
|
09c1d0 |
+{
|
|
|
09c1d0 |
+ u64 status = m->status;
|
|
|
09c1d0 |
+ unsigned chan;
|
|
|
09c1d0 |
+
|
|
|
09c1d0 |
+ /* Check this is a memory error */
|
|
|
09c1d0 |
+ if (!test_prefix(7, status & 0xefff))
|
|
|
09c1d0 |
+ return;
|
|
|
09c1d0 |
+
|
|
|
09c1d0 |
+ chan = EXTRACT(status, 0, 3);
|
|
|
09c1d0 |
+ if (chan == 0xf)
|
|
|
09c1d0 |
+ return;
|
|
|
09c1d0 |
+
|
|
|
09c1d0 |
+ switch (m->bank) {
|
|
|
09c1d0 |
+ case 7:
|
|
|
09c1d0 |
+ /* Home agent 0 */
|
|
|
09c1d0 |
+ break;
|
|
|
09c1d0 |
+ case 8:
|
|
|
09c1d0 |
+ /* Home agent 1 */
|
|
|
09c1d0 |
+ chan += 3;
|
|
|
09c1d0 |
+ break;
|
|
|
09c1d0 |
+ case 13: case 14: case 15:
|
|
|
09c1d0 |
+ /* Memory controller 0 */
|
|
|
09c1d0 |
+ chan = m->bank - 13;
|
|
|
09c1d0 |
+ break;
|
|
|
09c1d0 |
+ case 16: case 17: case 18:
|
|
|
09c1d0 |
+ /* Memory controller 1 */
|
|
|
09c1d0 |
+ chan = (m->bank - 16) + 3;
|
|
|
09c1d0 |
+ break;
|
|
|
09c1d0 |
+ default:
|
|
|
09c1d0 |
+ return;
|
|
|
09c1d0 |
+ }
|
|
|
09c1d0 |
+
|
|
|
09c1d0 |
+ channel[0] = chan;
|
|
|
09c1d0 |
+}
|
|
|
09c1d0 |
diff --git a/skylake_xeon.h b/skylake_xeon.h
|
|
|
09c1d0 |
index edcd9c030fa70f10ac23f2df9be948b10c73f4a1..098e6fa0e3eaff1b1d7e3040eddfb9187dabd7dd 100644
|
|
|
09c1d0 |
--- a/skylake_xeon.h
|
|
|
09c1d0 |
+++ b/skylake_xeon.h
|
|
|
09c1d0 |
@@ -1,2 +1,3 @@
|
|
|
09c1d0 |
void skylake_s_decode_model(int cputype, int bank, u64 status, u64 misc);
|
|
|
09c1d0 |
int skylake_s_ce_type(int bank, u64 status, u64 misc);
|
|
|
09c1d0 |
+void skylake_memerr_misc(struct mce *m, int *channel, int *dimm);
|