diff --git a/CHANGES b/CHANGES
index cd279c4..e3c4044 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,5 +1,9 @@
<newer changes first>
+Changes file is obsolete.
+Please see git log on https://git.kernel.org/cgit/utils/cpu/mce/mcelog.git/
+for newer changes.
+
Add Linux Kongress 2010 paper
Add Sandy Bridge Support
Write pid file by default in daemon mode
diff --git a/Makefile b/Makefile
index f8199f6..f3ba998 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,7 @@
CFLAGS := -g -Os
prefix := /usr
etcprefix :=
+MANDIR := ${prefix}/share/man
# Define appropiately for your distribution
# DOCDIR := /usr/share/doc/packages/mcelog
@@ -54,21 +55,27 @@ SRC := $(OBJ:.o=.c)
mcelog: ${OBJ}
# dbquery intentionally not installed by default
-install: mcelog
- mkdir -p $(DESTDIR)${etcprefix}/etc/mcelog $(DESTDIR)${prefix}/sbin $(DESTDIR)${prefix}/share/man/man8
+install: mcelog mcelog.conf mcelog.conf.5 mcelog.triggers.5
+ mkdir -p $(DESTDIR)${etcprefix}/etc/mcelog $(DESTDIR)${prefix}/sbin $(DESTDIR)$(MANDIR)/man5 $(DESTDIR)$(MANDIR)/man8
install -m 755 -p mcelog $(DESTDIR)${prefix}/sbin/mcelog
- install -m 644 -p mcelog.8 $(DESTDIR)${prefix}/share/man/man8
+ install -m 644 -p mcelog.8 $(DESTDIR)$(MANDIR)/man8
+ install -m 644 -p mcelog.conf.5 $(DESTDIR)$(MANDIR)/man5
+ install -m 644 -p mcelog.triggers.5 $(DESTDIR)$(MANDIR)/man5
install -m 644 -p -b mcelog.conf $(DESTDIR)${etcprefix}/etc/mcelog/mcelog.conf
for i in ${TRIGGERS} ; do \
install -m 755 -p -b triggers/$$i $(DESTDIR)${etcprefix}/etc/mcelog ; \
done
ifdef DOCDIR
+	install -d -m 755 $(DESTDIR)${DOCDIR}
install -m 644 -p ${DOC} $(DESTDIR)${DOCDIR}
else
echo
echo "Consider defining DOCDIR to install additional documentation"
endif
+mcelog.conf.5: mcelog.conf config-intro.man
+ ./genconfig.py mcelog.conf config-intro.man > mcelog.conf.5
+
clean: test-clean
rm -f ${CLEAN} ${OBJ}
diff --git a/README b/README
index 08184ed..8aa8ec4 100644
--- a/README
+++ b/README
@@ -2,11 +2,15 @@ mcelog is the user space backend for logging machine check errors
reported by the hardware to the kernel. The kernel does the immediate
actions (like killing processes etc.) and mcelog decodes the errors
and manages various other advanced error responses like
-offlining memory, CPUs or triggering events.
+offlining memory, CPUs or triggering events. In addition,
+mcelog also handles corrected errors by logging and accounting for them.
It primarily handles machine checks and thermal events, which
are reported for errors detected by the CPU.
+For more details on what mcelog can do and the underlying theory
+see http://www.mcelog.org
+
It is recommended that mcelog runs on all x86 machines, both
64bit (since early 2.6) and 32bit (since 2.6.32)
@@ -40,6 +44,11 @@ mce.pdf is a very old paper describing the first releases of mcelog
For distributors:
+You can run mcelog from systemd or similar daemons. An example
+systemd unit file is in mcelog.service.
+
+For older distributions using init scripts:
+
Please install a init script by default that runs mcelog in daemon mode.
The mcelog.init script is a good starting point.
diff --git a/client.c b/client.c
index 6a67683..7c7aeb8 100644
--- a/client.c
+++ b/client.c
@@ -29,9 +29,9 @@ void ask_server(char *command)
{
struct sockaddr_un sun;
int fd;
+ FILE * fp;
int n;
char buf[1024];
- int done;
char *path = config_string("server", "socket-path");
if (!path)
path = SOCKET_PATH;
@@ -52,14 +52,18 @@ void ask_server(char *command)
if (write(fd, command, n) != n)
SYSERRprintf("client command write");
- done = 0;
- while (!done && (n = read(fd, buf, sizeof buf)) > 0) {
- if (n >= 5 && !memcmp(buf + n - 5, "done\n", 5)) {
- n -= 5;
- done = 1;
+ if ((fp = fdopen(fd, "r")) != NULL) {
+ while (fgets(buf, sizeof buf, fp)) {
+ n = strlen(buf);
+ if (n >= 5 && !memcmp(buf + n - 5, "done\n", 5)) {
+ fclose(fp);
+ return;
+ }
+
+ fputs(buf, stdout);
}
- write(1, buf, n);
+ fclose(fp);
}
- if (n < 0)
- SYSERRprintf("client read");
+
+ SYSERRprintf("client read");
}
diff --git a/config-intro.man b/config-intro.man
new file mode 100644
index 0000000..c06610d
--- /dev/null
+++ b/config-intro.man
@@ -0,0 +1,10 @@
+.SH NAME
+mcelog.conf \- mcelog.conf reference
+.SH SYNOPSIS
+.B /etc/mcelog/mcelog.conf
+.SH DESCRIPTION
+
+/etc/mcelog/mcelog.conf is the main configuration file for
+.B mcelog(8).
+This is a configuration file separated into sections, including
+a default section.
diff --git a/dmi.c b/dmi.c
index 290a053..b5492cd 100644
--- a/dmi.c
+++ b/dmi.c
@@ -162,6 +162,8 @@ static int get_efi_base_addr(size_t *address)
check_symbol:
while ((fgets(linebuf, sizeof(linebuf) - 1, efi_systab)) != NULL) {
char *addrp = strchr(linebuf, '=');
+ if (!addrp)
+ break;
*(addrp++) = '\0';
if (strcmp(linebuf, "SMBIOS") == 0) {
diff --git a/genconfig.py b/genconfig.py
new file mode 100755
index 0000000..aed6992
--- /dev/null
+++ b/genconfig.py
@@ -0,0 +1,80 @@
+#!/usr/bin/python
+# generate man config documentation from mcelog.conf example
+# usage: genconfig.py mcelog.conf config-intro.man > mcelog.conf.5
+import sys
+import re
+import string
+import argparse
+
+argparser = argparse.ArgumentParser(description="generate man config documentation from mcelog.conf example")
+for argname, arghelp in (('config', "mcelog example config file"), ('intro', "intro file")):
+    argparser.add_argument(argname, type=argparse.FileType('r'), help=arghelp)  # both are readable files
+args = argparser.parse_args()
+
+def parse(f):
+    """Print man-page markup for each line of the example config file f."""
+    explanation = 0
+    header = 1
+    for lineno, line in enumerate(f, 1):
+        # skip the first comment block and the line that terminates it
+        if header:
+            if not re.match(r'^#', line):
+                header = 0
+            continue
+
+        # explanation
+        m = re.match(r'^#\s(.*)', line)
+        if m:
+            explanation += 1
+            s = m.group(1)
+            if explanation == 1:
+                # upper-case only the first character; string.capitalize()
+                # would also lower-case the rest (CPU, DIMM, BIOS, ...)
+                s = s[:1].upper() + s[1:]
+            print s
+            continue
+
+        if explanation:
+            print ".PP"
+            explanation = 0
+
+        # empty line: new option
+        if re.match(r'\s+', line):
+            new_option()
+            continue
+        # group
+        m = re.match(r'\[(.*)\]', line)
+        if m:
+            start_group(m.group(1))
+            continue
+        # config option
+        m = re.match(r'^(#?)([a-z-]+) = (.*)', line)
+        if m:
+            config_option(m.group(1), m.group(2), m.group(3))
+            continue
+        print >>sys.stderr, "Unparseable line %d" % lineno
+
+def config_option(enabled, name, value):
+    # `enabled` is accepted for the caller's sake but does not affect the output
+    sys.stdout.write(".B %s = %s\n.PP\n" % (name, value))
+
+def start_group(name):
+    sys.stdout.write(".SS \"The %s config section\"\n" % name)  # subsection heading per [group]
+
+def new_option():
+    sys.stdout.write(".PP\n")  # paragraph break between options
+
+# NB: the backslash is doubled so that roff sees a real .\" comment line.
+print """
+.\\" Auto generated mcelog.conf manpage. Do not edit.
+.TH "mcelog.conf" 5 "mcelog"
+"""
+# title header, then the intro file verbatim, the option reference, and the footer
+print args.intro.read()
+parse(args.config)
+print """
+.SH SEE ALSO
+.BR mcelog (8),
+.BR mcelog.triggers (5)
+.B http://www.mcelog.org
+"""
diff --git a/haswell.c b/haswell.c
index 0fef6a5..b309ae5 100644
--- a/haswell.c
+++ b/haswell.c
@@ -1,5 +1,5 @@
/* Copyright (C) 2013 Intel Corporation
- Decode Intel Ivy Bridge specific machine check errors.
+ Decode Intel Haswell specific machine check errors.
mcelog is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public
diff --git a/intel.c b/intel.c
index fe08eab..f893be5 100644
--- a/intel.c
+++ b/intel.c
@@ -34,7 +34,8 @@ void intel_cpu_init(enum cputype cpu)
if (cpu == CPU_NEHALEM || cpu == CPU_XEON75XX || cpu == CPU_INTEL ||
cpu == CPU_SANDY_BRIDGE || cpu == CPU_SANDY_BRIDGE_EP ||
cpu == CPU_IVY_BRIDGE || cpu == CPU_IVY_BRIDGE_EPEX ||
- cpu == CPU_HASWELL || cpu == CPU_HASWELL_EPEX)
+ cpu == CPU_HASWELL || cpu == CPU_HASWELL_EPEX || cpu == CPU_BROADWELL ||
+ cpu == CPU_KNIGHTS_LANDING)
memory_error_support = 1;
}
@@ -72,6 +73,15 @@ enum cputype select_intel_cputype(int family, int model)
return CPU_HASWELL;
else if (model == 0x3f)
return CPU_HASWELL_EPEX;
+ else if (model == 0x3d || model == 0x56)
+ return CPU_BROADWELL;
+ else if (model == 0x57)
+ return CPU_KNIGHTS_LANDING;
+ else if (model == 0x1c || model == 0x26 || model == 0x27 ||
+ model == 0x35 || model == 0x36 || model == 0x36 ||
+ model == 0x37 || model == 0x4a || model == 0x4c ||
+ model == 0x4d || model == 0x5a || model == 0x5d)
+ return CPU_ATOM;
if (model > 0x1a) {
Eprintf("Family 6 Model %x CPU: only decoding architectural errors\n",
model);
diff --git a/intel.h b/intel.h
index 00191d5..9d109b1 100644
--- a/intel.h
+++ b/intel.h
@@ -19,5 +19,7 @@ extern int memory_error_support;
case CPU_IVY_BRIDGE: \
case CPU_IVY_BRIDGE_EPEX: \
case CPU_HASWELL: \
- case CPU_HASWELL_EPEX
+ case CPU_HASWELL_EPEX: \
+ case CPU_BROADWELL: \
+ case CPU_KNIGHTS_LANDING
diff --git a/leaky-bucket.c b/leaky-bucket.c
index c2c501b..721ab22 100644
--- a/leaky-bucket.c
+++ b/leaky-bucket.c
@@ -25,7 +25,7 @@ time_t __attribute__((weak)) bucket_time(void)
return time(NULL);
}
-static void bucket_age(const struct bucket_conf *c, struct leaky_bucket *b,
+void bucket_age(const struct bucket_conf *c, struct leaky_bucket *b,
time_t now)
{
long diff;
diff --git a/leaky-bucket.h b/leaky-bucket.h
index 497719e..860ba3c 100644
--- a/leaky-bucket.h
+++ b/leaky-bucket.h
@@ -27,5 +27,7 @@ char *bucket_output(const struct bucket_conf *c, struct leaky_bucket *b);
int bucket_conf_init(struct bucket_conf *c, const char *rate);
void bucket_init(struct leaky_bucket *b);
time_t bucket_time(void);
+void bucket_age(const struct bucket_conf *c, struct leaky_bucket *b,
+ time_t now);
#endif
diff --git a/mcelog.8 b/mcelog.8
index f8a77c4..3781db6 100644
--- a/mcelog.8
+++ b/mcelog.8
@@ -1,5 +1,4 @@
-.\" disk db commented out for now because it's not usable enough
-.TH MCELOG 8 "May 2009" "" "Linux's Administrator's Manual"
+.TH MCELOG 8 "Mar 2015" "" "Linux's Administrator's Manual"
.SH NAME
mcelog \- Decode kernel machine check log on x86 machines
.SH SYNOPSIS
@@ -26,13 +25,16 @@ in main memory by an integrated memory controller, data
transfer errors on the front side bus or CPU interconnect or other internal
errors.
Possible causes can be cosmic radiation, instable power supplies,
-cooling problems, broken hardware, or bad luck.
+cooling problems, broken hardware, running systems out of specification,
+or bad luck.
Most errors can be corrected by the CPU by internal error correction
mechanisms. Uncorrected errors cause machine check exceptions which
-may panic the machine.
+may kill processes or panic the machine. A small number of corrected
+errors is usually not a cause for worry, but a large number can indicate
+future failure.
-When a corrected error happens the x86 kernel writes a record describing
+When a corrected or recovered error happens the x86 kernel writes a record describing
the MCE into a internal ring buffer available through the
.I /dev/mcelog
device
@@ -43,7 +45,11 @@ decodes them into a human readable format and prints them
on the standard output or optionally into the system log.
Optionally it can also take more options like keeping statistics or
-triggering shell scripts on specific events.
+triggering shell scripts on specific events. By default mcelog
+supports offlining memory pages with persistent corrected errors,
+offlining CPU cores if they developed cache problems,
+and otherwise logging specific events to the system log after
+they crossed a threshold.
The normal operating modi for mcelog are running
as a regular cron job (traditional way, deprecated),
@@ -112,12 +118,12 @@ and undocumented now.
With the
.B \-\-dmi
-option mcelog will look up the addresses reported in machine
+option mcelog will look up the DIMMs reported in machine
checks in the
.I SMBIOS/DMI
-tables of the BIOS.
-This can sometimes tell you which DIMM or memory controller
-has developed a problem. More often the information reported
+tables of the BIOS and map the DIMMs to board identifiers.
+This only works when the BIOS reports the identifiers correctly.
+Unfortunately often the information reported
by the BIOS is either subtly or obviously wrong or useless.
This option requires that mcelog has read access to /dev/mem
(normally requires root) and runs on the same machine
@@ -281,6 +287,9 @@ option use
use
.I logfile = /tmp/logfile
+For more information on the config file please see
+.B mcelog.conf(5).
+
.SH NOTES
The kernel prefers old messages over new. If the log buffer overflows
only old ones will be kept.
@@ -308,9 +317,14 @@ restarting the daemon.
.\"/var/lib/memory-errors
.SH SEE ALSO
+.BR mcelog.conf(5),
+.BR mcelog.triggers(5)
+
+http://www.mcelog.org
+
AMD x86-64 architecture programmer's manual, Volume 2, System programming
Intel 64 and IA32 Architectures Software Developer's manual, Volume 3, System programming guide
-Parts 1 and 2. Machine checks are described in Chapter 14 in Part1 and in Appendix E in Part2.
+Chapters 15 and 16. http://www.intel.com/sdm
Datasheet of your CPU.
diff --git a/mcelog.c b/mcelog.c
index 95a913f..96c0a9d 100644
--- a/mcelog.c
+++ b/mcelog.c
@@ -231,6 +231,9 @@ static char *cputype_name[] = {
[CPU_IVY_BRIDGE_EPEX] = "Ivy Bridge EP/EX", /* Fill in better name */
[CPU_HASWELL] = "Haswell", /* Fill in better name */
[CPU_HASWELL_EPEX] = "Haswell EP/EX", /* Fill in better name */
+ [CPU_BROADWELL] = "Broadwell",
+ [CPU_KNIGHTS_LANDING] = "Knights Landing",
+ [CPU_ATOM] = "ATOM",
};
static struct config_choice cpu_choices[] = {
@@ -269,7 +272,10 @@ static struct config_choice cpu_choices[
{ "haswell", CPU_HASWELL }, /* Fill in better name */
{ "haswell-ep", CPU_HASWELL_EPEX }, /* Fill in better name */
{ "haswell-ex", CPU_HASWELL_EPEX }, /* Fill in better name */
- {}
+ { "broadwell", CPU_BROADWELL },
+ { "knightslanding", CPU_KNIGHTS_LANDING },
+ { "atom", CPU_ATOM },
+ { NULL }
};
static void print_cputypes(void)
@@ -430,7 +436,8 @@ static void dump_mce(struct mce *m, unsi
mod);
}
if (cputype != CPU_SANDY_BRIDGE_EP && cputype != CPU_IVY_BRIDGE_EPEX &&
- cputype != CPU_HASWELL_EPEX)
+ cputype != CPU_HASWELL_EPEX && cputype != CPU_BROADWELL &&
+ cputype != CPU_KNIGHTS_LANDING)
resolveaddr(m->addr);
if (!ascii_mode && ismemerr && (m->status & MCI_STATUS_ADDRV)) {
diskdb_resolve_addr(m->addr);
@@ -517,7 +524,7 @@ int is_cpu_supported(void)
if (family == 15) {
cputype = CPU_K8;
} else if (family >= 16) {
- SYSERRprintf("AMD Processor family %d: Please use the edac_mce_amd module instead.\n", family);
+ SYSERRprintf("ERROR: AMD Processor family %d: mcelog does not support this processor. Please use the edac_mce_amd module instead.\n", family);
return 0;
}
} else if (!strcmp(vendor,"GenuineIntel"))
@@ -741,7 +748,7 @@ restart:
else
s += 3;
- n = sscanf(s, "%02x:<%016Lx> {%100s}%n",
+ n = sscanf(s, "%02x:<%016Lx> {%99s}%n",
&cs,
&m.ip,
symbol, &next);
@@ -1377,7 +1384,7 @@ int main(int ac, char **av)
d.buf = xalloc(d.recordlen * d.loglen);
if (daemon_mode) {
- prefill_memdb();
+ prefill_memdb(do_dmi);
if (!do_dmi)
closedmi();
server_setup();
diff --git a/mcelog.conf b/mcelog.conf
index 6a2be26..f8abb99 100644
--- a/mcelog.conf
+++ b/mcelog.conf
@@ -9,36 +9,36 @@
# white space is not allowed in value currently, except at the end where it is dropped
#
-# in general all command line options that are not commands work here
-# see man mcelog or mcelog --help for a list
+# In general all command line options that are not commands work here.
+# See man mcelog or mcelog --help for a list.
# e.g. to enable the --no-syslog option use
#no-syslog = yes (or no to disable)
# when the option has a argument
#logfile = /tmp/logfile
-# below are the options which are not command line options
+# below are the options which are not command line options.
# Set CPU type for which mcelog decodes events:
#cpu = type
-# for valid values for type please see mcelog --help
+# For valid values for type please see mcelog --help.
# If this value is set incorrectly the decoded output will be likely incorrect.
-# by default when this parameter is not set mcelog uses the CPU it is running on
+# By default when this parameter is not set mcelog uses the CPU it is running on
# on very new kernels the mcelog events reported by the kernel also carry
# the CPU type which is used too when available and not overriden.
# Enable daemon mode:
#daemon = yes
# By default mcelog just processes the currently pending events and exits.
-# in daemon mode it will keep running as a daemon in the background and poll
+# In daemon mode it will keep running as a daemon in the background and poll
# the kernel for events and then decode them.
-# Filter out known broken events by default
+# Filter out known broken events by default.
filter = yes
-# don't log memory errors individually
-# they still get accounted if that is enabled
+# Don't log memory errors individually.
+# They still get accounted if that is enabled.
#filter-memory-errors = yes
# output in undecoded raw format to be easier machine readable
-# (default is decoded)
+# (default is decoded).
#raw = yes
# Set CPU Mhz to decode uptime from time stamp counter (output
@@ -62,16 +62,17 @@ filter = yes
# Append log output to logfile instead of stdout. Only when no syslog logging is active
#logfile = filename
-# Use SMBIOS information to decode DIMMs (needs root)
-# This function is not recommended to use right now and generally not needed
+# Use SMBIOS information to decode DIMMs (needs root).
+# This function is not recommended to use right now and generally not needed.
# The exception is memdb prepopulation, which is configured separately below.
#dmi = no
-# when in daemon mode run as this user after set up
-# note that the triggers will run as this user too
-# setting this to non root will mean that triggers cannot take some corrective
-# action, like offlining objects
+# When in daemon mode run as this user after set up.
+# Note that the triggers will run as this user too.
+# Setting this to non root will mean that triggers cannot take some corrective
+# action, like offlining objects.
#run-credentials-user = root
+
# group to run as daemon with
# default to the group of the run-credentials-user
#run-credentials-group = nobody
@@ -79,72 +80,88 @@ filter = yes
[server]
# user allowed to access client socket.
# when set to * match any
-# root is always allowed to access
+# root is always allowed to access.
# default: root only
client-user = root
# group allowed to access mcelog
-# when no group is configured any group matches (but still user checking)
+# When no group is configured any group matches (but still user checking).
# when set to * match any
#client-group = root
-# path to the unix socket for client<->server communication
-# when no socket-path is configured the server will not start
+# Path to the unix socket for client<->server communication.
+# When no socket-path is configured the server will not start
#socket-path = /var/run/mcelog-client
-# when mcelog starts it checks if a server is already running. timeout
+# When mcelog starts it checks if a server is already running. This configures the timeout
# for this check.
#initial-ping-timeout = 2
#
[dimm]
# Is the in memory DIMM error tracking enabled?
# Only works on systems with integrated memory controller and
-# which are supported
-# Only takes effect in daemon mode
+# which are supported.
+# Only takes effect in daemon mode.
dimm-tracking-enabled = yes
-# Use DMI information from the BIOS to prepopulate DIMM database
+# Use DMI information from the BIOS to prepopulate DIMM database.
# Note this might not work with all BIOS and requires mcelog to run as root.
# Alternative is to let mcelog create DIMM objects on demand.
dmi-prepopulate = yes
#
-# execute these triggers when the rate of corrected or uncorrected
-# errors per DIMM exceeds the threshold
+# Execute these triggers when the rate of corrected or uncorrected
+# errors per DIMM exceeds the threshold.
# Note when the hardware does not report DIMMs this might also
-# be per channel
+# be per channel.
# The default of 10/24h is reasonable for server quality
-# DDR3 DIMMs as of 2009/10
+# DDR3 DIMMs as of 2009/10.
#uc-error-trigger = dimm-error-trigger
uc-error-threshold = 1 / 24h
#ce-error-trigger = dimm-error-trigger
ce-error-threshold = 10 / 24h
[socket]
-# Memory error accounting per socket
+# Enable memory error accounting per socket.
socket-tracking-enabled = yes
-# Threshold and trigger for uncorrected memory errors on a socket
+
+# Threshold and trigger for uncorrected memory errors on a socket.
# mem-uc-error-trigger = socket-memory-error-trigger
+
mem-uc-error-threshold = 100 / 24h
-# Threshold and trigger for corrected memory errors on a socket
+
+# Trigger script for corrected memory errors on a socket.
mem-ce-error-trigger = socket-memory-error-trigger
+
+# Threshold on when to trigger a corrected error for the socket.
+
mem-ce-error-threshold = 100 / 24h
+
# Log socket error threshold explicitely?
mem-ce-error-log = yes
+# Trigger script for uncorrected bus error events
bus-uc-threshold-trigger = bus-error-trigger
+
+# Trigger script for uncorrected IOMCA errors
iomca-threshold-trigger = iomca-error-trigger
+
+# Trigger script for other uncategorized errors
unknown-threshold-trigger = unknown-error-trigger
[cache]
-# Processing of cache error thresholds reported by Intel CPUs
+# Processing of cache error thresholds reported by Intel CPUs.
cache-threshold-trigger = cache-error-trigger
+
# Should cache threshold events be logged explicitely?
cache-threshold-log = yes
[page]
-# Memory error accouting per 4K memory page
-# Threshold for the correct memory errors trigger script
+# Memory error accounting per 4K memory page.
+# Threshold for the corrected memory errors trigger script.
memory-ce-threshold = 10 / 24h
-# Trigger script for corrected errors
+
+# Trigger script for corrected errors.
# memory-ce-trigger = page-error-trigger
+
# Should page threshold events be logged explicitely?
memory-ce-log = yes
+
# specify the internal action in mcelog to exceeding a page error threshold
# this is done in addition to executing the trigger script if available
# off no action
diff --git a/mcelog.conf.5 b/mcelog.conf.5
new file mode 100644
index 0000000..5a9afda
--- /dev/null
+++ b/mcelog.conf.5
@@ -0,0 +1,283 @@
+
+.\" Auto generated mcelog.conf manpage. Do not edit.
+.TH "mcelog.conf" 5 "mcelog"
+
+.SH NAME
+mcelog.conf \- mcelog.conf reference
+.SH SYNOPSIS
+.B /etc/mcelog.conf
+.SH DESCRIPTION
+
+/etc/mcelog.conf is the main configuration file for
+.B mcelog(8).
+This is configuration file separated into sections including
+a default section.
+
+
+General format
+.PP
+.B optionname = value
+.PP
+White space is not allowed in value currently, except at the end where it is dropped
+
+.PP
+.PP
+In general all command line options that are not commands work here.
+See man mcelog or mcelog --help for a list.
+e.g. to enable the --no-syslog option use
+.PP
+.B no-syslog = yes (or no to disable)
+.PP
+When the option has a argument
+.PP
+.B logfile = /tmp/logfile
+.PP
+Below are the options which are not command line options.
+.PP
+.PP
+Set cpu type for which mcelog decodes events:
+.PP
+.B cpu = type
+.PP
+For valid values for type please see mcelog --help.
+If this value is set incorrectly the decoded output will be likely incorrect.
+By default when this parameter is not set mcelog uses the CPU it is running on
+on very new kernels the mcelog events reported by the kernel also carry
+the CPU type which is used too when available and not overriden.
+.PP
+.PP
+Enable daemon mode:
+.PP
+.B daemon = yes
+.PP
+By default mcelog just processes the currently pending events and exits.
+In daemon mode it will keep running as a daemon in the background and poll
+the kernel for events and then decode them.
+.PP
+.PP
+Filter out known broken events by default.
+.PP
+.B filter = yes
+.PP
+Don't log memory errors individually.
+They still get accounted if that is enabled.
+.PP
+.B filter-memory-errors = yes
+.PP
+.PP
+Output in undecoded raw format to be easier machine readable
+(default is decoded).
+.PP
+.B raw = yes
+.PP
+.PP
+Set cpu mhz to decode uptime from time stamp counter (output
+unreliable, not needed on new kernels which report the event time
+directly. A lot of systems don't have a linear time stamp clock
+and the output is wrong then.
+Normally mcelog tries to figure out if it the TSC is reliable
+and only uses the current frequency then.
+Setting a frequency forces timestamp decoding.
+This setting is obsolete with modern kernels which report the time
+directly.
+.PP
+.B cpumhz = 1800.00
+.PP
+.PP
+Log output options
+Log decoded machine checks in syslog (default stdout or syslog for daemon)
+.PP
+.B syslog = yes
+.PP
+Log decoded machine checks in syslog with error level
+.PP
+.B syslog-error = yes
+.PP
+Never log anything to syslog
+.PP
+.B no-syslog = yes
+.PP
+Append log output to logfile instead of stdout. only when no syslog logging is active
+.PP
+.B logfile = filename
+.PP
+.PP
+Use smbios information to decode dimms (needs root).
+This function is not recommended to use right now and generally not needed.
+The exception is memdb prepopulation, which is configured separately below.
+.PP
+.B dmi = no
+.PP
+.PP
+When in daemon mode run as this user after set up.
+Note that the triggers will run as this user too.
+Setting this to non root will mean that triggers cannot take some corrective
+action, like offlining objects.
+.PP
+.B run-credentials-user = root
+.PP
+.PP
+Group to run as daemon with
+default to the group of the run-credentials-user
+.PP
+.B run-credentials-group = nobody
+.PP
+.PP
+.SS "The server config section"
+User allowed to access client socket.
+when set to * match any
+root is always allowed to access.
+default: root only
+.PP
+.B client-user = root
+.PP
+Group allowed to access mcelog
+When no group is configured any group matches (but still user checking).
+when set to * match any
+.PP
+.B client-group = root
+.PP
+Path to the unix socket for client<->server communication.
+When no socket-path is configured the server will not start
+.PP
+.B socket-path = /var/run/mcelog-client
+.PP
+When mcelog starts it checks if a server is already running. this configures the timeout
+for this check.
+.PP
+.B initial-ping-timeout = 2
+.PP
+
+.PP
+.SS "The dimm config section"
+Is the in memory dimm error tracking enabled?
+Only works on systems with integrated memory controller and
+which are supported.
+Only takes effect in daemon mode.
+.PP
+.B dimm-tracking-enabled = yes
+.PP
+Use dmi information from the bios to prepopulate dimm database.
+Note this might not work with all BIOS and requires mcelog to run as root.
+Alternative is to let mcelog create DIMM objects on demand.
+.PP
+.B dmi-prepopulate = yes
+.PP
+
+Execute these triggers when the rate of corrected or uncorrected
+Errors per DIMM exceeds the threshold.
+Note when the hardware does not report DIMMs this might also
+be per channel.
+The default of 10/24h is reasonable for server quality
+DDR3 DIMMs as of 2009/10.
+.PP
+.B uc-error-trigger = dimm-error-trigger
+.PP
+.B uc-error-threshold = 1 / 24h
+.PP
+.B ce-error-trigger = dimm-error-trigger
+.PP
+.B ce-error-threshold = 10 / 24h
+.PP
+.PP
+.SS "The socket config section"
+Enable memory error accounting per socket.
+.PP
+.B socket-tracking-enabled = yes
+.PP
+.PP
+Threshold and trigger for uncorrected memory errors on a socket.
+mem-uc-error-trigger = socket-memory-error-trigger
+.PP
+.PP
+.B mem-uc-error-threshold = 100 / 24h
+.PP
+.PP
+Trigger script for corrected memory errors on a socket.
+.PP
+.B mem-ce-error-trigger = socket-memory-error-trigger
+.PP
+.PP
+Threshold on when to trigger a correct error for the socket.
+.PP
+.PP
+.B mem-ce-error-threshold = 100 / 24h
+.PP
+.PP
+ log socket error threshold explicitely?
+.PP
+.B mem-ce-error-log = yes
+.PP
+.PP
+Trigger script for uncorrected bus error events
+.PP
+.B bus-uc-threshold-trigger = bus-error-trigger
+.PP
+.PP
+Trigger script for uncorrected iomca erors
+.PP
+.B iomca-threshold-trigger = iomca-error-trigger
+.PP
+.PP
+Trigger script for other uncategorized errors
+.PP
+.B unknown-threshold-trigger = unknown-error-trigger
+.PP
+.PP
+.SS "The cache config section"
+Processing of cache error thresholds reported by intel cpus.
+.PP
+.B cache-threshold-trigger = cache-error-trigger
+.PP
+.PP
+Should cache threshold events be logged explicitely?
+.PP
+.B cache-threshold-log = yes
+.PP
+.PP
+.SS "The page config section"
+Memory error accouting per 4k memory page.
+Threshold for the correct memory errors trigger script.
+.PP
+.B memory-ce-threshold = 10 / 24h
+.PP
+.PP
+Trigger script for corrected errors.
+memory-ce-trigger = page-error-trigger
+.PP
+.PP
+Should page threshold events be logged explicitely?
+.PP
+.B memory-ce-log = yes
+.PP
+.PP
+Specify the internal action in mcelog to exceeding a page error threshold
+this is done in addition to executing the trigger script if available
+off no action
+account only account errors
+soft try to soft-offline page without killing any processes
+ This requires an uptodate kernel. Might not be successfull.
+hard try to hard-offline page by killing processes
+ Requires an uptodate kernel. Might not be successfull.
+soft-then-hard First try to soft offline, then try hard offlining
+.PP
+.B memory-ce-action = off|account|soft|hard|soft-then-hard
+.PP
+.B memory-ce-action = soft
+.PP
+.PP
+.SS "The trigger config section"
+Maximum number of running triggers
+.PP
+.B children-max = 2
+.PP
+Execute triggers in this directory
+.PP
+.B directory = /etc/mcelog
+.PP
+
+.SH SEE ALSO
+.BR mcelog (8)
+,
+.B http://www.mcelog.org
+
diff --git a/mcelog.h b/mcelog.h
index 550a0a5..6c097cf 100644
--- a/mcelog.h
+++ b/mcelog.h
@@ -65,14 +65,18 @@ struct mce {
#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */
#define MCI_STATUS_S (1ULL<<56) /* signalled */
#define MCI_STATUS_AR (1ULL<<55) /* action-required */
+#define MCI_STATUS_FWST (1ULL<<37) /* Firmware updated status indicator */
#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */
#define MCG_STATUS_EIPV (1ULL<<1) /* eip points to correct instruction */
#define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */
+#define MCG_STATUS_LMCES (1ULL<<3) /* local machine check signaled */
#define MCG_CMCI_P (1ULL<<10) /* CMCI supported */
#define MCG_TES_P (1ULL<<11) /* Yellow bit cache threshold supported */
#define MCG_SER_P (1ULL<<24) /* MCA recovery / new status */
+#define MCG_ELOG_P (1ULL<<26) /* Extended error log supported */
+#define MCG_LMCE_P (1ULL<<27) /* Local machine check supported */
#define NELE(x) (sizeof(x)/sizeof(*(x)))
#define err(x) perror(x),exit(1)
@@ -119,6 +123,9 @@ enum cputype {
CPU_IVY_BRIDGE_EPEX,
CPU_HASWELL,
CPU_HASWELL_EPEX,
+ CPU_BROADWELL,
+ CPU_KNIGHTS_LANDING,
+ CPU_ATOM,
};
enum option_ranges {
diff --git a/mcelog.service b/mcelog.service
new file mode 100644
index 0000000..c5aaf07
--- /dev/null
+++ b/mcelog.service
@@ -0,0 +1,10 @@
+[Unit]
+Description=Machine Check Exception Logging Daemon
+After=syslog.target
+
+[Service]
+ExecStart=/usr/sbin/mcelog --ignorenodev --daemon --foreground
+StandardOutput=syslog
+
+[Install]
+WantedBy=multi-user.target
diff --git a/mcelog.triggers.5 b/mcelog.triggers.5
new file mode 100644
index 0000000..510bbef
--- /dev/null
+++ b/mcelog.triggers.5
@@ -0,0 +1,231 @@
+'\" t
+.TH "mcelog.triggers" 5 "mcelog"
+.SH NAME
+mcelog.triggers \- mcelog trigger scripts reference
+.SH SYNOPSIS
+.B /etc/mcelog/bus-error-trigger
+.br
+.B /etc/mcelog/cache-error-trigger
+.br
+.B /etc/mcelog/dimm-error-trigger
+.br
+.B /etc/mcelog/iomca-error-trigger
+.br
+.B /etc/mcelog/page-error-trigger
+.br
+.B /etc/mcelog/socket-memory-error-trigger
+.br
+.B /etc/mcelog/unknown-error-trigger
+.br
+.SH DESCRIPTION
+.BR mcelog(8)
+maintains thresholds of errors using a
+.I leaky-bucket
+algorithm.
+When the number of errors in a specific
+time window exceeds a pre-configured threshold a
+.I trigger
+will be executed. Triggers are usually shell scripts in the
+.B /etc/mcelog
+directory
+but can also be other internal actions. Thresholds and triggers
+can be configured in
+.BR mcelog.conf(5) .
+
+Triggers run as the user configured for mcelog
+in
+.I mcelog.conf,
+by default root. The default trigger action can
+be overridden by specifying a different trigger script in the configuration file.
+Actions in addition to the default trigger
+(like notifying an administrator) can be put into the respective
+.I /etc/mcelog/*.local
+script which is executed after the default action. This allows updating the default
+scripts without overriding local actions. All trigger actions are also
+logged to syslog.
+.PP
+.B "The DIMM and socket memory error triggers"
+.PP
+The
+.B /etc/mcelog/dimm-error-trigger
+and
+.B /etc/mcelog/socket-memory-error-trigger
+scripts are executed when a DIMM or a CPU socket exceeds
+a configured corrected or uncorrected memory error threshold.
+The thresholds are configured in the
+.B mcelog.conf
+.I [dimm]
+and
+.I [socket]
+sections.
+The default triggers log a warning message in the system log.
+The triggers are only executed when mcelog runs as a daemon.
+
+Arguments are passed as environment variables
+.TS
+tab(:);
+l l.
+THRESHOLD:human readable threshold status
+MESSAGE:Human readable consolidated error message
+TOTALCOUNT:total corrected or uncorrected count of errors for current DIMM depending on what triggered the event
+LOCATION:Consolidated location as a single string
+DMI_LOCATION:DIMM location from DMI/SMBIOS if available
+DMI_NAME:DIMM identifier from DMI/SMBIOS if available
+DIMM:DIMM number reported by hardware
+CHANNEL:Channel number reported by hardware
+SOCKETID:Socket ID of CPU that includes the memory controller with the DIMM
+CECOUNT:Total corrected error count for DIMM
+UCCOUNT:Total uncorrected error count for DIMM
+LASTEVENT:Time stamp of event that triggered threshold (in time_t format, seconds)
+THRESHOLD_COUNT:Total number of events in current threshold time period of specific type
+.TE
+
+After the default action local actions in
+.B /etc/mcelog/dimm-error-trigger.local
+or respective
+.B /etc/mcelog/socket-memory-error-trigger.local
+are executed.
+
+.PP
+.B "The page error trigger"
+.PP
+The
+.B /etc/mcelog/page-error-trigger
+script is
+executed by mcelog in daemon mode when a page
+in memory exceeds a pre-configured corrected or uncorrected error threshold.
+mcelog internally also implements offlining the page through the kernel.
+This is configured through the
+.I [page]
+section of
+.BR mcelog.conf(5) .
+.PP
+The environment arguments are the same as for the
+.I dimm-error-trigger
+script
+.PP
+After the default action local actions in
+.IR /etc/mcelog/page-error-trigger.local " are executed."
+
+.PP
+.B "The cache error trigger"
+.PP
+The
+.I /etc/mcelog/cache-error-trigger
+shell script is called for cache error handling in daemon mode
+when a CPU reports excessive corrected cache errors.
+This could be an indication of future uncorrected errors.
+.PP
+This trigger is configured through the
+.B [cache]
+section in the
+.BR mcelog.conf(5)
+configuration file. The threshold is defined by the CPU. The default trigger offlines the affected CPU cores, unless it is the last core running.
+.PP
+Arguments are passed as environment variables
+.TS
+tab(:);
+l l.
+MESSAGE:Human readable error message
+CPU:Linux CPU number that triggered the error
+LEVEL:Cache level affected by error
+TYPE:Cache type affected by error (Data,Instruction,Generic)
+AFFECTED_CPUS:List of CPUs sharing the affected cache
+SOCKETID:Socket ID of affected CPU
+.TE
+.PP
+After the default action local actions in
+.IR /etc/mcelog/cache-error-trigger.local " are executed."
+.PP
+.B "The bus-uc-threshold-trigger"
+.PP
+The
+.B bus-uc-threshold-trigger
+runs on uncorrected errors on an IO bus. It is configured through the
+.B bus-uc-threshold-trigger
+and
+.B bus-uc-threshold-trigger-threshold
+options in
+.I /etc/mcelog.conf(5).
+By default it logs a message with the error location to the system log.
+After the default action local actions in
+.I /etc/mcelog/bus-uc-error-trigger.local
+are executed.
+.PP
+Arguments are passed as environment variables
+.TS
+tab(:);
+l l.
+MESSAGE:Human readable consolidated error message.
+LOCATION:Consolidated location as a single string
+SOCKETID:Socket ID of CPU that includes the memory controller with the DIMM
+LEVEL:Interconnect level
+PARTICIPATION:Processor Participation (Originator, Responder or Observer)
+REQUEST:Request type (read, write, prefetch, etc.)
+ORIGIN:Memory or IO
+TIMEOUT:The request timed out or not
+.TE
+.PP
+.B "The iomca-error-trigger"
+.PP
+The
+.B iomca-error-trigger
+runs when a socket receives bus or interconnect errors.
+It is configured through the
+.B iomca-error-trigger
+and
+.B iomca-error-trigger-threshold
+options in
+.IR /etc/mcelog.conf ". By default it logs a message with the error location to the system log."
+After the default action local actions in
+.IR /etc/mcelog/iomca-error-trigger.local " are executed."
+.PP
+Arguments are passed as environment variables
+.TS
+tab(:);
+l l.
+MESSAGE:Human readable consolidated error message
+LOCATION:Consolidated location as a single string
+SOCKETID:Socket ID of CPU that includes the memory controller with the DIMM
+CPU:Linux CPU number that triggered the error
+SET:PCI segment number
+BUS:PCI bus number
+DEVICE:PCI device number
+FUNCTION:PCI function number
+.TE
+.PP
+.B "The unknown-error-trigger"
+.PP
+The
+.B unknown-error-trigger
+runs on any errors not otherwise categorized.
+It is configured through the
+.B unknown-error-trigger
+and
+.B unknown-error-trigger-threshold
+options in
+.I /etc/mcelog.conf.
+By default it logs a message to the system log.
+After the default action local actions in
+.I /etc/mcelog/unknown-error-trigger.local
+are executed.
+.PP
+Arguments are passed as environment variables
+.TS
+tab(:);
+l l.
+MESSAGE:Human readable consolidated error message
+LOCATION:Consolidated location as a single string
+SOCKETID:Socket ID of CPU that includes the memory controller with the DIMM
+CPU:Linux CPU number that triggered the error
+STATUS:IA32_MCi_STATUS register value
+ADDR:IA32_MCi_ADDR register value
+MISC:IA32_MCi_MISC register value
+MCGSTATUS:IA32_MCG_STATUS register value
+MCGCAP:IA32_MCG_CAP register value
+.TE
+.SH SEE ALSO
+http://www.mcelog.org
+
+.B mcelog(8),
+.B mcelog.conf(5)
diff --git a/memdb.c b/memdb.c
index bde8113..7a33750 100644
--- a/memdb.c
+++ b/memdb.c
@@ -270,6 +270,7 @@ static void dump_errtype(char *name, struct err_type *e, FILE *f, enum printflag
int all = (flags & DUMP_ALL);
char *s;
+ bucket_age(bc, &e->bucket, bucket_time());
if (e->count || e->bucket.count || all)
fprintf(f, "%s:\n", name);
if (e->count || all) {
@@ -382,7 +383,7 @@ parse_dimm_addr(char *bl, unsigned *socketid, unsigned *channel, unsigned *dimm)
}
/* Prepopulate DIMM database from BIOS information */
-void prefill_memdb(void)
+void prefill_memdb(int do_dmi)
{
static int initialized;
int i;
@@ -395,7 +396,7 @@ void prefill_memdb(void)
if (!memdb_enabled)
return;
initialized = 1;
- if (config_bool("dimm", "dmi-prepopulate") == 0)
+ if (config_bool("dimm", "dmi-prepopulate") == 0 || !do_dmi)
return;
if (opendmi() < 0)
return;
diff --git a/memdb.h b/memdb.h
index 5c68581..afc3348 100644
--- a/memdb.h
+++ b/memdb.h
@@ -11,7 +11,7 @@ enum printflags {
DUMP_BIOS = (1 << 1),
};
-void prefill_memdb(void);
+void prefill_memdb(int do_dmi);
void memdb_config(void);
void dump_memory_errors(FILE *f, enum printflags flags);
diff --git a/p4.c b/p4.c
index f938196..2bf1eee 100644
--- a/p4.c
+++ b/p4.c
@@ -317,6 +317,10 @@ static int decode_mci(__u64 status, __u64 misc, int cpu, unsigned mcgcap, int *i
if (status & (MCI_STATUS_S|MCI_STATUS_AR))
Wprintf("%s\n", arstate[(status >> 55) & 3]);
+ if ((mcgcap & MCG_SER_P) && (status & MCI_STATUS_FWST)) {
+ Wprintf("Firmware may have updated this error\n");
+ }
+
if ((mcgcap == 0 || (mcgcap & MCG_TES_P)) && !(status & MCI_STATUS_UC)) {
track = (status >> 53) & 3;
decode_tracking(track);
@@ -334,6 +338,8 @@ static void decode_mcg(__u64 mcgstatus)
Wprintf("EIPV ");
if (mcgstatus & MCG_STATUS_MCIP)
Wprintf("MCIP ");
+ if (mcgstatus & MCG_STATUS_LMCES)
+ Wprintf("LMCE ");
Wprintf("\n");
}
diff --git a/server.c b/server.c
index 344eb38..a1fa7da 100644
--- a/server.c
+++ b/server.c
@@ -291,7 +291,7 @@ static int server_ping(struct sockaddr_un *un)
{
struct sigaction oldsa;
struct sigaction sa = { .sa_handler = ping_timeout };
- int ret = -1, n;
+ int ret, n;
char buf[10];
int fd = socket(PF_UNIX, SOCK_STREAM, 0);
if (fd < 0)
@@ -299,6 +299,7 @@ static int server_ping(struct sockaddr_un *un)
sigaction(SIGALRM, &sa, &oldsa);
if (sigsetjmp(ping_timeout_ctx, 1) == 0) {
+ ret = 0;
alarm(initial_ping_timeout);
if (connect(fd, un, sizeof(struct sockaddr_un)) < 0)
goto cleanup;
@@ -308,7 +309,8 @@ static int server_ping(struct sockaddr_un *un)
goto cleanup;
if (n == 5 && !memcmp(buf, "pong\n", 5))
ret = 0;
- }
+ } else
+ ret = -1;
cleanup:
sigaction(SIGALRM, &oldsa, NULL);
alarm(0);
diff --git a/tests/test b/tests/test
index 35bebd2..148bf1f 100755
--- a/tests/test
+++ b/tests/test
@@ -17,6 +17,8 @@ if [ "$(whoami)" != "root" ] ; then
exit 1
fi
+[ ! -f /dev/mce-inject ] && modprobe mce-inject
+
echo "++++++++++++ running $1 test +++++++++++++++++++"
# disable trigger
diff --git a/trigger.c b/trigger.c
index 19466a6..5caca34 100644
--- a/trigger.c
+++ b/trigger.c
@@ -115,11 +115,18 @@ static void finish_child(pid_t child, int status)
static void child_handler(int sig, siginfo_t *si, void *ctx)
{
int status;
+ pid_t pid;
+
if (waitpid(si->si_pid, &status, WNOHANG) < 0) {
SYSERRprintf("Cannot collect child %d", si->si_pid);
return;
}
finish_child(si->si_pid, status);
+
+ /* Check other child(ren)'s status to avoid zombie process */
+ while ((pid = waitpid(-1, &status, WNOHANG)) > 0) {
+ finish_child(pid, status);
+ }
}
void trigger_setup(void)
diff --git a/triggers/bus-error-trigger b/triggers/bus-error-trigger
old mode 100644
new mode 100755
diff --git a/triggers/iomca-error-trigger b/triggers/iomca-error-trigger
old mode 100644
new mode 100755
diff --git a/triggers/unknown-error-trigger b/triggers/unknown-error-trigger
old mode 100644
new mode 100755
index b924a0e..fa2866c
--- a/triggers/unknown-error-trigger
+++ b/triggers/unknown-error-trigger
@@ -9,7 +9,7 @@
# CPU Linux CPU number that triggered the error
# STATUS IA32_MCi_STATUS register value
# ADDR IA32_MCi_ADDR register value
-# MISC IA32_MCi_MISC regiser value
+# MISC IA32_MCi_MISC register value
# MCGSTATUS IA32_MCG_STATUS register value
# MCGCAP IA32_MCG_CAP register value
# For details on the register layout please see the Intel SDM http://www.intel.com/sdm