Blame SOURCES/mcelog-update-e7e0ac1.patch

09c1d0
diff --git a/CHANGES b/CHANGES
09c1d0
index cd279c4..e3c4044 100644
09c1d0
--- a/CHANGES
09c1d0
+++ b/CHANGES
09c1d0
@@ -1,5 +1,9 @@
09c1d0
 <newer changes first>
09c1d0
 
09c1d0
+Changes file is obsolete.
09c1d0
+Please see git log on https://git.kernel.org/cgit/utils/cpu/mce/mcelog.git/
09c1d0
+for newer changes.
09c1d0
+
09c1d0
 Add Linux Kongress 2010 paper
09c1d0
 Add Sandy Bridge Support
09c1d0
 Write pid file by default in daemon mode
09c1d0
diff --git a/Makefile b/Makefile
09c1d0
index f8199f6..f3ba998 100644
09c1d0
--- a/Makefile
09c1d0
+++ b/Makefile
09c1d0
@@ -1,6 +1,7 @@
09c1d0
 CFLAGS := -g -Os
09c1d0
 prefix := /usr
09c1d0
 etcprefix :=
09c1d0
+MANDIR := ${prefix}/share/man
09c1d0
 # Define appropiately for your distribution
09c1d0
 # DOCDIR := /usr/share/doc/packages/mcelog
09c1d0
 
09c1d0
@@ -54,21 +55,27 @@ SRC := $(OBJ:.o=.c)
09c1d0
 mcelog: ${OBJ}
09c1d0
 
09c1d0
 # dbquery intentionally not installed by default
09c1d0
-install: mcelog
09c1d0
-	mkdir -p $(DESTDIR)${etcprefix}/etc/mcelog $(DESTDIR)${prefix}/sbin $(DESTDIR)${prefix}/share/man/man8
09c1d0
+install: mcelog mcelog.conf mcelog.conf.5 mcelog.triggers.5
09c1d0
+	mkdir -p $(DESTDIR)${etcprefix}/etc/mcelog $(DESTDIR)${prefix}/sbin $(DESTDIR)$(MANDIR)/man5 $(DESTDIR)$(MANDIR)/man8
09c1d0
 	install -m 755 -p mcelog $(DESTDIR)${prefix}/sbin/mcelog
09c1d0
-	install -m 644 -p mcelog.8 $(DESTDIR)${prefix}/share/man/man8
09c1d0
+	install -m 644 -p mcelog.8 $(DESTDIR)$(MANDIR)/man8
09c1d0
+	install -m 644 -p mcelog.conf.5 $(DESTDIR)$(MANDIR)/man5
09c1d0
+	install -m 644 -p mcelog.triggers.5 $(DESTDIR)$(MANDIR)/man5
09c1d0
 	install -m 644 -p -b mcelog.conf $(DESTDIR)${etcprefix}/etc/mcelog/mcelog.conf
09c1d0
 	for i in ${TRIGGERS} ; do 						\
09c1d0
 		install -m 755 -p -b triggers/$$i $(DESTDIR)${etcprefix}/etc/mcelog ; 	\
09c1d0
 	done
09c1d0
 ifdef DOCDIR
09c1d0
+	install -d 755 $(DESTDIR)${DOCDIR} 
09c1d0
 	install -m 644 -p ${DOC} $(DESTDIR)${DOCDIR} 
09c1d0
 else
09c1d0
 	echo
09c1d0
 	echo "Consider defining DOCDIR to install additional documentation"
09c1d0
 endif
09c1d0
 
09c1d0
+mcelog.conf.5: mcelog.conf config-intro.man
09c1d0
+	./genconfig.py mcelog.conf config-intro.man > mcelog.conf.5
09c1d0
+
09c1d0
 clean: test-clean
09c1d0
 	rm -f ${CLEAN} ${OBJ} 
09c1d0
 
09c1d0
diff --git a/README b/README
09c1d0
index 08184ed..8aa8ec4 100644
09c1d0
--- a/README
09c1d0
+++ b/README
09c1d0
@@ -2,11 +2,15 @@ mcelog is the user space backend for logging machine check errors
09c1d0
 reported by the hardware to the kernel. The kernel does the immediate
09c1d0
 actions (like killing processes etc.) and mcelog decodes the errors
09c1d0
 and manages various other advanced error responses like
09c1d0
-offlining memory, CPUs or triggering events.
09c1d0
+offlining memory, CPUs or triggering events. In addition
09c1d0
+mcelog also handles corrected errors, by logging and accounting them.
09c1d0
 
09c1d0
 It primarily handles machine checks and thermal events, which
09c1d0
 are reported for errors detected by the CPU.
09c1d0
 
09c1d0
+For more details on what mcelog can do and the underlying theory
09c1d0
+see http://www.mcelog.org
09c1d0
+
09c1d0
 It is recommended that mcelog runs on all x86 machines, both
09c1d0
 64bit (since early 2.6) and 32bit (since 2.6.32)
09c1d0
 
09c1d0
@@ -40,6 +44,11 @@ mce.pdf is a very old paper describing the first releases of mcelog
09c1d0
 
09c1d0
 For distributors:
09c1d0
 
09c1d0
+You can run mcelog from systemd or similar daemons. An example
09c1d0
+systemd unit file is in mcelog.service.
09c1d0
+
09c1d0
+For older distributions using init scripts:
09c1d0
+
09c1d0
 Please install a init script by default that runs mcelog in daemon mode.
09c1d0
 The mcelog.init script is a good starting point.
09c1d0
 
09c1d0
diff --git a/client.c b/client.c
09c1d0
index 6a67683..7c7aeb8 100644
09c1d0
--- a/client.c
09c1d0
+++ b/client.c
09c1d0
@@ -29,9 +29,9 @@ void ask_server(char *command)
09c1d0
 {
09c1d0
 	struct sockaddr_un sun;
09c1d0
 	int fd;
09c1d0
+	FILE * fp;
09c1d0
 	int n;
09c1d0
 	char buf[1024];
09c1d0
-	int done;
09c1d0
 	char *path = config_string("server", "socket-path");
09c1d0
 	if (!path)
09c1d0
 		path = SOCKET_PATH;
09c1d0
@@ -52,14 +52,18 @@ void ask_server(char *command)
09c1d0
 	if (write(fd, command, n) != n)
09c1d0
 		SYSERRprintf("client command write");
09c1d0
 
09c1d0
-	done = 0;	
09c1d0
-	while (!done && (n = read(fd, buf, sizeof buf)) > 0) { 
09c1d0
-		if (n >= 5 && !memcmp(buf + n - 5, "done\n", 5)) { 
09c1d0
-			n -= 5;
09c1d0
-			done = 1;
09c1d0
+	if ((fp = fdopen(fd, "r")) != NULL) {
09c1d0
+		while (fgets(buf, sizeof buf, fp)) {
09c1d0
+			n = strlen(buf);
09c1d0
+			if (n >= 5 && !memcmp(buf + n - 5, "done\n", 5)) {
09c1d0
+				fclose(fp);
09c1d0
+				return;
09c1d0
+			}
09c1d0
+
09c1d0
+			fputs(buf, stdout);
09c1d0
 		}
09c1d0
-		write(1, buf, n);
09c1d0
+		fclose(fp);
09c1d0
 	}
09c1d0
-	if (n < 0) 
09c1d0
-		SYSERRprintf("client read");
09c1d0
+
09c1d0
+	SYSERRprintf("client read");
09c1d0
 }
09c1d0
diff --git a/config-intro.man b/config-intro.man
09c1d0
new file mode 100644
09c1d0
index 0000000..c06610d
09c1d0
--- /dev/null
09c1d0
+++ b/config-intro.man
09c1d0
@@ -0,0 +1,10 @@
09c1d0
+.SH NAME
09c1d0
+mcelog.conf \- mcelog.conf reference
09c1d0
+.SH SYNOPSIS
09c1d0
+.B /etc/mcelog.conf
09c1d0
+.SH DESCRIPTION
09c1d0
+
09c1d0
+/etc/mcelog.conf is the main configuration file for
09c1d0
+.B mcelog(8).
09c1d0
+This is a configuration file separated into sections including
09c1d0
+a default section.
09c1d0
diff --git a/dmi.c b/dmi.c
09c1d0
index 290a053..b5492cd 100644
09c1d0
--- a/dmi.c
09c1d0
+++ b/dmi.c
09c1d0
@@ -162,6 +162,8 @@ static int get_efi_base_addr(size_t *address)
09c1d0
 check_symbol:
09c1d0
 	while ((fgets(linebuf, sizeof(linebuf) - 1, efi_systab)) != NULL) {
09c1d0
 		char *addrp = strchr(linebuf, '=');
09c1d0
+		if (!addrp)
09c1d0
+			break;
09c1d0
 		*(addrp++) = '\0';
09c1d0
 
09c1d0
 		if (strcmp(linebuf, "SMBIOS") == 0) {
09c1d0
diff --git a/genconfig.py b/genconfig.py
09c1d0
new file mode 100755
09c1d0
index 0000000..aed6992
09c1d0
--- /dev/null
09c1d0
+++ b/genconfig.py
09c1d0
@@ -0,0 +1,80 @@
09c1d0
+#!/usr/bin/python
09c1d0
+# generate man config documentation from mcelog.conf example
09c1d0
+# genconfig.py mcelog.conf config-intro.man
09c1d0
+import sys
09c1d0
+import re
09c1d0
+import string
09c1d0
+import argparse
09c1d0
+
09c1d0
+ap = argparse.ArgumentParser(description="generate man config documentation from mcelog.conf example")
09c1d0
+ap.add_argument('config', type=argparse.FileType('r'), help="mcelog example config file")
09c1d0
+ap.add_argument('intro', type=argparse.FileType('r'), help="intro file")
09c1d0
+args = ap.parse_args()
09c1d0
+
09c1d0
+def parse(f):
09c1d0
+  lineno = 1
09c1d0
+  explanation = 0
09c1d0
+  header = 1
09c1d0
+  for line in f:
09c1d0
+    lineno += 1
09c1d0
+
09c1d0
+    # skip first comment
09c1d0
+    if header:
09c1d0
+      if not re.match('^#', line):
09c1d0
+        header = 0
09c1d0
+      continue
09c1d0
+
09c1d0
+    # explanation
09c1d0
+    m = re.match('^#\s(.*)', line)
09c1d0
+    if m:
09c1d0
+      explanation += 1
09c1d0
+      s = m.group(1)
09c1d0
+      if explanation == 1:
09c1d0
+        s = string.capitalize(s)
09c1d0
+      print s
09c1d0
+      continue
09c1d0
+
09c1d0
+    if explanation:
09c1d0
+      print ".PP"
09c1d0
+      explanation = 0
09c1d0
+
09c1d0
+    # empty line: new option
09c1d0
+    if re.match('\s+', line):
09c1d0
+      new_option()
09c1d0
+      continue
09c1d0
+    # group
09c1d0
+    m = re.match('\[(.*)\]', line)
09c1d0
+    if m:
09c1d0
+      start_group(m.group(1))
09c1d0
+      continue
09c1d0
+    # config option
09c1d0
+    m = re.match('^(#?)([a-z-]+) = (.*)', line)
09c1d0
+    if m:
09c1d0
+      config_option(m.group(1), m.group(2), m.group(3))
09c1d0
+      continue
09c1d0
+    print >>sys.stderr, "Unparseable line %d" % (lineno-1)
09c1d0
+
09c1d0
+def config_option(enabled, name, value):
09c1d0
+    print ".B %s = %s" % (name, value)
09c1d0
+    print ".PP"
09c1d0
+
09c1d0
+def start_group(name):
09c1d0
+    print ".SS \"The %s config section\"" % (name)
09c1d0
+
09c1d0
+def new_option():
09c1d0
+    print ".PP"
09c1d0
+
09c1d0
+
09c1d0
+print """
09c1d0
+.\" Auto generated mcelog.conf manpage. Do not edit.
09c1d0
+.TH "mcelog.conf" 5 "mcelog"
09c1d0
+"""
09c1d0
+
09c1d0
+print args.intro.read()
09c1d0
+parse(args.config)
09c1d0
+print """
09c1d0
+.SH SEE ALSO
09c1d0
+.BR mcelog (8),
09c1d0
+.BR mcelog.triggers (5)
09c1d0
+.B http://www.mcelog.org
09c1d0
+"""
09c1d0
diff --git a/haswell.c b/haswell.c
09c1d0
index 0fef6a5..b309ae5 100644
09c1d0
--- a/haswell.c
09c1d0
+++ b/haswell.c
09c1d0
@@ -1,5 +1,5 @@
09c1d0
 /* Copyright (C) 2013 Intel Corporation
09c1d0
-   Decode Intel Ivy Bridge specific machine check errors.
09c1d0
+   Decode Intel Haswell specific machine check errors.
09c1d0
 
09c1d0
    mcelog is free software; you can redistribute it and/or
09c1d0
    modify it under the terms of the GNU General Public
09c1d0
diff --git a/intel.c b/intel.c
09c1d0
index fe08eab..f893be5 100644
09c1d0
--- a/intel.c
09c1d0
+++ b/intel.c
09c1d0
@@ -34,7 +34,8 @@ void intel_cpu_init(enum cputype cpu)
09c1d0
 	if (cpu == CPU_NEHALEM || cpu == CPU_XEON75XX || cpu == CPU_INTEL ||
09c1d0
 	    cpu == CPU_SANDY_BRIDGE || cpu == CPU_SANDY_BRIDGE_EP ||
09c1d0
 	    cpu == CPU_IVY_BRIDGE || cpu == CPU_IVY_BRIDGE_EPEX ||
09c1d0
-	    cpu == CPU_HASWELL || cpu == CPU_HASWELL_EPEX)
09c1d0
+	    cpu == CPU_HASWELL || cpu == CPU_HASWELL_EPEX || cpu == CPU_BROADWELL ||
09c1d0
+	    cpu == CPU_KNIGHTS_LANDING)
09c1d0
 		memory_error_support = 1;
09c1d0
 }
09c1d0
 
09c1d0
@@ -72,6 +73,15 @@ enum cputype select_intel_cputype(int family, int model)
09c1d0
 			return CPU_HASWELL;
09c1d0
 		else if (model == 0x3f)
09c1d0
 			return CPU_HASWELL_EPEX;
09c1d0
+		else if (model == 0x3d || model == 0x56)
09c1d0
+			return CPU_BROADWELL;
09c1d0
+		else if (model == 0x57)
09c1d0
+			return CPU_KNIGHTS_LANDING;
09c1d0
+		else if (model == 0x1c || model == 0x26 || model == 0x27 ||
09c1d0
+			 model == 0x35 || model == 0x36 || model == 0x36 ||
09c1d0
+			 model == 0x37 || model == 0x4a || model == 0x4c ||
09c1d0
+			 model == 0x4d || model == 0x5a || model == 0x5d)
09c1d0
+			return CPU_ATOM;
09c1d0
 		if (model > 0x1a) {
09c1d0
 			Eprintf("Family 6 Model %x CPU: only decoding architectural errors\n",
09c1d0
 				model);
09c1d0
diff --git a/intel.h b/intel.h
09c1d0
index 00191d5..9d109b1 100644
09c1d0
--- a/intel.h
09c1d0
+++ b/intel.h
09c1d0
@@ -19,5 +19,7 @@ extern int memory_error_support;
09c1d0
 	case CPU_IVY_BRIDGE: \
09c1d0
 	case CPU_IVY_BRIDGE_EPEX: \
09c1d0
 	case CPU_HASWELL: \
09c1d0
-	case CPU_HASWELL_EPEX
09c1d0
+	case CPU_HASWELL_EPEX: \
09c1d0
+	case CPU_BROADWELL: \
09c1d0
+	case CPU_KNIGHTS_LANDING
09c1d0
 
09c1d0
diff --git a/leaky-bucket.c b/leaky-bucket.c
09c1d0
index c2c501b..721ab22 100644
09c1d0
--- a/leaky-bucket.c
09c1d0
+++ b/leaky-bucket.c
09c1d0
@@ -25,7 +25,7 @@ time_t __attribute__((weak)) bucket_time(void)
09c1d0
 	return time(NULL);
09c1d0
 }
09c1d0
 
09c1d0
-static void bucket_age(const struct bucket_conf *c, struct leaky_bucket *b,
09c1d0
+void bucket_age(const struct bucket_conf *c, struct leaky_bucket *b,
09c1d0
 			time_t now)
09c1d0
 {
09c1d0
 	long diff;
09c1d0
diff --git a/leaky-bucket.h b/leaky-bucket.h
09c1d0
index 497719e..860ba3c 100644
09c1d0
--- a/leaky-bucket.h
09c1d0
+++ b/leaky-bucket.h
09c1d0
@@ -27,5 +27,7 @@ char *bucket_output(const struct bucket_conf *c, struct leaky_bucket *b);
09c1d0
 int bucket_conf_init(struct bucket_conf *c, const char *rate);
09c1d0
 void bucket_init(struct leaky_bucket *b);
09c1d0
 time_t bucket_time(void);
09c1d0
+void bucket_age(const struct bucket_conf *c, struct leaky_bucket *b,
09c1d0
+			time_t now);
09c1d0
 
09c1d0
 #endif
09c1d0
diff --git a/mcelog.8 b/mcelog.8
09c1d0
index f8a77c4..3781db6 100644
09c1d0
--- a/mcelog.8
09c1d0
+++ b/mcelog.8
09c1d0
@@ -1,5 +1,4 @@
09c1d0
-.\" disk db commented out for now because it's not usable enough
09c1d0
-.TH MCELOG 8 "May 2009" "" "Linux's Administrator's Manual"
09c1d0
+.TH MCELOG 8 "Mar 2015" "" "Linux's Administrator's Manual"
09c1d0
 .SH NAME
09c1d0
 mcelog \- Decode kernel machine check log on x86 machines
09c1d0
 .SH SYNOPSIS
09c1d0
@@ -26,13 +25,16 @@ in main memory by an integrated memory controller, data
09c1d0
 transfer errors on the front side bus or CPU interconnect or other internal
09c1d0
 errors. 
09c1d0
 Possible causes can be cosmic radiation, instable power supplies,
09c1d0
-cooling problems, broken hardware, or bad luck.
09c1d0
+cooling problems, broken hardware, running systems out of specification,
09c1d0
+or bad luck.
09c1d0
 
09c1d0
 Most errors can be corrected by the CPU by internal error correction
09c1d0
 mechanisms. Uncorrected errors cause machine check exceptions which
09c1d0
-may panic the machine.
09c1d0
+may kill processes or panic the machine. A small number of corrected
09c1d0
+errors is usually not a cause for worry, but a large number can indicate
09c1d0
+future failure.
09c1d0
 
09c1d0
-When a corrected error happens the x86 kernel writes a record describing 
09c1d0
+When a corrected or recovered error happens the x86 kernel writes a record describing
09c1d0
 the MCE into a internal ring buffer available through the
09c1d0
 .I /dev/mcelog
09c1d0
 device
09c1d0
@@ -43,7 +45,11 @@ decodes them into a human readable format and prints them
09c1d0
 on the standard output or optionally into the system log. 
09c1d0
 
09c1d0
 Optionally it can also take more options like keeping statistics or
09c1d0
-triggering shell scripts on specific events.
09c1d0
+triggering shell scripts on specific events. By default mcelog
09c1d0
+supports offlining memory pages with persistent corrected errors,
09c1d0
+offlining CPU cores if they developed cache problems,
09c1d0
+and otherwise logging specific events to the system log after
09c1d0
+they crossed a threshold.
09c1d0
 
09c1d0
 The normal operating modi for mcelog are running 
09c1d0
 as a regular cron job (traditional way, deprecated), 
09c1d0
@@ -112,12 +118,12 @@ and undocumented now.
09c1d0
 
09c1d0
 With the
09c1d0
 .B \-\-dmi
09c1d0
-option mcelog will look up the addresses reported in machine
09c1d0
+option mcelog will look up the DIMMs reported in machine
09c1d0
 checks in the 
09c1d0
 .I SMBIOS/DMI
09c1d0
-tables of the BIOS.
09c1d0
-This can sometimes tell you which DIMM or memory controller
09c1d0
-has developed a problem. More often the information reported
09c1d0
+tables of the BIOS and map the DIMMs to board identifiers.
09c1d0
+This only works when the BIOS reports the identifiers correctly.
09c1d0
+Unfortunately often the information reported
09c1d0
 by the BIOS is either subtly or obviously wrong or useless.
09c1d0
 This option requires that mcelog has read access to /dev/mem
09c1d0
 (normally requires root) and runs on the same machine
09c1d0
@@ -281,6 +287,9 @@ option use
09c1d0
 use
09c1d0
 .I logfile = /tmp/logfile
09c1d0
 
09c1d0
+For more information on the config file please see
09c1d0
+.B mcelog.conf(5).
09c1d0
+
09c1d0
 .SH NOTES
09c1d0
 The kernel prefers old messages over new. If the log buffer overflows
09c1d0
 only old ones will be kept.
09c1d0
@@ -308,9 +317,14 @@ restarting the daemon.
09c1d0
 
09c1d0
 .\"/var/lib/memory-errors
09c1d0
 .SH SEE ALSO
09c1d0
+.BR mcelog.conf(5),
09c1d0
+.BR mcelog.triggers(5)
09c1d0
+
09c1d0
+http://www.mcelog.org
09c1d0
+
09c1d0
 AMD x86-64 architecture programmer's manual, Volume 2, System programming
09c1d0
 
09c1d0
 Intel 64 and IA32 Architectures Software Developer's manual, Volume 3, System programming guide
09c1d0
-Parts 1 and 2. Machine checks are described in Chapter 14 in Part1 and in Appendix E in Part2.
09c1d0
+Chapters 15 and 16.  http://www.intel.com/sdm
09c1d0
 
09c1d0
 Datasheet of your CPU.
09c1d0
diff --git a/mcelog.c b/mcelog.c
09c1d0
index 95a913f..96c0a9d 100644
09c1d0
--- a/mcelog.c
09c1d0
+++ b/mcelog.c
09c1d0
@@ -231,6 +231,9 @@ static char *cputype_name[] = {
09c1d0
 	[CPU_IVY_BRIDGE_EPEX] = "Ivy Bridge EP/EX", /* Fill in better name */
09c1d0
 	[CPU_HASWELL] = "Haswell", /* Fill in better name */
09c1d0
 	[CPU_HASWELL_EPEX] = "Haswell EP/EX", /* Fill in better name */
09c1d0
+	[CPU_BROADWELL] = "Broadwell",
09c1d0
+	[CPU_KNIGHTS_LANDING] = "Knights Landing",
09c1d0
+	[CPU_ATOM] = "ATOM",
09c1d0
 };
09c1d0
 
09c1d0
 static struct config_choice cpu_choices[] = {
09c1d0
@@ -269,7 +272,10 @@ static struct config_choice cpu_choices[
09c1d0
 	{ "haswell", CPU_HASWELL }, /* Fill in better name */
09c1d0
 	{ "haswell-ep", CPU_HASWELL_EPEX }, /* Fill in better name */
09c1d0
 	{ "haswell-ex", CPU_HASWELL_EPEX }, /* Fill in better name */
09c1d0
-	{}
09c1d0
+	{ "broadwell", CPU_BROADWELL },
09c1d0
+	{ "knightslanding", CPU_KNIGHTS_LANDING },
09c1d0
+	{ "atom", CPU_ATOM },
09c1d0
+	{ NULL }
09c1d0
 };
09c1d0
 
09c1d0
 static void print_cputypes(void)
09c1d0
@@ -430,7 +436,8 @@ static void dump_mce(struct mce *m, unsi
09c1d0
 			mod);
09c1d0
 	}
09c1d0
 	if (cputype != CPU_SANDY_BRIDGE_EP && cputype != CPU_IVY_BRIDGE_EPEX &&
09c1d0
-	    cputype != CPU_HASWELL_EPEX)
09c1d0
+	    cputype != CPU_HASWELL_EPEX && cputype != CPU_BROADWELL &&
09c1d0
+	    cputype != CPU_KNIGHTS_LANDING)
09c1d0
 		resolveaddr(m->addr);
09c1d0
 	if (!ascii_mode && ismemerr && (m->status & MCI_STATUS_ADDRV)) {
09c1d0
 		diskdb_resolve_addr(m->addr);
09c1d0
@@ -517,7 +524,7 @@ int is_cpu_supported(void)
09c1d0
 				if (family == 15) {
09c1d0
 					cputype = CPU_K8;
09c1d0
 				} else if (family >= 16) {
09c1d0
-					SYSERRprintf("AMD Processor family %d: Please use the edac_mce_amd module instead.\n", family);
09c1d0
+					SYSERRprintf("ERROR: AMD Processor family %d: mcelog does not support this processor.  Please use the edac_mce_amd module instead.\n", family);
09c1d0
 					return 0;
09c1d0
 				}
09c1d0
 			} else if (!strcmp(vendor,"GenuineIntel"))
09c1d0
@@ -741,7 +748,7 @@ restart:
09c1d0
 			else
09c1d0
 				s += 3; 
09c1d0
 
09c1d0
-			n = sscanf(s, "%02x:<%016Lx> {%100s}%n",
09c1d0
+			n = sscanf(s, "%02x:<%016Lx> {%99s}%n",
09c1d0
 				   &cs,
09c1d0
 				   &m.ip, 
09c1d0
 				   symbol, &next); 
09c1d0
@@ -1377,7 +1384,7 @@ int main(int ac, char **av)
09c1d0
 
09c1d0
 	d.buf = xalloc(d.recordlen * d.loglen); 
09c1d0
 	if (daemon_mode) {
09c1d0
-		prefill_memdb();
09c1d0
+		prefill_memdb(do_dmi);
09c1d0
 		if (!do_dmi)
09c1d0
 			closedmi();
09c1d0
 		server_setup();
09c1d0
diff --git a/mcelog.conf b/mcelog.conf
09c1d0
index 6a2be26..f8abb99 100644
09c1d0
--- a/mcelog.conf
09c1d0
+++ b/mcelog.conf
09c1d0
@@ -9,36 +9,36 @@
09c1d0
 # white space is not allowed in value currently, except at the end where it is dropped
09c1d0
 #
09c1d0
 
09c1d0
-# in general all command line options that are not commands work here
09c1d0
-# see man mcelog or mcelog --help for a list
09c1d0
+# In general all command line options that are not commands work here.
09c1d0
+# See man mcelog or mcelog --help for a list.
09c1d0
 # e.g. to enable the --no-syslog option use 
09c1d0
 #no-syslog = yes   (or no to disable)
09c1d0
 # when the option has a argument
09c1d0
 #logfile = /tmp/logfile
09c1d0
-# below are the options which are not command line options
09c1d0
+# below are the options which are not command line options.
09c1d0
 
09c1d0
 # Set CPU type for which mcelog decodes events:
09c1d0
 #cpu = type
09c1d0
-# for valid values for type please see mcelog --help
09c1d0
+# For valid values for type please see mcelog --help.
09c1d0
 # If this value is set incorrectly the decoded output will be likely incorrect.
09c1d0
-# by default when this parameter is not set mcelog uses the CPU it is running on
09c1d0
+# By default when this parameter is not set mcelog uses the CPU it is running on
09c1d0
 # on very new kernels the mcelog events reported by the kernel also carry
09c1d0
 # the CPU type which is used too when available and not overriden.
09c1d0
 
09c1d0
 # Enable daemon mode:
09c1d0
 #daemon = yes
09c1d0
 # By default mcelog just processes the currently pending events and exits.
09c1d0
-# in daemon mode it will keep running as a daemon in the background and poll
09c1d0
+# In daemon mode it will keep running as a daemon in the background and poll
09c1d0
 # the kernel for events and then decode them.
09c1d0
 
09c1d0
-# Filter out known broken events by default
09c1d0
+# Filter out known broken events by default.
09c1d0
 filter = yes
09c1d0
-# don't log memory errors individually
09c1d0
-# they still get accounted if that is enabled
09c1d0
+# Don't log memory errors individually.
09c1d0
+# They still get accounted if that is enabled.
09c1d0
 #filter-memory-errors = yes
09c1d0
 
09c1d0
 # output in undecoded raw format to be easier machine readable
09c1d0
-# (default is decoded)
09c1d0
+# (default is decoded).
09c1d0
 #raw = yes
09c1d0
 
09c1d0
 # Set CPU Mhz to decode uptime from time stamp counter (output
09c1d0
@@ -62,16 +62,17 @@ filter = yes
09c1d0
 # Append log output to logfile instead of stdout. Only when no syslog logging is active   
09c1d0
 #logfile = filename
09c1d0
  
09c1d0
-# Use SMBIOS information to decode DIMMs (needs root)
09c1d0
-# This function is not recommended to use right now and generally not needed
09c1d0
+# Use SMBIOS information to decode DIMMs (needs root).
09c1d0
+# This function is not recommended to use right now and generally not needed.
09c1d0
 # The exception is memdb prepopulation, which is configured separately below.
09c1d0
 #dmi = no
09c1d0
 
09c1d0
-# when in daemon mode run as this user after set up
09c1d0
-# note that the triggers will run as this user too
09c1d0
-# setting this to non root will mean that triggers cannot take some corrective
09c1d0
-# action, like offlining objects
09c1d0
+# When in daemon mode run as this user after set up.
09c1d0
+# Note that the triggers will run as this user too.
09c1d0
+# Setting this to non root will mean that triggers cannot take some corrective
09c1d0
+# action, like offlining objects.
09c1d0
 #run-credentials-user = root
09c1d0
+
09c1d0
 # group to run as daemon with
09c1d0
 # default to the group of the run-credentials-user
09c1d0
 #run-credentials-group = nobody
09c1d0
@@ -79,72 +80,88 @@ filter = yes
09c1d0
 [server]
09c1d0
 # user allowed to access client socket.
09c1d0
 # when set to * match any
09c1d0
-# root is always allowed to access
09c1d0
+# root is always allowed to access.
09c1d0
 # default: root only
09c1d0
 client-user = root
09c1d0
 # group allowed to access mcelog
09c1d0
-# when no group is configured any group matches (but still user checking)
09c1d0
+# When no group is configured any group matches (but still user checking).
09c1d0
 # when set to * match any
09c1d0
 #client-group = root
09c1d0
-# path to the unix socket for client<->server communication
09c1d0
-# when no socket-path is configured the server will not start
09c1d0
+# Path to the unix socket for client<->server communication.
09c1d0
+# When no socket-path is configured the server will not start
09c1d0
 #socket-path = /var/run/mcelog-client
09c1d0
-# when mcelog starts it checks if a server is already running. timeout
09c1d0
+# When mcelog starts it checks if a server is already running. This configures the timeout
09c1d0
 # for this check.
09c1d0
 #initial-ping-timeout = 2 
09c1d0
 #
09c1d0
 [dimm]
09c1d0
 # Is the in memory DIMM error tracking enabled?
09c1d0
 # Only works on systems with integrated memory controller and
09c1d0
-# which are supported
09c1d0
-# Only takes effect in daemon mode
09c1d0
+# which are supported.
09c1d0
+# Only takes effect in daemon mode.
09c1d0
 dimm-tracking-enabled = yes
09c1d0
-# Use DMI information from the BIOS to prepopulate DIMM database
09c1d0
+# Use DMI information from the BIOS to prepopulate DIMM database.
09c1d0
 # Note this might not work with all BIOS and requires mcelog to run as root.
09c1d0
 # Alternative is to let mcelog create DIMM objects on demand.
09c1d0
 dmi-prepopulate = yes
09c1d0
 #
09c1d0
-# execute these triggers when the rate of corrected or uncorrected
09c1d0
-# errors per DIMM exceeds the threshold
09c1d0
+# Execute these triggers when the rate of corrected or uncorrected
09c1d0
+# Errors per DIMM exceeds the threshold.
09c1d0
 # Note when the hardware does not report DIMMs this might also
09c1d0
-# be per channel
09c1d0
+# be per channel.
09c1d0
 # The default of 10/24h is reasonable for server quality 
09c1d0
-# DDR3 DIMMs as of 2009/10
09c1d0
+# DDR3 DIMMs as of 2009/10.
09c1d0
 #uc-error-trigger = dimm-error-trigger
09c1d0
 uc-error-threshold = 1 / 24h
09c1d0
 #ce-error-trigger = dimm-error-trigger
09c1d0
 ce-error-threshold = 10 / 24h
09c1d0
 
09c1d0
 [socket]
09c1d0
-# Memory error accounting per socket
09c1d0
+# Enable memory error accounting per socket.
09c1d0
 socket-tracking-enabled = yes
09c1d0
-# Threshold and trigger for uncorrected memory errors on a socket
09c1d0
+
09c1d0
+# Threshold and trigger for uncorrected memory errors on a socket.
09c1d0
 # mem-uc-error-trigger = socket-memory-error-trigger
09c1d0
+
09c1d0
 mem-uc-error-threshold = 100 / 24h
09c1d0
-# Threshold and trigger for corrected memory errors on a socket
09c1d0
+
09c1d0
+# Trigger script for corrected memory errors on a socket.
09c1d0
 mem-ce-error-trigger = socket-memory-error-trigger
09c1d0
+
09c1d0
+# Threshold on when to trigger a corrected error for the socket.
09c1d0
+
09c1d0
 mem-ce-error-threshold = 100 / 24h
09c1d0
+
09c1d0
 #  Log socket error threshold explicitely?
09c1d0
 mem-ce-error-log = yes
09c1d0
 
09c1d0
+# Trigger script for uncorrected bus error events
09c1d0
 bus-uc-threshold-trigger = bus-error-trigger
09c1d0
+
09c1d0
+# Trigger script for uncorrected IOMCA errors
09c1d0
 iomca-threshold-trigger = iomca-error-trigger
09c1d0
+
09c1d0
+# Trigger script for other uncategorized errors
09c1d0
 unknown-threshold-trigger = unknown-error-trigger
09c1d0
 
09c1d0
 [cache]
09c1d0
-# Processing of cache error thresholds reported by Intel CPUs
09c1d0
+# Processing of cache error thresholds reported by Intel CPUs.
09c1d0
 cache-threshold-trigger = cache-error-trigger
09c1d0
+
09c1d0
 # Should cache threshold events be logged explicitely?
09c1d0
 cache-threshold-log = yes
09c1d0
 
09c1d0
 [page]
09c1d0
-# Memory error accouting per 4K memory page
09c1d0
-# Threshold for the correct memory errors trigger script
09c1d0
+# Memory error accouting per 4K memory page.
09c1d0
+# Threshold for the correct memory errors trigger script.
09c1d0
 memory-ce-threshold = 10 / 24h
09c1d0
-# Trigger script for corrected errors
09c1d0
+
09c1d0
+# Trigger script for corrected errors.
09c1d0
 # memory-ce-trigger = page-error-trigger
09c1d0
+
09c1d0
 # Should page threshold events be logged explicitely?
09c1d0
 memory-ce-log = yes
09c1d0
+
09c1d0
 # specify the internal action in mcelog to exceeding a page error threshold
09c1d0
 # this is done in addition to executing the trigger script if available
09c1d0
 # off      no action
09c1d0
diff --git a/mcelog.conf.5 b/mcelog.conf.5
09c1d0
new file mode 100644
09c1d0
index 0000000..5a9afda
09c1d0
--- /dev/null
09c1d0
+++ b/mcelog.conf.5
09c1d0
@@ -0,0 +1,283 @@
09c1d0
+
09c1d0
+." Auto generated mcelog.conf manpage. Do not edit.
09c1d0
+.TH "mcelog.conf" 5 "mcelog"
09c1d0
+
09c1d0
+.SH NAME
09c1d0
+mcelog.conf \- mcelog.conf reference
09c1d0
+.SH SYNOPSIS
09c1d0
+.B /etc/mcelog/mcelog.conf
09c1d0
+.SH DESCRIPTION
09c1d0
+
09c1d0
+/etc/mcelog/mcelog.conf is the main configuration file for
09c1d0
+.B mcelog(8).
09c1d0
+This is a configuration file separated into sections including
09c1d0
+a default section.
09c1d0
+
09c1d0
+
09c1d0
+General format
09c1d0
+.PP
09c1d0
+.B optionname = value
09c1d0
+.PP
09c1d0
+White space is not allowed in value currently, except at the end where it is dropped
09c1d0
+
09c1d0
+.PP
09c1d0
+.PP
09c1d0
+In general all command line options that are not commands work here.
09c1d0
+See man mcelog or mcelog --help for a list.
09c1d0
+e.g. to enable the --no-syslog option use 
09c1d0
+.PP
09c1d0
+.B no-syslog = yes   (or no to disable)
09c1d0
+.PP
09c1d0
+When the option has an argument
09c1d0
+.PP
09c1d0
+.B logfile = /tmp/logfile
09c1d0
+.PP
09c1d0
+Below are the options which are not command line options.
09c1d0
+.PP
09c1d0
+.PP
09c1d0
+Set cpu type for which mcelog decodes events:
09c1d0
+.PP
09c1d0
+.B cpu = type
09c1d0
+.PP
09c1d0
+For valid values for type please see mcelog --help.
09c1d0
+If this value is set incorrectly the decoded output will be likely incorrect.
09c1d0
+By default when this parameter is not set mcelog uses the CPU it is running on.
09c1d0
+On very new kernels the mcelog events reported by the kernel also carry
09c1d0
+the CPU type, which is used too when available and not overridden.
09c1d0
+.PP
09c1d0
+.PP
09c1d0
+Enable daemon mode:
09c1d0
+.PP
09c1d0
+.B daemon = yes
09c1d0
+.PP
09c1d0
+By default mcelog just processes the currently pending events and exits.
09c1d0
+In daemon mode it will keep running as a daemon in the background and poll
09c1d0
+the kernel for events and then decode them.
09c1d0
+.PP
09c1d0
+.PP
09c1d0
+Filter out known broken events by default.
09c1d0
+.PP
09c1d0
+.B filter = yes
09c1d0
+.PP
09c1d0
+Don't log memory errors individually.
09c1d0
+They still get accounted if that is enabled.
09c1d0
+.PP
09c1d0
+.B filter-memory-errors = yes
09c1d0
+.PP
09c1d0
+.PP
09c1d0
+Output in undecoded raw format to be easier machine readable
09c1d0
+(default is decoded).
09c1d0
+.PP
09c1d0
+.B raw = yes
09c1d0
+.PP
09c1d0
+.PP
09c1d0
+Set cpu mhz to decode uptime from time stamp counter (output
09c1d0
+unreliable, not needed on new kernels which report the event time
09c1d0
+directly). A lot of systems don't have a linear time stamp clock
09c1d0
+and the output is wrong then. 
09c1d0
+Normally mcelog tries to figure out if the TSC is reliable
09c1d0
+and only uses the current frequency then.
09c1d0
+Setting a frequency forces timestamp decoding.
09c1d0
+This setting is obsolete with modern kernels which report the time 
09c1d0
+directly.
09c1d0
+.PP
09c1d0
+.B cpumhz = 1800.00
09c1d0
+.PP
09c1d0
+.PP
09c1d0
+Log output options
09c1d0
+Log decoded machine checks in syslog (default stdout or syslog for daemon)	     
09c1d0
+.PP
09c1d0
+.B syslog = yes
09c1d0
+.PP
09c1d0
+Log decoded machine checks in syslog with error level
09c1d0
+.PP
09c1d0
+.B syslog-error = yes
09c1d0
+.PP
09c1d0
+Never log anything to syslog
09c1d0
+.PP
09c1d0
+.B no-syslog = yes     
09c1d0
+.PP
09c1d0
+Append log output to logfile instead of stdout. Only when no syslog logging is active.
09c1d0
+.PP
09c1d0
+.B logfile = filename
09c1d0
+.PP
09c1d0
+.PP
09c1d0
+Use smbios information to decode dimms (needs root).
09c1d0
+This function is not recommended to use right now and generally not needed.
09c1d0
+The exception is memdb prepopulation, which is configured separately below.
09c1d0
+.PP
09c1d0
+.B dmi = no
09c1d0
+.PP
09c1d0
+.PP
09c1d0
+When in daemon mode run as this user after set up.
09c1d0
+Note that the triggers will run as this user too.
09c1d0
+Setting this to non root will mean that triggers cannot take some corrective
09c1d0
+action, like offlining objects.
09c1d0
+.PP
09c1d0
+.B run-credentials-user = root
09c1d0
+.PP
09c1d0
+.PP
09c1d0
+Group to run as daemon with
09c1d0
+default to the group of the run-credentials-user
09c1d0
+.PP
09c1d0
+.B run-credentials-group = nobody
09c1d0
+.PP
09c1d0
+.PP
09c1d0
+.SS "The server config section"
09c1d0
+User allowed to access client socket.
09c1d0
+when set to * match any
09c1d0
+root is always allowed to access.
09c1d0
+default: root only
09c1d0
+.PP
09c1d0
+.B client-user = root
09c1d0
+.PP
09c1d0
+Group allowed to access mcelog
09c1d0
+When no group is configured any group matches (but still user checking).
09c1d0
+when set to * match any
09c1d0
+.PP
09c1d0
+.B client-group = root
09c1d0
+.PP
09c1d0
+Path to the unix socket for client<->server communication.
09c1d0
+When no socket-path is configured the server will not start
09c1d0
+.PP
09c1d0
+.B socket-path = /var/run/mcelog-client
09c1d0
+.PP
09c1d0
+When mcelog starts it checks if a server is already running. This configures the timeout
09c1d0
+for this check.
09c1d0
+.PP
09c1d0
+.B initial-ping-timeout = 2 
09c1d0
+.PP
09c1d0
+
09c1d0
+.PP
09c1d0
+.SS "The dimm config section"
09c1d0
+Is the in memory dimm error tracking enabled?
09c1d0
+Only works on systems with integrated memory controller and
09c1d0
+which are supported.
09c1d0
+Only takes effect in daemon mode.
09c1d0
+.PP
09c1d0
+.B dimm-tracking-enabled = yes
09c1d0
+.PP
09c1d0
+Use dmi information from the bios to prepopulate dimm database.
09c1d0
+Note this might not work with all BIOS and requires mcelog to run as root.
09c1d0
+Alternative is to let mcelog create DIMM objects on demand.
09c1d0
+.PP
09c1d0
+.B dmi-prepopulate = yes
09c1d0
+.PP
09c1d0
+
09c1d0
+Execute these triggers when the rate of corrected or uncorrected
09c1d0
+errors per DIMM exceeds the threshold.
09c1d0
+Note when the hardware does not report DIMMs this might also
09c1d0
+be per channel.
09c1d0
+The default of 10/24h is reasonable for server quality 
09c1d0
+DDR3 DIMMs as of 2009/10.
09c1d0
+.PP
09c1d0
+.B uc-error-trigger = dimm-error-trigger
09c1d0
+.PP
09c1d0
+.B uc-error-threshold = 1 / 24h
09c1d0
+.PP
09c1d0
+.B ce-error-trigger = dimm-error-trigger
09c1d0
+.PP
09c1d0
+.B ce-error-threshold = 10 / 24h
09c1d0
+.PP
09c1d0
+.PP
09c1d0
+.SS "The socket config section"
09c1d0
+Enable memory error accounting per socket.
09c1d0
+.PP
09c1d0
+.B socket-tracking-enabled = yes
09c1d0
+.PP
09c1d0
+.PP
09c1d0
+Threshold and trigger for uncorrected memory errors on a socket.
09c1d0
+mem-uc-error-trigger = socket-memory-error-trigger
09c1d0
+.PP
09c1d0
+.PP
09c1d0
+.B mem-uc-error-threshold = 100 / 24h
09c1d0
+.PP
09c1d0
+.PP
09c1d0
+Trigger script for corrected memory errors on a socket.
09c1d0
+.PP
09c1d0
+.B mem-ce-error-trigger = socket-memory-error-trigger
09c1d0
+.PP
09c1d0
+.PP
09c1d0
+Threshold on when to trigger a corrected error for the socket.
09c1d0
+.PP
09c1d0
+.PP
09c1d0
+.B mem-ce-error-threshold = 100 / 24h
09c1d0
+.PP
09c1d0
+.PP
09c1d0
+Log socket error threshold explicitly?
09c1d0
+.PP
09c1d0
+.B mem-ce-error-log = yes
09c1d0
+.PP
09c1d0
+.PP
09c1d0
+Trigger script for uncorrected bus error events
09c1d0
+.PP
09c1d0
+.B bus-uc-threshold-trigger = bus-error-trigger
09c1d0
+.PP
09c1d0
+.PP
09c1d0
+Trigger script for uncorrected iomca errors
09c1d0
+.PP
09c1d0
+.B iomca-threshold-trigger = iomca-error-trigger
09c1d0
+.PP
09c1d0
+.PP
09c1d0
+Trigger script for other uncategorized errors
09c1d0
+.PP
09c1d0
+.B unknown-threshold-trigger = unknown-error-trigger
09c1d0
+.PP
09c1d0
+.PP
09c1d0
+.SS "The cache config section"
09c1d0
+Processing of cache error thresholds reported by intel cpus.
09c1d0
+.PP
09c1d0
+.B cache-threshold-trigger = cache-error-trigger
09c1d0
+.PP
09c1d0
+.PP
09c1d0
+Should cache threshold events be logged explicitly?
09c1d0
+.PP
09c1d0
+.B cache-threshold-log = yes
09c1d0
+.PP
09c1d0
+.PP
09c1d0
+.SS "The page config section"
09c1d0
+Memory error accounting per 4k memory page.
09c1d0
+Threshold for the corrected memory errors trigger script.
09c1d0
+.PP
09c1d0
+.B memory-ce-threshold = 10 / 24h
09c1d0
+.PP
09c1d0
+.PP
09c1d0
+Trigger script for corrected errors.
09c1d0
+memory-ce-trigger = page-error-trigger
09c1d0
+.PP
09c1d0
+.PP
09c1d0
+Should page threshold events be logged explicitly?
09c1d0
+.PP
09c1d0
+.B memory-ce-log = yes
09c1d0
+.PP
09c1d0
+.PP
09c1d0
+Specify the internal action in mcelog on exceeding a page error threshold.
09c1d0
+This is done in addition to executing the trigger script if available.
09c1d0
+off      no action
09c1d0
+account  only account errors
09c1d0
+soft     try to soft-offline page without killing any processes
09c1d0
+         This requires an up-to-date kernel. Might not be successful.
09c1d0
+hard     try to hard-offline page by killing processes
09c1d0
+         Requires an up-to-date kernel. Might not be successful.
09c1d0
+soft-then-hard   First try to soft offline, then try hard offlining
09c1d0
+.PP
09c1d0
+.B memory-ce-action = off|account|soft|hard|soft-then-hard
09c1d0
+.PP
09c1d0
+.B memory-ce-action = soft
09c1d0
+.PP
09c1d0
+.PP
09c1d0
+.SS "The trigger config section"
09c1d0
+Maximum number of running triggers
09c1d0
+.PP
09c1d0
+.B children-max = 2
09c1d0
+.PP
09c1d0
+Execute triggers in this directory
09c1d0
+.PP
09c1d0
+.B directory = /etc/mcelog
09c1d0
+.PP
09c1d0
+
09c1d0
+.SH SEE ALSO
09c1d0
+.BR mcelog (8)
09c1d0
+,
09c1d0
+.B http://www.mcelog.org
09c1d0
+
09c1d0
diff --git a/mcelog.h b/mcelog.h
09c1d0
index 550a0a5..6c097cf 100644
09c1d0
--- a/mcelog.h
09c1d0
+++ b/mcelog.h
09c1d0
@@ -65,14 +65,18 @@ struct mce {
09c1d0
 #define MCI_STATUS_PCC   (1ULL<<57)  /* processor context corrupt */
09c1d0
 #define MCI_STATUS_S	 (1ULL<<56)  /* signalled */
09c1d0
 #define MCI_STATUS_AR	 (1ULL<<55)  /* action-required */
09c1d0
+#define MCI_STATUS_FWST  (1ULL<<37)  /* Firmware updated status indicator */
09c1d0
 
09c1d0
 #define MCG_STATUS_RIPV  (1ULL<<0)   /* restart ip valid */
09c1d0
 #define MCG_STATUS_EIPV  (1ULL<<1)   /* eip points to correct instruction */
09c1d0
 #define MCG_STATUS_MCIP  (1ULL<<2)   /* machine check in progress */
09c1d0
+#define MCG_STATUS_LMCES (1ULL<<3)   /* local machine check signaled */
09c1d0
 
09c1d0
 #define MCG_CMCI_P		(1ULL<<10)   /* CMCI supported */
09c1d0
 #define MCG_TES_P		(1ULL<<11)   /* Yellow bit cache threshold supported */
09c1d0
 #define MCG_SER_P		(1ULL<<24)   /* MCA recovery / new status */
09c1d0
+#define MCG_ELOG_P		(1ULL<<26)   /* Extended error log supported */
09c1d0
+#define MCG_LMCE_P		(1ULL<<27)   /* Local machine check supported */
09c1d0
 
09c1d0
 #define NELE(x) (sizeof(x)/sizeof(*(x)))
09c1d0
 #define err(x) perror(x),exit(1)
09c1d0
@@ -119,6 +123,9 @@ enum cputype {
09c1d0
 	CPU_IVY_BRIDGE_EPEX, 
09c1d0
 	CPU_HASWELL,
09c1d0
 	CPU_HASWELL_EPEX,
09c1d0
+	CPU_BROADWELL,
09c1d0
+	CPU_KNIGHTS_LANDING,
09c1d0
+	CPU_ATOM,
09c1d0
 };
09c1d0
 
09c1d0
 enum option_ranges {
09c1d0
diff --git a/mcelog.service b/mcelog.service
09c1d0
new file mode 100644
09c1d0
index 0000000..c5aaf07
09c1d0
--- /dev/null
09c1d0
+++ b/mcelog.service
09c1d0
@@ -0,0 +1,10 @@
09c1d0
+[Unit]
09c1d0
+Description=Machine Check Exception Logging Daemon
09c1d0
+After=syslog.target
09c1d0
+
09c1d0
+[Service] 
09c1d0
+ExecStart=/usr/sbin/mcelog --ignorenodev --daemon --foreground
09c1d0
+StandardOutput=syslog 
09c1d0
+
09c1d0
+[Install]
09c1d0
+WantedBy=multi-user.target
09c1d0
diff --git a/mcelog.triggers.5 b/mcelog.triggers.5
09c1d0
new file mode 100644
09c1d0
index 0000000..510bbef
09c1d0
--- /dev/null
09c1d0
+++ b/mcelog.triggers.5
09c1d0
@@ -0,0 +1,231 @@
09c1d0
+'\" t
09c1d0
+.TH "mcelog.triggers" 5 "mcelog"
09c1d0
+.SH NAME
09c1d0
+mcelog.triggers \- mcelog trigger scripts reference
09c1d0
+.SH SYNOPSIS
09c1d0
+.B /etc/mcelog/bus-error-trigger
09c1d0
+.br
09c1d0
+.B /etc/mcelog/cache-error-trigger
09c1d0
+.br
09c1d0
+.B /etc/mcelog/dimm-error-trigger
09c1d0
+.br
09c1d0
+.B /etc/mcelog/iomca-error-trigger
09c1d0
+.br
09c1d0
+.B /etc/mcelog/page-error-trigger
09c1d0
+.br
09c1d0
+.B /etc/mcelog/socket-memory-error-trigger
09c1d0
+.br
09c1d0
+.B /etc/mcelog/unknown-error-trigger
09c1d0
+.br
09c1d0
+.SH DESCRIPTION
09c1d0
+.BR mcelog(8) 
09c1d0
+maintains thresholds of errors using a 
09c1d0
+.I leaky-bucket
09c1d0
+algorithm.
09c1d0
+When the number of errors in a specific
09c1d0
+time window exceeds a pre-configured threshold a 
09c1d0
+.I trigger
09c1d0
+will be executed. Triggers are usually shell scripts in the
09c1d0
+.B /etc/mcelog 
09c1d0
+directory
09c1d0
+but can be also other internal actions. Thresholds and triggers
09c1d0
+can be configured in
09c1d0
+.BR mcelog.conf(5)
09c1d0
+
09c1d0
+Trigger will run as the user configured for mcelog
09c1d0
+in 
09c1d0
+.I mcelog.conf,
09c1d0
+by default root. The default trigger action can
09c1d0
+be overridden by specifying a different trigger script in the configuration file.
09c1d0
+Actions in addition to the default trigger
09c1d0
+(like notifying an administrator) can be put into the respective
09c1d0
+.I /etc/mcelog/*.local
09c1d0
+script which is executed after the default action. This allows updating the default
09c1d0
+scripts without overriding local actions. All trigger actions are also
09c1d0
+logged to syslog.
09c1d0
+.PP
09c1d0
+.B "The DIMM and socket memory error triggers"
09c1d0
+.PP
09c1d0
+The 
09c1d0
+.B /etc/mcelog/dimm-error-trigger
09c1d0
+and 
09c1d0
+.B /etc/mcelog/socket-memory-error-trigger
09c1d0
+scripts are executed when a DIMM or a CPU socket exceeds
09c1d0
+a configured corrected or uncorrected memory error threshold.
09c1d0
+The thresholds are configured in the 
09c1d0
+.B mcelog.conf
09c1d0
+.I [dimm]
09c1d0
+and
09c1d0
+.I [socket]
09c1d0
+sections.
09c1d0
+The default triggers log a warning message in the system log.
09c1d0
+The triggers are only executed when mcelog runs as a daemon.
09c1d0
+
09c1d0
+Arguments are passed as environment variables
09c1d0
+.TS
09c1d0
+tab(:);
09c1d0
+l l.
09c1d0
+THRESHOLD:human readable threshold status
09c1d0
+MESSAGE:Human readable consolidated error message
09c1d0
+TOTALCOUNT:total corrected or uncorrected count of errors for current DIMM  depending on what triggered the event
09c1d0
+LOCATION:Consolidated location as a single string
09c1d0
+DMI_LOCATION:DIMM location from DMI/SMBIOS if available
09c1d0
+DMI_NAME:DIMM identifier from DMI/SMBIOS if available
09c1d0
+DIMM:DIMM number reported by hardware
09c1d0
+CHANNEL:Channel number reported by hardware
09c1d0
+SOCKETID:Socket ID of CPU that includes the memory controller with the DIMM
09c1d0
+CECOUNT:Total corrected error count for DIMM
09c1d0
+UCCOUNT:Total uncorrected error count for DIMM
09c1d0
+LASTEVENT:Time stamp of event that triggered threshold (in time_t format, seconds)
09c1d0
+THRESHOLD_COUNT:Total number of events in current threshold time period of specific type
09c1d0
+.TE
09c1d0
+
09c1d0
+After the default action local actions in 
09c1d0
+.B /etc/mcelog/dimm-error-trigger.local
09c1d0
+or respective 
09c1d0
+.B /etc/mcelog/socket-memory-error-trigger.local
09c1d0
+are executed.
09c1d0
+
09c1d0
+.PP
09c1d0
+.B "The page error trigger"
09c1d0
+.PP
09c1d0
+The 
09c1d0
+.B /etc/mcelog/page-error-trigger 
09c1d0
+script is 
09c1d0
+executed by mcelog in daemon mode when a page
09c1d0
+in memory exceeds a pre-configured corrected or uncorrected error threshold.
09c1d0
+mcelog internally also implements offlining the page through the kernel.
09c1d0
+This is configured through the 
09c1d0
+.I [page]
09c1d0
+section of 
09c1d0
+.BR mcelog.conf(5)
09c1d0
+.PP
09c1d0
+The environment arguments are the same as for the 
09c1d0
+.I dimm-error-trigger
09c1d0
+script
09c1d0
+.PP
09c1d0
+After the default action local actions in 
09c1d0
+.IR /etc/mcelog/page-error-trigger.local " are executed."
09c1d0
+
09c1d0
+.PP
09c1d0
+.B "The cache error trigger"
09c1d0
+.PP
09c1d0
+The
09c1d0
+.I /etc/mcelog/cache-error-trigger
09c1d0
+shell script is called for cache error handling in daemon mode
09c1d0
+when a CPU reports excessive corrected cache errors.
09c1d0
+This could be an indication for future uncorrected errors.
09c1d0
+.PP
09c1d0
+This trigger is configured through the 
09c1d0
+.B [cache]
09c1d0
+section in the 
09c1d0
+.BR mcelog.conf(5) 
09c1d0
+configuration file. The threshold is defined by the CPU.  The default trigger offlines the affected CPU cores, unless it is the last core running. 
09c1d0
+.PP
09c1d0
+Arguments are passed as environment variables
09c1d0
+.TS
09c1d0
+tab(:);
09c1d0
+l l.
09c1d0
+MESSAGE:Human readable error message
09c1d0
+CPU:Linux CPU number that triggered the error
09c1d0
+LEVEL:Cache level affected by error
09c1d0
+TYPE:Cache type affected by error (Data,Instruction,Generic)
09c1d0
+AFFECTED_CPUS:List of CPUs sharing the affected cache
09c1d0
+SOCKETID:Socket ID of affected CPU
09c1d0
+.TE
09c1d0
+.PP
09c1d0
+After the default action local actions in 
09c1d0
+.IR /etc/mcelog/cache-error-trigger.local " are executed."
09c1d0
+.PP
09c1d0
+.B "The bus-uc-threshold-trigger"
09c1d0
+.PP
09c1d0
+The 
09c1d0
+.B bus-uc-threshold-trigger
09c1d0
+runs on uncorrected errors on an IO bus. It is configured through the
09c1d0
+.B bus-uc-threshold-trigger
09c1d0
+and
09c1d0
+.B bus-uc-threshold-trigger-threshold
09c1d0
+options in
09c1d0
+.I /etc/mcelog.conf(5). 
09c1d0
+By default it logs a message with the error location to the system log.
09c1d0
+After the default action local actions in 
09c1d0
+.I /etc/mcelog/bus-uc-error-trigger.local 
09c1d0
+are executed.
09c1d0
+.PP
09c1d0
+Arguments are passed as environment variables
09c1d0
+.TS
09c1d0
+tab(:);
09c1d0
+l l.
09c1d0
+MESSAGE:Human readable consolidated error message. 
09c1d0
+LOCATION:Consolidated location as a single string 
09c1d0
+SOCKETID:Socket ID of CPU that includes the memory controller with the DIMM
09c1d0
+LEVEL:Interconnect level 
09c1d0
+PARTICIPATION:Processor Participation (Originator, Responder or Observer) 
09c1d0
+REQUEST:Request type (read, write, prefetch, etc.) 
09c1d0
+ORIGIN:Memory or IO
09c1d0
+TIMEOUT:The request timed out or not 
09c1d0
+.TE
09c1d0
+.PP
09c1d0
+.B "The iomca-error-trigger"
09c1d0
+.PP
09c1d0
+The 
09c1d0
+.B iomca-error-trigger
09c1d0
+runs when a socket receives bus or interconnect errors.
09c1d0
+It is configured through the 
09c1d0
+.B iomca-error-trigger 
09c1d0
+and 
09c1d0
+.B iomca-error-trigger-threshold
09c1d0
+options in
09c1d0
+.IR /etc/mcelog.conf ". By default it logs a message with the error location to the system log."
09c1d0
+After the default action local actions in 
09c1d0
+.IR /etc/mcelog/iomca-error-trigger.local " are executed."
09c1d0
+.PP
09c1d0
+Arguments are passed as environment variables
09c1d0
+.TS
09c1d0
+tab(:);
09c1d0
+l l.
09c1d0
+MESSAGE:Human readable consolidated error message
09c1d0
+LOCATION:Consolidated location as a single string
09c1d0
+SOCKETID:Socket ID of CPU that includes the memory controller with the DIMM
09c1d0
+CPU:Linux CPU number that triggered the error
09c1d0
+SET:PCI segment number
09c1d0
+BUS:PCI bus number
09c1d0
+DEVICE:PCI device number
09c1d0
+FUNCTION:PCI function number
09c1d0
+.TE
09c1d0
+.PP
09c1d0
+.B "The unknown-error-trigger"
09c1d0
+.PP
09c1d0
+The 
09c1d0
+.B unknown-error-trigger
09c1d0
+runs on any errors not otherwise categorized.
09c1d0
+It is configured through the 
09c1d0
+.B unknown-error-trigger
09c1d0
+and
09c1d0
+.B unknown-error-trigger-threshold
09c1d0
+options in
09c1d0
+.I /etc/mcelog.conf. 
09c1d0
+By default it logs a message to the system log.
09c1d0
+After the default action local actions in 
09c1d0
+.I /etc/mcelog/unknown-error-trigger.local 
09c1d0
+are executed.
09c1d0
+.PP
09c1d0
+Arguments are passed as environment variables
09c1d0
+.TS
09c1d0
+tab(:);
09c1d0
+l l.
09c1d0
+MESSAGE:Human readable consolidated error message
09c1d0
+LOCATION:Consolidated location as a single string
09c1d0
+SOCKETID:Socket ID of CPU that includes the memory controller with the DIMM
09c1d0
+CPU:Linux CPU number that triggered the error
09c1d0
+STATUS:IA32_MCi_STATUS register value
09c1d0
+ADDR:IA32_MCi_ADDR register value
09c1d0
+MISC:IA32_MCi_MISC register value
09c1d0
+MCGSTATUS:IA32_MCG_STATUS register value
09c1d0
+MCGCAP:IA32_MCG_CAP register value
09c1d0
+.TE
09c1d0
+.SH SEE ALSO
09c1d0
+http://www.mcelog.org
09c1d0
+
09c1d0
+.B mcelog(8),
09c1d0
+.B mcelog.conf(5)
09c1d0
diff --git a/memdb.c b/memdb.c
09c1d0
index bde8113..7a33750 100644
09c1d0
--- a/memdb.c
09c1d0
+++ b/memdb.c
09c1d0
@@ -270,6 +270,7 @@ static void dump_errtype(char *name, struct err_type *e, FILE *f, enum printflag
09c1d0
 	int all = (flags & DUMP_ALL);
09c1d0
 	char *s;
09c1d0
 
09c1d0
+	bucket_age(bc, &e->bucket, bucket_time());
09c1d0
 	if (e->count || e->bucket.count || all)
09c1d0
 		fprintf(f, "%s:\n", name);
09c1d0
 	if (e->count || all) {
09c1d0
@@ -382,7 +383,7 @@ parse_dimm_addr(char *bl, unsigned *socketid, unsigned *channel, unsigned *dimm)
09c1d0
 }
09c1d0
 
09c1d0
 /* Prepopulate DIMM database from BIOS information */
09c1d0
-void prefill_memdb(void)
09c1d0
+void prefill_memdb(int do_dmi)
09c1d0
 {
09c1d0
 	static int initialized;
09c1d0
 	int i;
09c1d0
@@ -395,7 +396,7 @@ void prefill_memdb(void)
09c1d0
 	if (!memdb_enabled)
09c1d0
 		return;
09c1d0
 	initialized = 1;
09c1d0
-	if (config_bool("dimm", "dmi-prepopulate") == 0)
09c1d0
+	if (config_bool("dimm", "dmi-prepopulate") == 0 || !do_dmi)
09c1d0
 		return;
09c1d0
 	if (opendmi() < 0)
09c1d0
 		return;
09c1d0
diff --git a/memdb.h b/memdb.h
09c1d0
index 5c68581..afc3348 100644
09c1d0
--- a/memdb.h
09c1d0
+++ b/memdb.h
09c1d0
@@ -11,7 +11,7 @@ enum printflags {
09c1d0
 	DUMP_BIOS = (1 << 1),
09c1d0
 };	
09c1d0
 
09c1d0
-void prefill_memdb(void);
09c1d0
+void prefill_memdb(int do_dmi);
09c1d0
 void memdb_config(void);
09c1d0
 void dump_memory_errors(FILE *f, enum printflags flags);
09c1d0
 
09c1d0
diff --git a/p4.c b/p4.c
09c1d0
index f938196..2bf1eee 100644
09c1d0
--- a/p4.c
09c1d0
+++ b/p4.c
09c1d0
@@ -317,6 +317,10 @@ static int decode_mci(__u64 status, __u64 misc, int cpu, unsigned mcgcap, int *i
09c1d0
 	if (status & (MCI_STATUS_S|MCI_STATUS_AR))
09c1d0
 		Wprintf("%s\n", arstate[(status >> 55) & 3]);
09c1d0
 
09c1d0
+	if ((mcgcap & MCG_SER_P) && (status & MCI_STATUS_FWST)) {
09c1d0
+		Wprintf("Firmware may have updated this error\n");
09c1d0
+	}
09c1d0
+
09c1d0
 	if ((mcgcap == 0 || (mcgcap & MCG_TES_P)) && !(status & MCI_STATUS_UC)) {
09c1d0
 		track = (status >> 53) & 3;
09c1d0
 		decode_tracking(track);
09c1d0
@@ -334,6 +338,8 @@ static void decode_mcg(__u64 mcgstatus)
09c1d0
 		Wprintf("EIPV ");
09c1d0
 	if (mcgstatus & MCG_STATUS_MCIP)
09c1d0
 		Wprintf("MCIP ");
09c1d0
+	if (mcgstatus & MCG_STATUS_LMCES)
09c1d0
+		Wprintf("LMCE ");
09c1d0
 	Wprintf("\n");
09c1d0
 }
09c1d0
 
09c1d0
diff --git a/server.c b/server.c
09c1d0
index 344eb38..a1fa7da 100644
09c1d0
--- a/server.c
09c1d0
+++ b/server.c
09c1d0
@@ -291,7 +291,7 @@ static int server_ping(struct sockaddr_un *un)
09c1d0
 {
09c1d0
 	struct sigaction oldsa;
09c1d0
 	struct sigaction sa = { .sa_handler = ping_timeout };
09c1d0
-	int ret = -1, n;
09c1d0
+	int ret, n;
09c1d0
 	char buf[10];
09c1d0
 	int fd = socket(PF_UNIX, SOCK_STREAM, 0);
09c1d0
 	if (fd < 0)
09c1d0
@@ -299,6 +299,7 @@ static int server_ping(struct sockaddr_un *un)
09c1d0
 
09c1d0
 	sigaction(SIGALRM, &sa, &oldsa);	
09c1d0
 	if (sigsetjmp(ping_timeout_ctx, 1) == 0) {
09c1d0
+		ret = 0;
09c1d0
 		alarm(initial_ping_timeout);
09c1d0
 		if (connect(fd, un, sizeof(struct sockaddr_un)) < 0)
09c1d0
 			goto cleanup;
09c1d0
@@ -308,7 +309,8 @@ static int server_ping(struct sockaddr_un *un)
09c1d0
 			goto cleanup;
09c1d0
 		if (n == 5 && !memcmp(buf, "pong\n", 5))
09c1d0
 			ret = 0;
09c1d0
-	}
09c1d0
+	} else
09c1d0
+		ret = -1;
09c1d0
 cleanup:
09c1d0
 	sigaction(SIGALRM, &oldsa, NULL);
09c1d0
 	alarm(0);
09c1d0
diff --git a/tests/test b/tests/test
09c1d0
index 35bebd2..148bf1f 100755
09c1d0
--- a/tests/test
09c1d0
+++ b/tests/test
09c1d0
@@ -17,6 +17,8 @@ if [ "$(whoami)" != "root" ] ; then
09c1d0
 	exit 1
09c1d0
 fi
09c1d0
 
09c1d0
+[ ! -e /dev/mce-inject ] && modprobe mce-inject	# device node, not a regular file: test existence with -e
09c1d0
+
09c1d0
 echo "++++++++++++ running $1 test +++++++++++++++++++"
09c1d0
 
09c1d0
 # disable trigger
09c1d0
diff --git a/trigger.c b/trigger.c
09c1d0
index 19466a6..5caca34 100644
09c1d0
--- a/trigger.c
09c1d0
+++ b/trigger.c
09c1d0
@@ -115,11 +115,18 @@ static void finish_child(pid_t child, int status)
09c1d0
 static void child_handler(int sig, siginfo_t *si, void *ctx)
09c1d0
 {
09c1d0
 	int status;
09c1d0
+	pid_t pid;
09c1d0
+
09c1d0
 	if (waitpid(si->si_pid, &status, WNOHANG) < 0) {
09c1d0
 		SYSERRprintf("Cannot collect child %d", si->si_pid);
09c1d0
 		return;
09c1d0
 	}
09c1d0
 	finish_child(si->si_pid, status);
09c1d0
+
09c1d0
+	/* Check other child(ren)'s status to avoid zombie process */
09c1d0
+	while ((pid = waitpid(-1, &status, WNOHANG)) > 0) {
09c1d0
+		finish_child(pid, status);
09c1d0
+	}
09c1d0
 }
09c1d0
  
09c1d0
 void trigger_setup(void)
09c1d0
diff --git a/triggers/bus-error-trigger b/triggers/bus-error-trigger
09c1d0
old mode 100644
09c1d0
new mode 100755
09c1d0
diff --git a/triggers/iomca-error-trigger b/triggers/iomca-error-trigger
09c1d0
old mode 100644
09c1d0
new mode 100755
09c1d0
diff --git a/triggers/unknown-error-trigger b/triggers/unknown-error-trigger
09c1d0
old mode 100644
09c1d0
new mode 100755
09c1d0
index b924a0e..fa2866c
09c1d0
--- a/triggers/unknown-error-trigger
09c1d0
+++ b/triggers/unknown-error-trigger
09c1d0
@@ -9,7 +9,7 @@
09c1d0
 # CPU		Linux CPU number that triggered the error
09c1d0
 # STATUS	IA32_MCi_STATUS register value
09c1d0
 # ADDR		IA32_MCi_ADDR register value
09c1d0
-# MISC		IA32_MCi_MISC regiser value
09c1d0
+# MISC		IA32_MCi_MISC register value
09c1d0
 # MCGSTATUS	IA32_MCG_STATUS register value
09c1d0
 # MCGCAP	IA32_MCG_CAP register value
09c1d0
 # For details on the register layout please see the Intel SDM http://www.intel.com/sdm