Blame SOURCES/0235-RHBZ-1480638-NVMe-support.patch

4ae388
---
4ae388
 libmultipath/checkers.c    |   19 +++-
4ae388
 libmultipath/checkers.h    |    3 
4ae388
 libmultipath/discovery.c   |  183 +++++++++++++++++++++++++++++++++++++++------
4ae388
 libmultipath/discovery.h   |    2 
4ae388
 libmultipath/hwtable.c     |   10 ++
4ae388
 libmultipath/structs.h     |    1 
4ae388
 libmultipath/uevent.c      |    2 
4ae388
 multipath/multipath.conf.5 |    3 
4ae388
 multipathd/main.c          |   27 ------
4ae388
 9 files changed, 194 insertions(+), 56 deletions(-)
4ae388
4ae388
Index: multipath-tools-130222/libmultipath/discovery.c
4ae388
===================================================================
4ae388
--- multipath-tools-130222.orig/libmultipath/discovery.c
4ae388
+++ multipath-tools-130222/libmultipath/discovery.c
4ae388
@@ -13,6 +13,7 @@
4ae388
 #include <libgen.h>
4ae388
 #include <libudev.h>
4ae388
 #include <libdevmapper.h>
4ae388
+#include <ctype.h>
4ae388
 
4ae388
 #include "checkers.h"
4ae388
 #include "vector.h"
4ae388
@@ -881,6 +882,46 @@ scsi_sysfs_pathinfo (struct path * pp)
4ae388
 }
4ae388
 
4ae388
 static int
4ae388
+nvme_sysfs_pathinfo (struct path * pp)
4ae388
+{
4ae388
+	struct udev_device *parent;
4ae388
+	const char *attr_path = NULL;
4ae388
+
4ae388
+
4ae388
+	attr_path = udev_device_get_sysname(pp->udev);
4ae388
+	if (!attr_path)
4ae388
+		return 1;
4ae388
+
4ae388
+	if (sscanf(attr_path, "nvme%dn%d",
4ae388
+		   &pp->sg_id.host_no,
4ae388
+		   &pp->sg_id.scsi_id) != 2)
4ae388
+		return 1;
4ae388
+	pp->sg_id.channel = 0;
4ae388
+	pp->sg_id.lun = 0;
4ae388
+
4ae388
+	parent = udev_device_get_parent(pp->udev);
4ae388
+	if (!parent)
4ae388
+		return 1;
4ae388
+
4ae388
+	snprintf(pp->vendor_id, SCSI_VENDOR_SIZE, "NVME");
4ae388
+	snprintf(pp->product_id, SCSI_PRODUCT_SIZE, "%s",
4ae388
+		 udev_device_get_sysattr_value(parent, "model"));
4ae388
+	snprintf(pp->serial, SERIAL_SIZE, "%s",
4ae388
+		 udev_device_get_sysattr_value(parent, "serial"));
4ae388
+	snprintf(pp->rev, SCSI_REV_SIZE, "%s",
4ae388
+		 udev_device_get_sysattr_value(parent, "firmware_rev"));
4ae388
+
4ae388
+	condlog(3, "%s: vendor = %s", pp->dev, pp->vendor_id);
4ae388
+	condlog(3, "%s: product = %s", pp->dev, pp->product_id);
4ae388
+	condlog(3, "%s: serial = %s", pp->dev, pp->serial);
4ae388
+	condlog(3, "%s: rev = %s", pp->dev, pp->rev);
4ae388
+
4ae388
+	pp->hwe = find_hwe(conf->hwtable, pp->vendor_id, pp->product_id, NULL);
4ae388
+
4ae388
+	return 0;
4ae388
+}
4ae388
+
4ae388
+static int
4ae388
 rbd_sysfs_pathinfo (struct path * pp)
4ae388
 {
4ae388
 	sprintf(pp->vendor_id, "Ceph");
4ae388
@@ -1040,14 +1081,20 @@ path_offline (struct path * pp)
4ae388
 {
4ae388
 	struct udev_device * parent;
4ae388
 	char buff[SCSI_STATE_SIZE];
4ae388
+	const char *subsys_type;
4ae388
 
4ae388
-	if (pp->bus != SYSFS_BUS_SCSI)
4ae388
+	if (pp->bus == SYSFS_BUS_SCSI)
4ae388
+		subsys_type = "scsi";
4ae388
+	else if (pp->bus == SYSFS_BUS_NVME)
4ae388
+		subsys_type = "nvme";
4ae388
+	else
4ae388
 		return PATH_UP;
4ae388
 
4ae388
 	parent = pp->udev;
4ae388
 	while (parent) {
4ae388
 		const char *subsys = udev_device_get_subsystem(parent);
4ae388
-		if (subsys && !strncmp(subsys, "scsi", 4))
4ae388
+		if (subsys && !strncmp(subsys, subsys_type,
4ae388
+		    		       strlen(subsys_type)))
4ae388
 			break;
4ae388
 		parent = udev_device_get_parent(parent);
4ae388
 	}
4ae388
@@ -1063,15 +1110,30 @@ path_offline (struct path * pp)
4ae388
 
4ae388
 	condlog(3, "%s: path state = %s", pp->dev, buff);
4ae388
 
4ae388
-	if (!strncmp(buff, "offline", 7)) {
4ae388
-		pp->offline = 1;
4ae388
-		return PATH_DOWN;
4ae388
+	if (pp->bus == SYSFS_BUS_SCSI) {
4ae388
+		if (!strncmp(buff, "offline", 7)) {
4ae388
+			pp->offline = 1;
4ae388
+			return PATH_DOWN;
4ae388
+		}
4ae388
+		pp->offline = 0;
4ae388
+		if (!strncmp(buff, "blocked", 7) ||
4ae388
+		    !strncmp(buff, "quiesce", 7))
4ae388
+			return PATH_PENDING;
4ae388
+		else if (!strncmp(buff, "running", 7))
4ae388
+			return PATH_UP;
4ae388
+	}
4ae388
+	else if (pp->bus == SYSFS_BUS_NVME) {
4ae388
+		if (!strncmp(buff, "dead", 4)) {
4ae388
+			pp->offline = 1;
4ae388
+			return PATH_DOWN;
4ae388
+		}
4ae388
+		pp->offline = 0;
4ae388
+		if (!strncmp(buff, "new", 3) ||
4ae388
+		    !strncmp(buff, "deleting", 8))
4ae388
+			return PATH_PENDING;
4ae388
+		else if (!strncmp(buff, "live", 4))
4ae388
+			return PATH_UP;
4ae388
 	}
4ae388
-	pp->offline = 0;
4ae388
-	if (!strncmp(buff, "blocked", 7) || !strncmp(buff, "quiesce", 7))
4ae388
-		return PATH_PENDING;
4ae388
-	else if (!strncmp(buff, "running", 7))
4ae388
-		return PATH_UP;
4ae388
 
4ae388
 	return PATH_DOWN;
4ae388
 }
4ae388
@@ -1091,6 +1153,8 @@ sysfs_pathinfo(struct path * pp)
4ae388
 		pp->bus = SYSFS_BUS_SCSI;
4ae388
 	if (!strncmp(pp->dev,"rbd", 3))
4ae388
 		pp->bus = SYSFS_BUS_RBD;
4ae388
+	if (!strncmp(pp->dev,"nvme", 4))
4ae388
+		pp->bus = SYSFS_BUS_NVME;
4ae388
 
4ae388
 	if (pp->bus == SYSFS_BUS_UNDEF)
4ae388
 		return 0;
4ae388
@@ -1106,6 +1170,9 @@ sysfs_pathinfo(struct path * pp)
4ae388
 	} else if (pp->bus == SYSFS_BUS_RBD) {
4ae388
 		if (rbd_sysfs_pathinfo(pp))
4ae388
 			return 1;
4ae388
+	} else if (pp->bus == SYSFS_BUS_NVME) {
4ae388
+		if (nvme_sysfs_pathinfo(pp))
4ae388
+			return 1;
4ae388
 	}
4ae388
 	return 0;
4ae388
 }
4ae388
@@ -1132,7 +1199,7 @@ cciss_ioctl_pathinfo (struct path * pp,
4ae388
 }
4ae388
 
4ae388
 int
4ae388
-get_state (struct path * pp, int daemon)
4ae388
+get_state (struct path * pp, int daemon, int oldstate)
4ae388
 {
4ae388
 	struct checker * c = &pp->checker;
4ae388
 	int state;
4ae388
@@ -1171,8 +1238,9 @@ get_state (struct path * pp, int daemon)
4ae388
 	    (pp->bus != SYSFS_BUS_SCSI ||
4ae388
 	     sysfs_get_timeout(pp, &(c->timeout))))
4ae388
 		c->timeout = DEF_TIMEOUT;
4ae388
-	state = checker_check(c);
4ae388
-	condlog(3, "%s: state = %s", pp->dev, checker_state_name(state));
4ae388
+	state = checker_check(c, oldstate);
4ae388
+	condlog(3, "%s: %s state = %s", pp->dev,
4ae388
+		checker_name(c), checker_state_name(state));
4ae388
 	if (state != PATH_UP && state != PATH_GHOST &&
4ae388
 	    strlen(checker_message(c)))
4ae388
 		condlog(3, "%s: checker msg is \"%s\"",
4ae388
@@ -1256,6 +1324,82 @@ free_dev:
4ae388
 	return ret;
4ae388
 }
4ae388
 
4ae388
+/*
4ae388
+ * Mangle string of length *len starting at start
4ae388
+ * by removing character sequence "00" (hex for a 0 byte),
4ae388
+ * starting at end, backwards.
4ae388
+ * Changes the value of *len if characters were removed.
4ae388
+ * Returns a pointer to the position where "end" was moved to.
4ae388
+ */
4ae388
+static char *
4ae388
+skip_zeroes_backward(char* start, int *len, char *end)
4ae388
+{
4ae388
+	char *p = end;
4ae388
+
4ae388
+	while (p >= start + 2 && *(p - 1) == '0' && *(p - 2) == '0')
4ae388
+		p -= 2;
4ae388
+
4ae388
+	if (p == end)
4ae388
+		return p;
4ae388
+
4ae388
+	memmove(p, end, start + *len + 1 - end);
4ae388
+	*len -= end - p;
4ae388
+
4ae388
+	return p;
4ae388
+}
4ae388
+
4ae388
+/*
4ae388
+ * Fix for NVME wwids looking like this:
4ae388
+ * nvme.0000-3163653363666438366239656630386200-4c696e75780000000000000000000000000000000000000000000000000000000000000000000000-00000002
4ae388
+ * which are encountered in some combinations of Linux NVME host and target.
4ae388
+ * The '00' are hex-encoded 0-bytes which are forbidden in the serial (SN)
4ae388
+ * and model (MN) fields. Discard them.
4ae388
+ * If a WWID of the above type is found, sets pp->wwid and returns a value > 0.
4ae388
+ * Otherwise, returns 0.
4ae388
+ */
4ae388
+static int
4ae388
+fix_broken_nvme_wwid(struct path *pp, const char *value, int size)
4ae388
+{
4ae388
+	static const char _nvme[] = "nvme.";
4ae388
+	int len, i;
4ae388
+	char mangled[256];
4ae388
+	char *p;
4ae388
+
4ae388
+	len = strlen(value);
4ae388
+	if (len >= sizeof(mangled))
4ae388
+		return 0;
4ae388
+
4ae388
+	/* Check that value starts with "nvme.%04x-" */
4ae388
+	if (memcmp(value, _nvme, sizeof(_nvme) - 1) || value[9] != '-')
4ae388
+		return 0;
4ae388
+	for (i = 5; i < 9; i++)
4ae388
+		if (!isxdigit(value[i]))
4ae388
+			return 0;
4ae388
+
4ae388
+	memcpy(mangled, value, len + 1);
4ae388
+
4ae388
+	/* search end of "model" part and strip trailing '00' */
4ae388
+	p = memrchr(mangled, '-', len);
4ae388
+	if (p == NULL)
4ae388
+		return 0;
4ae388
+
4ae388
+	p = skip_zeroes_backward(mangled, &len, p);
4ae388
+
4ae388
+	/* search end of "serial" part */
4ae388
+	p = memrchr(mangled, '-', p - mangled);
4ae388
+	if (p == NULL || memrchr(mangled, '-', p - mangled) != mangled + 9)
4ae388
+		/* We expect exactly 3 '-' in the value */
4ae388
+		return 0;
4ae388
+
4ae388
+	p = skip_zeroes_backward(mangled, &len, p);
4ae388
+	if (len >= size)
4ae388
+		return 0;
4ae388
+
4ae388
+	memcpy(pp->wwid, mangled, len + 1);
4ae388
+	condlog(2, "%s: over-long WWID shortened to %s", pp->dev, pp->wwid);
4ae388
+	return len;
4ae388
+}
4ae388
+
4ae388
 int
4ae388
 get_uid (struct path * pp, struct udev_device *udev)
4ae388
 {
4ae388
@@ -1287,14 +1431,10 @@ get_uid (struct path * pp, struct udev_d
4ae388
 		     conf->cmd == CMD_VALID_PATH)
4ae388
 			value = getenv(pp->uid_attribute);
4ae388
 		if (value && strlen(value)) {
4ae388
-			size_t len = WWID_SIZE;
4ae388
-
4ae388
-			if (strlen(value) + 1 > WWID_SIZE) {
4ae388
+			size_t len = strlcpy(pp->wwid, value, WWID_SIZE);
4ae388
+			if (len > WWID_SIZE &&
4ae388
+			    !fix_broken_nvme_wwid(pp, value, WWID_SIZE))
4ae388
 				condlog(0, "%s: wwid overflow", pp->dev);
4ae388
-			} else {
4ae388
-				len = strlen(value);
4ae388
-			}
4ae388
-			strncpy(pp->wwid, value, len);
4ae388
 			condlog(4, "%s: got wwid of '%s'", pp->dev, pp->wwid);
4ae388
 			pp->missing_udev_info = INFO_OK;
4ae388
 			pp->tick = 0;
4ae388
@@ -1381,7 +1521,8 @@ pathinfo (struct path *pp, vector hwtabl
4ae388
 
4ae388
 	if (mask & DI_CHECKER) {
4ae388
 		if (path_state == PATH_UP) {
4ae388
-			pp->chkrstate = pp->state = get_state(pp, 0);
4ae388
+			pp->chkrstate = pp->state = get_state(pp, 0,
4ae388
+							      path_state);
4ae388
 			if (pp->state == PATH_UNCHECKED ||
4ae388
 			    pp->state == PATH_WILD)
4ae388
 				goto blank;
4ae388
Index: multipath-tools-130222/libmultipath/hwtable.c
4ae388
===================================================================
4ae388
--- multipath-tools-130222.orig/libmultipath/hwtable.c
4ae388
+++ multipath-tools-130222/libmultipath/hwtable.c
4ae388
@@ -1185,7 +1185,15 @@ static struct hwentry default_hw[] = {
4ae388
 		.checker_name  = RBD,
4ae388
 		.deferred_remove = DEFERRED_REMOVE_ON,
4ae388
 	},
4ae388
-
4ae388
+	/*
4ae388
+	 *  Generic NVMe devices
4ae388
+	 */
4ae388
+	{
4ae388
+		.vendor        = "NVME",
4ae388
+		.product       = ".*",
4ae388
+		.uid_attribute = "ID_WWN",
4ae388
+		.checker_name  = NONE,
4ae388
+	},
4ae388
 	/*
4ae388
 	 * EOL
4ae388
 	 */
4ae388
Index: multipath-tools-130222/libmultipath/structs.h
4ae388
===================================================================
4ae388
--- multipath-tools-130222.orig/libmultipath/structs.h
4ae388
+++ multipath-tools-130222/libmultipath/structs.h
4ae388
@@ -54,6 +54,7 @@ enum sysfs_buses {
4ae388
 	SYSFS_BUS_CCW,
4ae388
 	SYSFS_BUS_CCISS,
4ae388
 	SYSFS_BUS_RBD,
4ae388
+	SYSFS_BUS_NVME,
4ae388
 };
4ae388
 
4ae388
 enum pathstates {
4ae388
Index: multipath-tools-130222/libmultipath/checkers.c
4ae388
===================================================================
4ae388
--- multipath-tools-130222.orig/libmultipath/checkers.c
4ae388
+++ multipath-tools-130222/libmultipath/checkers.c
4ae388
@@ -101,6 +101,8 @@ struct checker * add_checker (char * nam
4ae388
 	if (!c)
4ae388
 		return NULL;
4ae388
 	snprintf(c->name, CHECKER_NAME_LEN, "%s", name);
4ae388
+	if (!strncmp(c->name, NONE, 4))
4ae388
+		goto done;
4ae388
 	snprintf(libname, LIB_CHECKER_NAMELEN, "%s/libcheck%s.so",
4ae388
 		 conf->multipath_dir, name);
4ae388
 	if (stat(libname,&stbuf) < 0) {
4ae388
@@ -144,7 +146,7 @@ struct checker * add_checker (char * nam
4ae388
 		condlog(0, "A dynamic linking error occurred: (%s)", errstr);
4ae388
 	if (!c->repair)
4ae388
 		goto out;
4ae388
-
4ae388
+done:
4ae388
 	c->fd = 0;
4ae388
 	c->sync = 1;
4ae388
 	list_add(&c->node, &checkers);
4ae388
@@ -194,14 +196,16 @@ int checker_init (struct checker * c, vo
4ae388
 	if (!c)
4ae388
 		return 1;
4ae388
 	c->mpcontext = mpctxt_addr;
4ae388
-	return c->init(c);
4ae388
+	if (c->init)
4ae388
+		return c->init(c);
4ae388
+	return 0;
4ae388
 }
4ae388
 
4ae388
 void checker_put (struct checker * dst)
4ae388
 {
4ae388
 	struct checker * src;
4ae388
 
4ae388
-	if (!dst)
4ae388
+	if (!dst || !strlen(dst->name))
4ae388
 		return;
4ae388
 	src = checker_lookup(dst->name);
4ae388
 	if (dst->free)
4ae388
@@ -221,10 +225,11 @@ void checker_repair (struct checker * c)
4ae388
 		return;
4ae388
 	}
4ae388
 
4ae388
-	c->repair(c);
4ae388
+	if (c->repair)
4ae388
+		c->repair(c);
4ae388
 }
4ae388
 
4ae388
-int checker_check (struct checker * c)
4ae388
+int checker_check (struct checker * c, int path_state)
4ae388
 {
4ae388
 	int r;
4ae388
 
4ae388
@@ -236,6 +241,8 @@ int checker_check (struct checker * c)
4ae388
 		MSG(c, "checker disabled");
4ae388
 		return PATH_UNCHECKED;
4ae388
 	}
4ae388
+	if (!strncmp(c->name, NONE, 4))
4ae388
+		return path_state;
4ae388
 	if (c->fd <= 0) {
4ae388
 		MSG(c, "no usable fd");
4ae388
 		return PATH_WILD;
4ae388
@@ -249,6 +256,8 @@ int checker_selected (struct checker * c
4ae388
 {
4ae388
 	if (!c)
4ae388
 		return 0;
4ae388
+	if (!strncmp(c->name, NONE, 4))
4ae388
+		return 1;
4ae388
 	return (c->check) ? 1 : 0;
4ae388
 }
4ae388
 
4ae388
Index: multipath-tools-130222/libmultipath/checkers.h
4ae388
===================================================================
4ae388
--- multipath-tools-130222.orig/libmultipath/checkers.h
4ae388
+++ multipath-tools-130222/libmultipath/checkers.h
4ae388
@@ -75,6 +75,7 @@ enum path_check_state {
4ae388
 #define EMC_CLARIION "emc_clariion"
4ae388
 #define READSECTOR0  "readsector0"
4ae388
 #define CCISS_TUR    "cciss_tur"
4ae388
+#define NONE         "none"
4ae388
 #define RBD          "rbd"
4ae388
 
4ae388
 #define DEFAULT_CHECKER DIRECTIO
4ae388
@@ -129,7 +130,7 @@ void checker_set_fd (struct checker *, i
4ae388
 void checker_enable (struct checker *);
4ae388
 void checker_disable (struct checker *);
4ae388
 void checker_repair (struct checker *);
4ae388
-int checker_check (struct checker *);
4ae388
+int checker_check (struct checker *, int);
4ae388
 int checker_selected (struct checker *);
4ae388
 char * checker_name (struct checker *);
4ae388
 char * checker_message (struct checker *);
4ae388
Index: multipath-tools-130222/libmultipath/discovery.h
4ae388
===================================================================
4ae388
--- multipath-tools-130222.orig/libmultipath/discovery.h
4ae388
+++ multipath-tools-130222/libmultipath/discovery.h
4ae388
@@ -35,7 +35,7 @@ int path_discovery (vector pathvec, stru
4ae388
 
4ae388
 int do_tur (char *);
4ae388
 int path_offline (struct path *);
4ae388
-int get_state (struct path * pp, int daemon);
4ae388
+int get_state (struct path * pp, int daemon, int state);
4ae388
 int pathinfo (struct path *, vector hwtable, int mask);
4ae388
 int store_pathinfo (vector pathvec, vector hwtable,
4ae388
 		    struct udev_device *udevice, int flag,
4ae388
Index: multipath-tools-130222/libmultipath/uevent.c
4ae388
===================================================================
4ae388
--- multipath-tools-130222.orig/libmultipath/uevent.c
4ae388
+++ multipath-tools-130222/libmultipath/uevent.c
4ae388
@@ -447,7 +447,7 @@ int uevent_listen(struct udev *udev)
4ae388
 		goto out;
4ae388
 	}
4ae388
 	err = udev_monitor_filter_add_match_subsystem_devtype(monitor, "block",
4ae388
-							      NULL);
4ae388
+							      "disk");
4ae388
 	if (err)
4ae388
 		condlog(2, "failed to create filter : %s", strerror(-err));
4ae388
 	err = udev_monitor_enable_receiving(monitor);
4ae388
Index: multipath-tools-130222/multipath/multipath.conf.5
4ae388
===================================================================
4ae388
--- multipath-tools-130222.orig/multipath/multipath.conf.5
4ae388
+++ multipath-tools-130222/multipath/multipath.conf.5
4ae388
@@ -284,6 +284,9 @@ Check the path state for LSI/Engenio/Net
4ae388
 .B directio
4ae388
 Read the first sector with direct I/O.
4ae388
 .TP
4ae388
+.B none
4ae388
+Do not check the device; fall back to using the values retrieved from sysfs.
4ae388
+.TP
4ae388
 .B rbd
4ae388
 Check if the path is in the Ceph blacklist.
4ae388
 .TP
4ae388
Index: multipath-tools-130222/multipathd/main.c
4ae388
===================================================================
4ae388
--- multipath-tools-130222.orig/multipathd/main.c
4ae388
+++ multipath-tools-130222/multipathd/main.c
4ae388
@@ -908,28 +908,6 @@ out:
4ae388
 	return r;
4ae388
 }
4ae388
 
4ae388
-static int
4ae388
-uev_discard(char * devpath)
4ae388
-{
4ae388
-	char *tmp;
4ae388
-	char a[11], b[11];
4ae388
-
4ae388
-	/*
4ae388
-	 * keep only block devices, discard partitions
4ae388
-	 */
4ae388
-	tmp = strstr(devpath, "/block/");
4ae388
-	if (tmp == NULL){
4ae388
-		condlog(4, "no /block/ in '%s'", devpath);
4ae388
-		return 1;
4ae388
-	}
4ae388
-	if (sscanf(tmp, "/block/%10s", a) != 1 ||
4ae388
-	    sscanf(tmp, "/block/%10[^/]/%10s", a, b) == 2) {
4ae388
-		condlog(4, "discard event on %s", devpath);
4ae388
-		return 1;
4ae388
-	}
4ae388
-	return 0;
4ae388
-}
4ae388
-
4ae388
 int
4ae388
 uev_trigger (struct uevent * uev, void * trigger_data)
4ae388
 {
4ae388
@@ -938,9 +916,6 @@ uev_trigger (struct uevent * uev, void *
4ae388
 
4ae388
 	vecs = (struct vectors *)trigger_data;
4ae388
 
4ae388
-	if (uev_discard(uev->devpath))
4ae388
-		return 0;
4ae388
-
4ae388
 	pthread_cleanup_push(cleanup_lock, &vecs->lock);
4ae388
 	lock(vecs->lock);
4ae388
 	pthread_testcancel();
4ae388
@@ -1358,7 +1333,7 @@ check_path (struct vectors * vecs, struc
4ae388
 
4ae388
 	newstate = path_offline(pp);
4ae388
 	if (newstate == PATH_UP)
4ae388
-		newstate = get_state(pp, 1);
4ae388
+		newstate = get_state(pp, 1, newstate);
4ae388
 	else
4ae388
 		checker_clear_message(&pp->checker);
4ae388