Bug 250309

Summary: panics with hw.bus.devices sysctl handler
Product: Base System Reporter: sigsys
Component: kern Assignee: freebsd-bugs (Nobody) <bugs>
Status: New ---    
Severity: Affects Only Me CC: emaste
Priority: --- Keywords: panic
Version: CURRENT   
Hardware: Any   
OS: Any   

Description sigsys 2020-10-12 23:01:52 UTC
I had panics on boot related to the hw.bus.devices sysctl.

[15] Fatal trap 9: general protection fault while in kernel mode
[15] cpuid = 7; apic id = 09
[15] instruction pointer   = 0x20:0xffffffff80c167fc
[15] stack pointer         = 0x28:0xfffffe00e78b8860
[15] frame pointer         = 0x28:0xfffffe00e78b88a0
[15] code segment          = base rx0, limit 0xfffff, type 0x1b
[15]                       = DPL 0, pres 1, long 1, def32 0, gran 1
[15] processor eflags      = interrupt enabled, resume, IOPL = 0
[15] current process       = 9241 (devmatch)
[15] trap number           = 9
[15] panic: general protection fault
[15] cpuid = 7
[15] time = 1598332353
[15] KDB: stack backtrace:
[15] db_trace_self_wrapper() at db_trace_self_wrapper+0x2b/frame 0xfffffe00e78b8570
[15] vpanic() at vpanic+0x182/frame 0xfffffe00e78b85c0
[15] panic() at panic+0x43/frame 0xfffffe00e78b8620
[15] trap_fatal() at trap_fatal+0x387/frame 0xfffffe00e78b8680
[15] trap() at trap+0xa4/frame 0xfffffe00e78b8790
[15] calltrap() at calltrap+0x8/frame 0xfffffe00e78b8790
[15] --- trap 0x9, rip = 0xffffffff80c167fc, rsp = 0xfffffe00e78b8860, rbp = 0xfffffe00e78b88a0 ---
[15] sysctl_devices() at sysctl_devices+0x21c/frame 0xfffffe00e78b88a0
[15] sysctl_root_handler_locked() at sysctl_root_handler_locked+0x9c/frame 0xfffffe00e78b88f0
[15] sysctl_root() at sysctl_root+0x20a/frame 0xfffffe00e78b8970
[15] userland_sysctl() at userland_sysctl+0x17d/frame 0xfffffe00e78b8a20
[15] sys___sysctl() at sys___sysctl+0x5f/frame 0xfffffe00e78b8ad0
[15] amd64_syscall() at amd64_syscall+0x140/frame 0xfffffe00e78b8bf0
[15] fast_syscall_common() at fast_syscall_common+0xf8/frame 0xfffffe00e78b8bf0
[15] --- syscall (202, FreeBSD ELF64, sys___sysctl), rip = 0x1df89305e12a, rsp = 0x7fffffe4b628, rbp = 0x7fffffe4b660 

It would happen from time to time before, but it started happening more and more often until I was pretty much unable to boot to multi-user mode anymore.  It turns out that a USB mouse was causing it.  The mouse would constantly disconnect from and reconnect to the USB bus, and that must have been triggering race conditions in that sysctl's handler.

The following patch fixed it for me, but I am not sure whether it is correct.  It has been working fine for more than a month, though (so I don't have to unplug my mouse to be able to boot).


diff --git a/sys/kern/subr_bus.c b/sys/kern/subr_bus.c
index 78d07796659..c42160162ed 100644
--- a/sys/kern/subr_bus.c
+++ b/sys/kern/subr_bus.c
@@ -854,6 +854,7 @@ devctl_safe_quote_sb(struct sbuf *sb, const char *src)
 /* End of /dev/devctl code */
 
 static TAILQ_HEAD(,device)	bus_data_devices;
+static struct sx		bus_data_sx;
 static int bus_data_generation = 1;
 
 static kobj_method_t null_methods[] = {
@@ -1817,7 +1818,9 @@ make_device(device_t parent, const char *name, int unit)
 
 	dev->state = DS_NOTPRESENT;
 
+	sx_xlock(&bus_data_sx);
 	TAILQ_INSERT_TAIL(&bus_data_devices, dev, devlink);
+	sx_xunlock(&bus_data_sx);
 	bus_data_generation_update();
 
 	return (dev);
@@ -1957,9 +1960,11 @@ device_delete_child(device_t dev, device_t child)
 		devclass_delete_device(child->devclass, child);
 	if (child->parent)
 		BUS_CHILD_DELETED(dev, child);
+	sx_xlock(&bus_data_sx);
 	TAILQ_REMOVE(&dev->children, child, link);
 	TAILQ_REMOVE(&bus_data_devices, child, devlink);
 	kobj_delete((kobj_t) child, M_BUS);
+	sx_xunlock(&bus_data_sx);
 
 	bus_data_generation_update();
 	return (0);
@@ -5165,6 +5170,7 @@ root_bus_module_handler(module_t mod, int what, void* arg)
 	switch (what) {
 	case MOD_LOAD:
 		TAILQ_INIT(&bus_data_devices);
+		sx_init(&bus_data_sx, "bus_data_sx");
 		kobj_class_compile((kobj_class_t) &root_driver);
 		root_bus = make_device(NULL, "root", 0);
 		root_bus->desc = "System root bus";
@@ -5507,19 +5513,24 @@ sysctl_devices(SYSCTL_HANDLER_ARGS)
 	/*
 	 * Scan the list of devices, looking for the requested index.
 	 */
+	sx_slock(&bus_data_sx);
 	TAILQ_FOREACH(dev, &bus_data_devices, devlink) {
 		if (index-- == 0)
 			break;
 	}
-	if (dev == NULL)
-		return (ENOENT);
+	if (dev == NULL) {
+		error = ENOENT;
+		goto out;
+	}
 
 	/*
 	 * Populate the return item, careful not to overflow the buffer.
 	 */
 	udev = malloc(sizeof(*udev), M_BUS, M_WAITOK | M_ZERO);
-	if (udev == NULL)
-		return (ENOMEM);
+	if (udev == NULL) {
+		error = ENOMEM;
+		goto out;
+	}
 	udev->dv_handle = (uintptr_t)dev;
 	udev->dv_parent = (uintptr_t)dev->parent;
 	udev->dv_devflags = dev->devflags;
@@ -5550,6 +5561,7 @@ sysctl_devices(SYSCTL_HANDLER_ARGS)
 		error = SYSCTL_OUT(req, udev, sizeof(*udev));
 	sbuf_delete(&sb);
 	free(udev, M_BUS);
+out:	sx_sunlock(&bus_data_sx);
 	return (error);
 }
 
@@ -5586,11 +5598,13 @@ device_lookup_by_name(const char *name)
 {
 	device_t dev;
 
+	sx_slock(&bus_data_sx);
 	TAILQ_FOREACH(dev, &bus_data_devices, devlink) {
 		if (dev->nameunit != NULL && strcmp(dev->nameunit, name) == 0)
-			return (dev);
+			break;
 	}
-	return (NULL);
+	sx_sunlock(&bus_data_sx);
+	return (dev);
 }
 
 /*