I used rsync to copy data to a FAT32 filesystem. My system crashed with an assertion failure in msdosfs_rename. I think the problem is bad error recovery. The first three lines of the core.txt below were in the message buffer but were not copied to /var/log/messages. They must have all happened in quick succession. So the kernel marked the filesystem read-only due to an error and the rename failed in an impossible way as a result. My kernel is 13.2-STABLE up through commit 4c4633fdffbe. The filesystem was mounted with -L zh_CN.UTF-8. This probably does not matter. The data is on a ~10-year-old USB drive that was mostly used with Windows. I am trying to clone the disk to reproduce the crash. /mnt: Freeing unused sector 7185542 6 fffff001 /dev/da13s1: remounting read-only due to corruption panic: Assertion error == EJUSTRETURN failed at /usr/home/jfc/freebsd/src/sys/fs/msdosfs/msdosfs_vnops.c:1195 cpuid = 1 time = 1705507114 KDB: stack backtrace: #0 0xffffffff80c1a1d5 at kdb_backtrace+0x65 #1 0xffffffff80bcf522 at vpanic+0x152 #2 0xffffffff80bcf323 at panic+0x43 #3 0xffffffff80a78775 at msdosfs_rename+0xc45 #4 0xffffffff8115c81d at VOP_RENAME_APV+0x3d #5 0xffffffff80cc02de at kern_renameat+0x3ee #6 0xffffffff8108aec0 at amd64_syscall+0x140 #7 0xffffffff810601eb at fast_syscall_common+0xf8 [...] 
#4 0xffffffff80bcf323 in panic (fmt=<unavailable>) at /usr/home/jfc/freebsd/src/sys/kern/kern_shutdown.c:845 ap = {{gp_offset = 32, fp_offset = 48, overflow_arg_area = 0xfffffe05a6054a90, reg_save_area = 0xfffffe05a6054a30}} #5 0xffffffff80a78775 in msdosfs_rename (ap=<optimized out>) at /usr/home/jfc/freebsd/src/sys/fs/msdosfs/msdosfs_vnops.c:1195 toname = "2014VA~1JPG" oldname = "2014VA~1NRU" tdvp = 0xfffff806c7001000 fdvp = 0xfffff806c7001000 fvp = 0xfffff806791725b8 tvp = 0x0 tcnp = 0xfffffe05a6054c48 fcnp = 0xfffffe05a6054d20 pmp = 0xfffff8123e23de00 error = <optimized out> checkpath_locked = <optimized out> newparent = <optimized out> doingdirectory = <optimized out> blkoff = 2720 scn = 146065 nip = <optimized out> vp = <optimized out> fdip = 0xfffff8144ffc0400 fip = 0xfffff825f2a81d00 tdip = 0xfffff8144ffc0400 tip = <optimized out> to_diroffset = 2720 wait_scn = <optimized out> cn = <optimized out> bn = <optimized out> bp = <optimized out> dotdotp = <optimized out> pcl = <optimized out> #6 0xffffffff8115c81d in VOP_RENAME_APV ( vop=0xffffffff81aaf600 <msdosfs_vnodeops>, a=a@entry=0xfffffe05a6054d78) at vnode_if.c:1672 rc = <optimized out> #7 0xffffffff80cc02de in VOP_RENAME (fdvp=<unavailable>, fvp=<optimized out>, tdvp=<optimized out>, tvp=<unavailable>, fcnp=<optimized out>, tcnp=<optimized out>) at ./vnode_if.h:853 a = {a_gen = {a_desc = 0xffffffff81b4ed70 <vop_rename_desc>}, a_fdvp = 0xfffff806c7001000, a_fvp = 0xfffff806791725b8, a_fcnp = 0xfffffe05a6054d20, a_tdvp = 0xfffff806c7001000, a_tvp = 0xfffff806a87c9000, a_tcnp = 0xfffffe05a6054c48} #8 kern_renameat (td=0xfffffe03b0400020, oldfd=-100, old=0x820c39d00 <error: Cannot access memory at address 0x820c39d00>, newfd=-100, new=0x820c3a500 <error: Cannot access memory at address 0x820c3a500>, pathseg=UIO_USERSPACE) at /usr/home/jfc/freebsd/src/sys/kern/vfs_syscalls.c:3732 fromnd = { ni_dirp = 0x820c39d00 <error: Cannot access memory at address 0x820c39d00>, ni_segflg = UIO_USERSPACE, ni_rightsneeded = 
0xffffffff81a016b8 <cap_renameat_source_rights>, ni_startdir = 0xfffff806c7001000, ni_rootdir = 0xfffff801429aa1e8, ni_topdir = 0x0, ni_dirfd = -100, ni_lcf = 0, ni_filecaps = { fc_rights = {cr_rights = {0, 0}}, fc_ioctls = 0x0, fc_nioctls = -1, fc_fcntls = 0}, ni_vp = 0xfffff806791725b8, ni_dvp = 0xfffff806c7001000, ni_resflags = 0, ni_debugflags = 3, ni_loopcnt = 0, ni_pathlen = 1, ni_next = 0xfffff80175e1441d "", ni_cnd = {cn_origflags = 264208, cn_flags = 285476880, cn_thread = 0xfffffe03b0400020, cn_cred = 0xfffff80d38c6cd00, cn_nameiop = DELETE, cn_lkflags = 2097152, cn_pnbuf = 0xfffff80175e14400 ".2014ValentineBack.JPG.NrU9fM", cn_nameptr = 0xfffff80175e14400 ".2014ValentineBack.JPG.NrU9fM", cn_namelen = 29}, ni_cap_tracker = {tqh_first = 0x0, tqh_last = 0xfffffe05a6054d60}, ni_dvp_seqc = 1977697309, ni_vp_seqc = 4294965249} tond = { ni_dirp = 0x820c3a500 <error: Cannot access memory at address 0x820c3a500>, ni_segflg = UIO_USERSPACE, ni_rightsneeded = 0xffffffff81a016c8 <cap_renameat_target_rights>, ni_startdir = 0xfffff806c7001000, ni_rootdir = 0xfffff801429aa1e8, ni_topdir = 0x0, ni_dirfd = -100, ni_lcf = 0, ni_filecaps = { fc_rights = {cr_rights = {0, 0}}, fc_ioctls = 0x0, fc_nioctls = -1, fc_fcntls = 0}, ni_vp = 0xfffff806a87c9000, ni_dvp = 0xfffff806c7001000, ni_resflags = 0, ni_debugflags = 3, ni_loopcnt = 0, ni_pathlen = 1, ni_next = 0xfffff80142be0c15 "", ni_cnd = {cn_origflags = 526349, cn_flags = 285740045, cn_thread = 0xfffffe03b0400020, cn_cred = 0xfffff80d38c6cd00, cn_nameiop = RENAME, cn_lkflags = 524288, cn_pnbuf = 0xfffff80142be0c00 "2014ValentineBack.JPG", cn_nameptr = 0xfffff80142be0c00 "2014ValentineBack.JPG", cn_namelen = 21}, ni_cap_tracker = {tqh_first = 0x0, tqh_last = 0xfffffe05a6054c88}, ni_dvp_seqc = 2160781574, ni_vp_seqc = 4294967295} mp = 0xfffffe05fd4d8040 error = 0 fvp = 0xfffff806791725b8 tondflags = <optimized out> tvp = 0xfffff806a87c9000 tdvp = 0xfffff806c7001000 #9 0xffffffff8108aec0 in syscallenter (td=<optimized out>) 
at /usr/home/jfc/freebsd/src/sys/amd64/amd64/../../kern/subr_syscall.c:188 p = 0xfffffe04e1dcf008 sa = 0xfffffe03b04003f8 error = <optimized out> se = 0xffffffff81ac0670 <sysent+4096> sy_thr_static = true traced = <optimized out> _audit_entered = <optimized out> #10 amd64_syscall (td=0xfffffe03b0400020, traced=0) at /usr/home/jfc/freebsd/src/sys/amd64/amd64/trap.c:1181 ksi = {ksi_link = {tqe_next = 0xfffffe05a6054f30, tqe_prev = 0xffffffff8108a018 <trap+1944>}, ksi_info = { si_signo = -1337982944, si_errno = -509, si_code = -1509601472, si_pid = -507, si_uid = 2785365616, si_status = -507, si_addr = 0x46, si_value = {sival_int = -1509601680, sival_ptr = 0xfffffe05a6054e70, sigval_int = -1509601680, sigval_ptr = 0xfffffe05a6054e70}, _reason = {_fault = { _trapno = -2135248234}, _timer = {_timerid = -2135248234, _overrun = -1}, _mesgq = {_mqd = -2135248234}, _poll = { _band = -2135248234}, __spare__ = {__spare1__ = -2135248234, __spare2__ = {-2114959976, -1, 70, 0, 0, 0, 1951529631}}}}, ksi_flags = -1509601616, ksi_sigq = 0xffffffff80b5f282 <handleevents+578>}
Did it crash when you rsynced _to_ the failing drive?
rsync was writing to the drive when the system crashed. rsync writes to a temporary file and renames the file into place. The rename caused the crash.
https://reviews.freebsd.org/D43482
A commit in branch main references this bug: URL: https://cgit.FreeBSD.org/src/commit/?id=be0df84849ff3e8fb5ec65176ffde88dbefdc434 commit be0df84849ff3e8fb5ec65176ffde88dbefdc434 Author: Konstantin Belousov <kib@FreeBSD.org> AuthorDate: 2024-01-17 22:55:05 +0000 Commit: Konstantin Belousov <kib@FreeBSD.org> CommitDate: 2024-01-18 16:48:05 +0000 msdosfs_rename(): handle errors from msdosfs_lookup_ino() Properly working storage and correct filesystem structure indeed only allow the EJUSTRETURN return code, but since the called function needs to read directory blocks and (re)parse the content, the assert is not neccessary hold. PR: 276408 Reported by: John F. Carr Reviewed by: markj Sponsored by: The FreeBSD Foundation MFC after: 1 week Differential revision: https://reviews.freebsd.org/D43482 sys/fs/msdosfs/msdosfs_vnops.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
I applied the patches from main to my (now) 13.3-PRERELEASE system. The initial error handling worked, converting the filesystem to read-only and failing the system call in progress. When I unmounted the filesystem my system crashed because the mnt_lockref field was negative. I can report this crash as a separate bug if it is insufficiently related to the original bug. From the crash dump analysis: Unread portion of the kernel message buffer: MPASSERT mp 0xfffffe03ae86f5c0 failed: mp->mnt_ref > 0 && mp->mnt_lockref >= 0 && mp->mnt_writeopcount >= 0 not true at /usr/home/jfc/freebsd/src/sys/kern/vfs_mount.c:1718 (vfs_op_enter) panic: invalid count(s): ref 2314 lockref -1 writeopcount 0 cpuid = 17 time = 1705678821 KDB: stack backtrace: #0 0xffffffff80c1a7d5 at kdb_backtrace+0x65 #1 0xffffffff80bcfa12 at vpanic+0x152 #2 0xffffffff80bcf813 at panic+0x43 #3 0xffffffff80ca51e7 at vfs_op_enter+0x1a7 #4 0xffffffff80ca496f at dounmount+0xff #5 0xffffffff80ca4812 at kern_unmount+0x312 #6 0xffffffff8108ded0 at amd64_syscall+0x140 #7 0xffffffff8106258b at fast_syscall_common+0xf8 Uptime: 4h1m2s Dumping 6801 out of 163636 MB:..1%..11%..21%..31%..41%..51%..61%..71%..81%..91% __curthread () at /usr/home/jfc/freebsd/src/sys/amd64/include/pcpu_aux.h:53 53 __asm("movq %%gs:%P1,%0" : "=r" (td) : "n" (offsetof(struct pcpu, (kgdb) #0 __curthread () at /usr/home/jfc/freebsd/src/sys/amd64/include/pcpu_aux.h:53 td = <optimized out> #1 doadump (textdump=<optimized out>) at /usr/home/jfc/freebsd/src/sys/kern/kern_shutdown.c:394 error = 0 coredump = <optimized out> #2 0xffffffff80bcf622 in kern_reboot (howto=260) at /usr/home/jfc/freebsd/src/sys/kern/kern_shutdown.c:482 once = 0 #3 0xffffffff80bcfa7f in vpanic ( fmt=0xffffffff8126ea0e "invalid count(s): ref %d lockref %d writeopcount %d", ap=ap@entry=0xfffffe03b0c70c10) at /usr/home/jfc/freebsd/src/sys/kern/kern_shutdown.c:921 buf = "invalid count(s): ref 2314 lockref -1 writeopcount 0", '\000' <repeats 203 times> other_cpus = {__bits = 
{281474976579583, 0, 0, 0}} td = 0xfffff801cda27740 bootopt = <unavailable> newpanic = <optimized out> #4 0xffffffff80bcf813 in panic (fmt=<unavailable>) at /usr/home/jfc/freebsd/src/sys/kern/kern_shutdown.c:845 ap = {{gp_offset = 32, fp_offset = 48, overflow_arg_area = 0xfffffe03b0c70c40, reg_save_area = 0xfffffe03b0c70be0}} #5 0xffffffff80ca51e7 in vfs_op_enter (mp=0xfffffe03ae86f5c0) at /usr/home/jfc/freebsd/src/sys/kern/vfs_mount.c:1715 cpu = <optimized out> mpcpu = <optimized out> #6 0xffffffff80ca496f in dounmount (mp=0xfffffe03ae86f5c0, flags=flags@entry=134217728, td=td@entry=0xfffff801cda27740) at /usr/home/jfc/freebsd/src/sys/kern/vfs_mount.c:1934 coveredvp = 0xfffff801cdf8bb70 mnt_gen_r = <optimized out> error = <unavailable> rootvp = <optimized out> async_flag = <optimized out> #7 0xffffffff80ca4812 in kern_unmount (td=0xfffff801cda27740, path=<optimized out>, flags=134217728) at /usr/home/jfc/freebsd/src/sys/kern/vfs_mount.c:1635 nd = {ni_dirp = 0xe7 <error: Cannot access memory at address 0xe7>, ni_segflg = UIO_USERSPACE, ni_rightsneeded = 0xfffff802b5b61528, ni_startdir = 0x3, ni_rootdir = 0x0, ni_topdir = 0xfffffe03af3ea3f0, ni_dirfd = 58975744, ni_lcf = -2047, ni_filecaps = {fc_rights = {cr_rights = {52, 0}}, fc_ioctls = 0xcda2774211000000, fc_nioctls = 1, fc_fcntls = 0}, ni_vp = 0xfffffe03af3ea3f0, ni_dvp = 0x1ab485eaa000, ni_resflags = 2965835520, ni_debugflags = 65027, ni_loopcnt = 65535, ni_pathlen = 0, ni_next = 0xfffffe03b0c70db0 "\020\016ǰ\003\376\377\377w\327\b\201\377\377\377\377", ni_cnd = {cn_origflags = 18446744071577972639, cn_flags = 18446741890537033476, cn_thread = 0xfffffe03b0c70f40, cn_cred = 0x1, cn_nameiop = LOOKUP, cn_lkflags = 0, cn_pnbuf = 0xfffff801cda27740 "\300\200\370\n\001\376\377\377\020p\357\260\003\376\377\377", cn_nameptr = 0x1ab485eaa558 <error: Cannot access memory at address 0x1ab485eaa558>, cn_namelen = -2183172518384}, ni_cap_tracker = { tqh_first = 0xffffffff8108d777 <trap_pfault+519>, tqh_last = 0x0}, 
ni_dvp_seqc = 4, ni_vp_seqc = 0} id0 = 231 id1 = 50 error = <optimized out> pathbuf = 0xfffff80105479400 "\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255\336\336\300\255", <incomplete sequence \336>... mp = 0xfffffe03ae86f5c0 #8 0xffffffff8108ded0 in syscallenter (td=<optimized out>) at /usr/home/jfc/freebsd/src/sys/amd64/amd64/../../kern/subr_syscall.c:188 se = 0xffffffff81abd8a0 <sysent+704> p = 0xfffffe03b0ef7010 sa = 0xfffff801cda27b18 error = <optimized out> sy_thr_static = true traced = <optimized out> _audit_entered = <optimized out> #9 amd64_syscall (td=0xfffff801cda27740, traced=0) at /usr/home/jfc/freebsd/src/sys/amd64/amd64/trap.c:1181 ksi = {ksi_link = {tqe_next = 0xfffffe03b0c70f30, tqe_prev = 0xffffffff8108d043 <trap+1971>}, ksi_info = { si_signo = -844990656, si_errno = -2047, si_code = -1329131712, si_pid = -509, si_uid = 2965835376, si_status = -509, si_addr = 0x46, si_value = {sival_int = -1329131920, sival_ptr = 0xfffffe03b0c70e70, sigval_int = -1329131920, sigval_ptr = 0xfffffe03b0c70e70}, _reason = {_fault = { _trapno = -2135246730}, _timer = {_timerid = -2135246730, _overrun = -1}, _mesgq = {_mqd = -2135246730}, _poll = { _band = -2135246730}, __spare__ = {__spare1__ = -2135246730, __spare2__ = {-2114969952, -1, 70, 0, 0, 0, 
725966195}}}}, ksi_flags = -1329131856, ksi_sigq = 0xffffffff80b5f242 <handleevents+578>} Here are selected fields from the struct mount object: mnt_vfs_ops = 1 mnt_kern_flag = 0x4100 mnt_flag = 0x1001 mnt_rootvnode = 0 mnt_gen = 1
(In reply to John F. Carr from comment #5) Try this please diff --git a/sys/fs/msdosfs/msdosfs_vfsops.c b/sys/fs/msdosfs/msdosfs_vfsops.c index 03c794bad900..258c701bd300 100644 --- a/sys/fs/msdosfs/msdosfs_vfsops.c +++ b/sys/fs/msdosfs/msdosfs_vfsops.c @@ -1006,9 +1006,8 @@ msdosfs_remount_ro(void *arg, int pending) } MSDOSFS_UNLOCK_MP(pmp); - do { + while (--pending >= 0) vfs_unbusy(pmp->pm_mountp); - } while (--pending >= 0); } void
(In reply to Konstantin Belousov from comment #6) Changing do{}while to while{} seems to have fixed the problem. I ran rsync until the filesystem had an error and was set read only. I unmounted the filesystem and the system did not crash.
A commit in branch main references this bug: URL: https://cgit.FreeBSD.org/src/commit/?id=4b3ffc5918b9968ea6ddee6f6cdf3538072e5ef4 commit 4b3ffc5918b9968ea6ddee6f6cdf3538072e5ef4 Author: Konstantin Belousov <kib@FreeBSD.org> AuthorDate: 2024-01-19 23:50:48 +0000 Commit: Konstantin Belousov <kib@FreeBSD.org> CommitDate: 2024-01-20 01:36:35 +0000 msdosfs_remount_ro(): correct vfs_unbusy() loop PR: 276408 Reported by: John F. Carr Fixes: 13ccb04589e2c5c840e19b407a59e44cb70ac28e Sponsored by: The FreeBSD Foundation MFC after: 1 week sys/fs/msdosfs/msdosfs_vfsops.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-)
A commit in branch stable/14 references this bug: URL: https://cgit.FreeBSD.org/src/commit/?id=68d021de34550cf1d133a375ab13544d453323e1 commit 68d021de34550cf1d133a375ab13544d453323e1 Author: Konstantin Belousov <kib@FreeBSD.org> AuthorDate: 2024-01-19 23:50:48 +0000 Commit: Konstantin Belousov <kib@FreeBSD.org> CommitDate: 2024-01-25 03:44:55 +0000 msdosfs_remount_ro(): correct vfs_unbusy() loop PR: 276408 (cherry picked from commit 4b3ffc5918b9968ea6ddee6f6cdf3538072e5ef4) sys/fs/msdosfs/msdosfs_vfsops.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-)
A commit in branch stable/14 references this bug: URL: https://cgit.FreeBSD.org/src/commit/?id=e465acd0f6b9bfdedd0f3dc21cde81ccade3249c commit e465acd0f6b9bfdedd0f3dc21cde81ccade3249c Author: Konstantin Belousov <kib@FreeBSD.org> AuthorDate: 2024-01-17 22:55:05 +0000 Commit: Konstantin Belousov <kib@FreeBSD.org> CommitDate: 2024-01-25 03:44:55 +0000 msdosfs_rename(): handle errors from msdosfs_lookup_ino() PR: 276408 (cherry picked from commit be0df84849ff3e8fb5ec65176ffde88dbefdc434) sys/fs/msdosfs/msdosfs_vnops.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
A commit in branch stable/13 references this bug: URL: https://cgit.FreeBSD.org/src/commit/?id=bbcc33086adc5e2baba3c97f1e53fedcaa504edb commit bbcc33086adc5e2baba3c97f1e53fedcaa504edb Author: Konstantin Belousov <kib@FreeBSD.org> AuthorDate: 2024-01-19 23:50:48 +0000 Commit: Konstantin Belousov <kib@FreeBSD.org> CommitDate: 2024-01-24 13:04:38 +0000 msdosfs_remount_ro(): correct vfs_unbusy() loop PR: 276408 (cherry picked from commit 4b3ffc5918b9968ea6ddee6f6cdf3538072e5ef4) sys/fs/msdosfs/msdosfs_vfsops.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-)
A commit in branch stable/13 references this bug: URL: https://cgit.FreeBSD.org/src/commit/?id=14ef49bbadeccf6f765025af33a07886fe3dae6f commit 14ef49bbadeccf6f765025af33a07886fe3dae6f Author: Konstantin Belousov <kib@FreeBSD.org> AuthorDate: 2024-01-17 22:55:05 +0000 Commit: Konstantin Belousov <kib@FreeBSD.org> CommitDate: 2024-01-24 13:04:38 +0000 msdosfs_rename(): handle errors from msdosfs_lookup_ino() PR: 276408 (cherry picked from commit be0df84849ff3e8fb5ec65176ffde88dbefdc434) sys/fs/msdosfs/msdosfs_vnops.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)