0. Introduction --------------- We tried to install Subversion in a jail on a unionfs filesystem. Unfortunately, the subversion FreeBSD port doesn't do "make check" before installing, so we spent quite some time debugging to find out it was actually a filesystem bug. 1. The bug in real world. ------------------------- After installing Subversion and get it running using mod_dav_svn, when trying to make a "svn commit" or "svn import", errors similar to these show up in apache error log: [Wed Oct 01 21:04:35 2008] [error] [client 10.1.1.11] Could not MERGE resource "/svn/test2/!svn/act/e28480c9-eb8f-dd11-808c-0018fe7759ca" into "/svn/test2". [409, #0] [Wed Oct 01 21:04:35 2008] [error] [client 10.1.1.11] An error occurred while committing the transaction. [409, #2] [Wed Oct 01 21:04:35 2008] [error] [client 10.1.1.11] Can't remove '/usr/local/svn/test2/db/transactions/2-2.txn/node.0.0' [409, #2] [Wed Oct 01 21:04:35 2008] [error] [client 10.1.1.11] Can't remove file '/usr/local/svn/test2/db/transactions/2-2.txn/node.0.0': No such file or directory [409, #2] The last error is also shown up at client side (svn binary). However, all actions are succesfully accomplished, so the error shouldn't appear at all. Also, when doing "make check" after building subversion from source, would also fail with identical errors. 2. Bug location --------------- After some ktracing and tracing the function calls back to the "root", we discovered that the bug is probably present in Standard C library. Fix: See the attached patch for /usr/src/lib/libc/gen/rewinddir.c. Patch attached with submission follows: How-To-Repeat: 3. Proof of concept code ------------------------ The following code below works fine on UFS, but fails on unionfs. The code itself was taken from subversion codebase and is rewritten not to use Apache apr library, but uses libc functions directly instead. It just shows that there are discrepancies in UFS vs. unionfs behaviour. ----------------------------------------------------------------------- #include <stdio.h> #include <sys/types.h> #include <dirent.h> #include <fcntl.h> #include <string.h> typedef struct dirent direntry; int remove_dir(char *path) { int need_rewind; if (path[0] == '\0') path = "."; DIR *dir; if ((dir = opendir(path)) == NULL) { perror("opendir"); return 1; } do { need_rewind = 0; int ret; long position; direntry entry; direntry *result; while (1) { if (readdir_r(dir, &entry, &result) != 0) { perror("readdir_r"); return 1; } if (result == NULL) { break; } printf("Working on '%s'\n", entry.d_name); printf(" entry.d_fileno is %d\n", entry.d_fileno); if ((entry.d_type == DT_DIR) && ((strcmp(entry.d_name, ".") == 0) || (strcmp(entry.d_name, "..") == 0))) { continue; } else { char fullpath[1000]; need_rewind = 1; strcpy(fullpath, path); strcat(fullpath, "/"); strcat(fullpath, entry.d_name); printf(" fullpath is '%s'\n", fullpath); if (entry.d_type == DT_DIR) { if (remove_dir(fullpath)) { return 1; } } else { if (unlink(fullpath) == -1) { perror("unlink"); return 1; } } } } if (need_rewind) { printf("Rewinding\n"); rewinddir(dir); } } while (need_rewind); if (closedir(dir) == -1) { perror("closedir"); return 1; } if (rmdir(path) == -1) { perror("rmdir"); return 1; } return 0; } int main(int argc, const char *const argv[]) { if (mkdir("test", 0755) == -1) { perror("mkdir"); return 1; } int file = -1; if ((file = open("test/file", O_WRONLY | O_CREAT | O_TRUNC, 0644)) == -1) { perror("open"); return 1; } if (close(file) == -1) { perror("close"); return 1; } if (remove_dir("test")) { printf("Test failed\n"); return 1; } else { printf("It works as it should\n"); return 0; } } ----------------------------------------------------------------------
This should be fixed by http://svnweb.freebsd.org/base?view=revision&revision=268531
Fix has been merged to 10. I don't have any plans to merge it to older branches.