Bug 14786 - [PATCH] tail breaks on large files
Summary: [PATCH] tail breaks on large files
Status: Closed FIXED
Alias: None
Product: Base System
Classification: Unclassified
Component: bin (show other bugs)
Version: 4.0-CURRENT
Hardware: Any Any
Importance: Normal Affects Only Me
Assignee: dwmalone
URL:
Keywords:
Depends on:
Blocks:
 
Reported: 1999-11-08 20:00 UTC by chris
Modified: 2001-04-23 15:19 UTC (History)
0 users

See Also:


Attachments

Note: You need to log in before you can comment on or make changes to this bug.
Description chris 1999-11-08 20:00:01 UTC
As documented in its man page, mmap() doesn't work on files >2GB.

The following code is in /usr/src/usr.bin/tail/forward.c:

        if ((start = mmap(NULL, (size_t)size,
            PROT_READ, MAP_SHARED, fileno(fp), (off_t)0)) == MAP_FAILED) {
                ierr();
                return;
        }

Unfortunately, after returning from the rlines function, which contains
that code, tail attempts to display any data that has come in "since
we read the file".  In this case, because mmap() failed, the file has
not been read at all, so tail spits out the entire contents of the file.

Fix: 

Probably one of:

  * Detect sizes > mmap()'s limit and exit with an appropriate message.
  * Dump out when mmap() fails.
  * Use a more complicated algorithm that can handle large files.
How-To-Repeat: 
$ tail <file-larger-than-2GB>
Comment 1 dwmalone 2000-01-11 13:39:17 UTC
I've produced a patch which works by mapping 4MB chunks of the file
at a time, instead of trying to map the whole file. It seems to work
reasonably well, and seems to produce the same results as the original
version of tail - except on large files, where it now works correctly.

"tail -r" also suffers from similar problems, which I could produce
a patch for if people think this is the correct way to do things.

	David.

--- forward.c.orig	Mon Dec 13 09:40:31 1999
+++ forward.c	Mon Dec 13 01:11:42 1999
@@ -207,9 +207,11 @@
 	long off;
 	struct stat *sbp;
 {
-	register off_t size;
+	register off_t size, curoff;
 	register char *p;
 	char *start;
+	off_t mapoff;
+	size_t maplen;
 
 	if (!(size = sbp->st_size))
 		return;
@@ -220,27 +222,59 @@
 		return;
 	}
 
-	if ((start = mmap(NULL, (size_t)size,
-	    PROT_READ, MAP_SHARED, fileno(fp), (off_t)0)) == MAP_FAILED) {
-		ierr();
-		return;
+	start = NULL;
+	for (curoff = size - 1, mapoff = size; curoff >= 0; curoff--) {
+		if (curoff < mapoff) {
+			if (start && munmap(start, maplen)) {
+				ierr();
+				return;
+			}
+			mapoff = curoff & (~((4<<20)-1));
+			maplen = curoff - mapoff + 1;
+			if ((start = mmap(NULL, maplen, PROT_READ,
+			    MAP_SHARED, fileno(fp), mapoff)) == MAP_FAILED) {
+				ierr();
+				return;
+			}
+		}
+		p = start + (curoff - mapoff);
+		/* Last char is special, ignore whether newline or not. */
+		if (*p == '\n' && curoff != size -1 && !--off) {
+			curoff++;
+			break;
+		}
 	}
 
-	/* Last char is special, ignore whether newline or not. */
-	for (p = start + size - 1; --size;)
-		if (*--p == '\n' && !--off) {
-			++p;
-			break;
+	if (curoff < 0)
+		curoff = 0;
+
+	while (curoff != size) {
+		if (curoff < mapoff || curoff >= mapoff + maplen) {
+			if (start && munmap(start, maplen)) {
+				ierr();
+				return;
+			}
+			mapoff = curoff & (~((4<<20)-1));
+			maplen = 4<<20;
+			if (mapoff + maplen > size)
+				maplen = size - mapoff;
+			if ((start = mmap(NULL, maplen, PROT_READ,
+			    MAP_SHARED, fileno(fp), mapoff)) == MAP_FAILED) {
+				ierr();
+				return;
+			}
 		}
+		p = start + (curoff - mapoff);
+		WR(p, maplen - (curoff - mapoff));
+		curoff += maplen - (curoff - mapoff);
+	}
 
 	/* Set the file pointer to reflect the length displayed. */
-	size = sbp->st_size - size;
-	WR(p, size);
 	if (fseek(fp, (long)sbp->st_size, SEEK_SET) == -1) {
 		ierr();
 		return;
 	}
-	if (munmap(start, (size_t)sbp->st_size)) {
+	if (start && munmap(start, maplen)) {
 		ierr();
 		return;
 	}
Comment 2 dwmalone freebsd_committer freebsd_triage 2000-07-11 12:45:34 UTC
Responsible Changed
From-To: freebsd-bugs->dwmalone

Of interest to me.
Comment 3 dwmalone 2000-12-16 12:38:22 UTC
> I've tested this with the patched forward.c file which i have attached and
> it's still not working.

It was a stupid problem - I'd fixed the problem, but left in a
check that the file had to be smaller than 2GB! Try the following,
it seemed to work on a 13GB file for me.

	David.


Index: forward.c
===================================================================
RCS file: /cvs/FreeBSD-CVS/src/usr.bin/tail/forward.c,v
retrieving revision 1.16
diff -u -r1.16 forward.c
--- forward.c	2000/12/02 19:10:12	1.16
+++ forward.c	2000/12/16 12:30:32
@@ -269,40 +269,68 @@
 	long off;
 	struct stat *sbp;
 {
-	register off_t size;
+	off_t size, curoff;
 	register char *p;
 	char *start;
+	off_t mapoff;
+	size_t maplen;
 
 	if (!(size = sbp->st_size))
 		return;
 
-	if (size > SIZE_T_MAX) {
-		errno = EFBIG;
-		ierr();
-		return;
+	start = NULL;
+	for (curoff = size - 1, mapoff = size; curoff >= 0; curoff--) {
+		if (curoff < mapoff) {
+			if (start && munmap(start, maplen)) {
+				ierr();
+				return;
+			}
+			mapoff = curoff & (~((4<<20)-1));
+			maplen = curoff - mapoff + 1;
+			if ((start = mmap(NULL, maplen, PROT_READ,
+			    MAP_SHARED, fileno(fp), mapoff)) == MAP_FAILED) {
+				ierr();
+				return;
+			}
+		}
+		p = start + (curoff - mapoff);
+		/* Last char is special, ignore whether newline or not. */
+		if (*p == '\n' && curoff != size -1 && !--off) {
+			curoff++;
+			break;
+		}
 	}
 
-	if ((start = mmap(NULL, (size_t)size,
-	    PROT_READ, MAP_SHARED, fileno(fp), (off_t)0)) == MAP_FAILED) {
-		ierr();
-		return;
-	}
+	if (curoff < 0)
+		curoff = 0;
 
-	/* Last char is special, ignore whether newline or not. */
-	for (p = start + size - 1; --size;)
-		if (*--p == '\n' && !--off) {
-			++p;
-			break;
+	while (curoff != size) {
+		if (curoff < mapoff || curoff >= mapoff + maplen) {
+			if (start && munmap(start, maplen)) {
+				ierr();
+				return;
+			}
+			mapoff = curoff & (~((4<<20)-1));
+			maplen = 4<<20;
+			if (mapoff + maplen > size)
+				maplen = size - mapoff;
+			if ((start = mmap(NULL, maplen, PROT_READ,
+			    MAP_SHARED, fileno(fp), mapoff)) == MAP_FAILED) {
+				ierr();
+				return;
+			}
 		}
+		p = start + (curoff - mapoff);
+		WR(p, maplen - (curoff - mapoff));
+		curoff += maplen - (curoff - mapoff);
+	}
 
 	/* Set the file pointer to reflect the length displayed. */
-	size = sbp->st_size - size;
-	WR(p, size);
-	if (fseek(fp, (long)sbp->st_size, SEEK_SET) == -1) {
+	if (fseeko(fp, sbp->st_size, SEEK_SET) == -1) {
 		ierr();
 		return;
 	}
-	if (munmap(start, (size_t)sbp->st_size)) {
+	if (start != NULL && munmap(start, maplen)) {
 		ierr();
 		return;
 	}
Comment 4 dwmalone freebsd_committer freebsd_triage 2001-04-23 15:18:42 UTC
State Changed
From-To: open->closed

Fixed in -current and 4.3.