Bug 96677 - [patch] Improvements for src/tools/tools/recoverdisk
Summary: [patch] Improvements for src/tools/tools/recoverdisk
Status: Closed FIXED
Alias: None
Product: Base System
Classification: Unclassified
Component: bin (show other bugs)
Version: Unspecified
Hardware: Any Any
: Normal Affects Only Me
Assignee: Maxim Konovalov
URL:
Keywords:
Depends on:
Blocks:
 
Reported: 2006-05-02 20:10 UTC by Ulrich Spoerlein
Modified: 2015-01-13 12:42 UTC (History)
1 user (show)

See Also:


Attachments

Note You need to log in before you can comment on or make changes to this bug.
Description Ulrich Spoerlein 2006-05-02 20:10:09 UTC
Already sent this to current@ for review. Only positive feedback so far.
I also whipped up a preliminary man-page, though it would still require
some work. I'm not sure about the 128kB DMA reads and my mdoc-fu is
probably rather lousy.

Fix: 

.\" Insert license here
.\" $FreeBSD$
.\"
.Dd May 2, 2006
.Dt RECOVERDISK 1
.Os
.Sh NAME
.Nm recoverdisk
.Nd recover data from hard disk or optical media
.Sh SYNOPSIS
.Nm
.Op Fl r Ar rlist
.Op Fl w Ar wlist
.Ar special
.Op file
.Sh DESCRIPTION
The
.Nm
utility reads data from the
.Pa special
file until all blocks have been successfully read.
It starts reading in multiples of the sector size.
Whenever a block fails to read, it is moved to the end of the work queue and
will be read again later, possibly with a smaller read size.
.Pp
It uses block sizes of roughly 1 MB, 64kB, and the native sector size (usually
512 bytes).
These figures are adjusted slightly for devices whose sector size is not a
power of 2, e.g., audio CDs with a sector size of 2352 bytes.
.Pp
The options are as follows:
.Bl -tag -width indent
.It Fl r Ar rlist
Read the list of blocks and block sizes to read from the specified file.
.It Fl w Ar wlist
Write the list of remaining blocks to read to the specified file if
.Nm
is aborted via SIGINT.
.El
.Pp
The
.Fl r
and
.Fl w
options can be used in combination.
In particular, they can point to the same file, which will be updated on abort.
.Sh OUTPUT
.Nm
prints several columns detailing the progress:
.Bl -tag -width remaining
.It start
Starting offset of the current block.
.It size
Read size of the current block.
.It len
Length of the current block.
.It state
Is increased for every failed read.
.It done
Number of bytes already read.
.It remaining
Number of bytes remaining.
.It % done
Percent complete.
.El
.Sh EXAMPLES
# recover data from failing hard drive ad3
.Dl $ touch /data/lots_of_space
.Dl $ recoverdisk /dev/ad3 /data/lots_of_space
.Pp
# clone a hard disk
.Dl $ recoverdisk /dev/ad3 /dev/ad4
.Pp
# read an ISO image from a CD-ROM
.Dl $ touch /data/cd.iso; recoverdisk /dev/acd0 /data/cd.iso
.Pp
# continue reading from a broken CD and update the existing worklist
.Dl $ recoverdisk -r worklist -w worklist /dev/acd0 /data/cd.iso
.Sh SEE ALSO
.Xr dd 1
.Sh HISTORY
The
.Nm
command first appeared in
.Fx 6.2 .
.Sh BUGS
Reading from media where the sector size is not a power of 2 will make all
1 MB reads fail.
This is due to the DMA reads being split up into blocks of at most 128kB.
XXX I have no clue, verify this!
These reads then fail if the sector size is not a divisor of 128kB.
When reading a full raw audio CD, this leads to roughly 700 error messages
flying by.
This is harmless.
.Sh AUTHORS
.An -nosplit
The original implementation was done by
.An Poul-Henning Kamp Aq phk@freebsd.org
with minor improvements from
.An Ulrich Sp\(:orlein Aq uspoerlein@gmail.com .
--- recoverdisk.1 ends here ---
--zQUh8oyh79IMLnlclRx12bykJUVu3o6Lm9IK8sYbFCWiGuKA
Content-Type: text/plain; name="recoverdisk.diff"
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment; filename="recoverdisk.diff"

--- recoverdisk.c.orig	Thu Dec 15 19:41:42 2005
+++ recoverdisk.c	Tue May  2 18:16:01 2006
@@ -1,4 +1,4 @@
-/*
+/*-
  * ----------------------------------------------------------------------------
  * "THE BEER-WARE LICENSE" (Revision 42):
  * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
@@ -14,15 +14,20 @@
 #include <err.h>
 #include <errno.h>
 #include <fcntl.h>
+#include <signal.h>
+#include <string.h>
+#include <sysexits.h>
 #include <time.h>
 #include <unistd.h>
 #include <sys/queue.h>
 #include <sys/disk.h>
 #include <sys/stat.h>
 
-#define BIGSIZE		(1024 * 1024)
-#define MEDIUMSIZE	(64 * 1024)
-#define MINSIZE		(512)
+#define	MIN(a,b) (((a)<(b))?(a):(b))
+
+static size_t bigsize = 1024 * 1024;
+static size_t medsize = 64 * 1024;
+static size_t minsize = 512;
 
 struct lump {
 	off_t			start;
@@ -48,29 +53,120 @@
 	TAILQ_INSERT_TAIL(&lumps, lp, list);
 }
 
+static struct lump *lp;
+static char *wworklist = NULL;
+static char *rworklist = NULL;
+
+/* Save the worklist, if -w was given */
+static void
+save_worklist(__unused int sig)
+{
+	FILE *file;
+
+	if (wworklist != NULL) {
+		(void)fprintf(stderr, "\nSaving worklist ...");
+		fflush(stderr);
+
+		file = fopen(wworklist, "w");
+		if (file == NULL)
+			err(1, "Error opening file %s", wworklist);
+
+		for (;;) {
+			lp = TAILQ_FIRST(&lumps);
+			if (lp == NULL)
+				break;
+			fprintf(file, "%jd %jd %d\n",
+			    (intmax_t)lp->start, (intmax_t)lp->len, lp->state);
+			TAILQ_REMOVE(&lumps, lp, list);
+		}
+		(void)fprintf(stderr, " done.\n");
+	}
+	exit(0);
+}
+
+static off_t
+read_worklist(off_t t)
+{
+	off_t s, l, d;
+	int state, lines;
+	FILE *file;
+	
+	(void)fprintf(stderr, "Reading worklist ...");
+	fflush(stderr);
+	file = fopen(rworklist, "r");
+	if (file == NULL)
+		err(1, "Error opening file %s", rworklist);
+
+	lines = 0;
+	d = t;
+	for (;;) {
+		++lines;
+		if (3 != fscanf(file, "%jd %jd %d\n", &s, &l, &state)) {
+			if (!feof(file))
+				err(1, "Error parsing file %s at line %d",
+				    rworklist, lines);
+			else
+				break;
+		}
+
+		new_lump(s, l, state);
+		d -= l;
+	}
+	(void)fprintf(stderr, " done.\n");
+
+	/* 
+	 * Return the number of bytes already read
+	 * (at least not in worklist).
+	 */
+	return (d);
+}
+
+static void
+usage(void)
+{
+	(void)fprintf(stderr,
+    "usage: recoverdisk [-r worklist] [-w worklist] source-drive [destination]\n");
+	exit(EX_USAGE);
+}
+
 int
-main(int argc, const char **argv)
+main(int argc, char * const argv[])
 {
+	int ch;
 	int fdr, fdw;
-	struct lump *lp;
-	off_t 	t, d;
+	off_t t, d;
 	size_t i, j;
 	int error, flags;
 	u_char *buf;
-	u_int sectorsize, minsize;
+	u_int sectorsize;
 	time_t t1, t2;
 	struct stat sb;
 
+	while ((ch = getopt(argc, argv, "r:w:")) != -1) {
+		switch (ch) {
+			case 'w':
+				wworklist = strdup(optarg);
+				if (wworklist == NULL)
+					err(1, "Cannot allocate enough memory");
+				break;
+			case 'r':
+				rworklist = strdup(optarg);
+				if (rworklist == NULL)
+					err(1, "Cannot allocate enough memory");
+				break;
+			default:
+				usage();
+		}
+	}
+	argc -= optind;
+	argv += optind;
 
-	if (argc < 2)
-		errx(1, "Usage: %s source-drive [destination]", argv[0]);
+	if (argc < 1 || argc > 2)
+		usage();
 
-	buf = malloc(BIGSIZE);
-	if (buf == NULL)
-		err(1, "Cannot allocate %d bytes buffer", BIGSIZE);
-	fdr = open(argv[1], O_RDONLY);
+	fdr = open(argv[0], O_RDONLY);
 	if (fdr < 0)
-		err(1, "Cannot open read descriptor %s", argv[1]);
+		err(1, "Cannot open read descriptor %s", argv[0]);
 
 	error = fstat(fdr, &sb);
 	if (error < 0)
@@ -80,46 +176,62 @@
 		error = ioctl(fdr, DIOCGSECTORSIZE, &sectorsize);
 		if (error < 0)
 			err(1, "DIOCGSECTORSIZE failed");
+
+		/*
+		 * Make medsize roughly 64kB, depending on native sector
+		 * size. bigsize has to be a multiple of medsize.
+		 * For media with 2352 sectors, this will
+		 * result in 2352, 63504, and 1016064 bytes.
+		 */
 		minsize = sectorsize;
+		medsize = (medsize / sectorsize) * sectorsize;
+		bigsize = medsize * 16;
 
 		error = ioctl(fdr, DIOCGMEDIASIZE, &t);
 		if (error < 0)
 			err(1, "DIOCGMEDIASIZE failed");
 	} else {
-		sectorsize = 1;
 		t = sb.st_size;
-		minsize = MINSIZE;
 		flags |= O_CREAT | O_TRUNC;
 	}
 
-	if (argc > 2) {
-		fdw = open(argv[2], flags, DEFFILEMODE);
+	buf = malloc(bigsize);
+	if (buf == NULL)
+		err(1, "Cannot allocate %jd bytes buffer", (intmax_t)bigsize);
+
+	if (argc > 1) {
+		fdw = open(argv[1], flags, DEFFILEMODE);
 		if (fdw < 0)
-			err(1, "Cannot open write descriptor %s", argv[2]);
+			err(1, "Cannot open write descriptor %s", argv[1]);
 	} else {
 		fdw = -1;
 	}
 
-	new_lump(0, t, 0);
-	d = 0;
+	if (rworklist != NULL) {
+		d = read_worklist(t);
+	} else {
+		new_lump(0, t, 0);
+		d = 0;
+	}
+
+	signal(SIGINT, save_worklist);
 
 	t1 = 0;
+	printf("%13s %7s %13s %5s %13s %13s %9s\n",
+	    "start", "size", "len", "state", "done", "remaining", "% done");
 	for (;;) {
 		lp = TAILQ_FIRST(&lumps);
 		if (lp == NULL)
 			break;
-		TAILQ_REMOVE(&lumps, lp, list);
 		while (lp->len > 0) {
-			i = BIGSIZE;
-			if (lp->len < BIGSIZE)
-				i = lp->len;
+			i = MIN(lp->len, bigsize);
 			if (lp->state == 1)
-				i = MEDIUMSIZE;
+				i = MIN(lp->len, medsize);
 			if (lp->state > 1)
-				i = minsize;
+				i = MIN(lp->len, minsize);
 			time(&t2);
-			if (t1 != t2 || lp->len < BIGSIZE) {
-				printf("\r%13jd %7zu %13jd %3d %13jd %13jd %.8f",
+			if (t1 != t2 || lp->len < bigsize) {
+				printf("\r%13jd %7zu %13jd %5d %13jd %13jd %.7f",
 				    (intmax_t)lp->start,
 				    i, 
 				    (intmax_t)lp->len,
@@ -152,9 +264,9 @@
 			lp->start += i;
 			lp->len -= i;
 		}
+		TAILQ_REMOVE(&lumps, lp, list);
 		free(lp);
 	}
 	printf("\nCompleted\n");
-	exit (0);
+	return (0);
 }
-
Comment 1 Maxim Konovalov 2006-05-03 21:47:42 UTC
Hi Ulrich,

[...]
> +static void
> +save_worklist(__unused int sig)
> +{
> +	FILE *file;
> +
> +	if (wworklist != NULL) {
> +		(void)fprintf(stderr, "\nSaving worklist ...");
> +		fflush(stderr);
> +
> +		file = fopen(wworklist, "w");
> +		if (file == NULL)
> +			err(1, "Error opening file %s", wworklist);
> +
> +		for (;;) {
> +			lp = TAILQ_FIRST(&lumps);
> +			if (lp == NULL)
> +				break;
> +			fprintf(file, "%jd %jd %d\n",
> +			    (intmax_t)lp->start, (intmax_t)lp->len, lp->state);
> +			TAILQ_REMOVE(&lumps, lp, list);
> +		}
> +		(void)fprintf(stderr, " done.\n");
> +	}
> +	exit(0);
> +}
[...]

In general, you can't use signal-unsafe functions (e.g. all stdio(3)
functions) in signal handlers.  Manipulation of unprotected data
in a signal handler is unsafe too.

See Bruce's followup to bin/78304 for some useful info about signal
handlers: http://www.freebsd.org/cgi/query-pr.cgi?pr=bin/78304

-- 
Maxim Konovalov
Comment 2 Ulrich Spoerlein 2006-05-04 14:56:17 UTC
Here's a simple alternative that only sets a flag in the signal
handler.

Ulrich Spoerlein

--- recoverdisk.c.orig	Thu Dec 15 19:41:42 2005
+++ recoverdisk.c	Thu May  4 15:51:08 2006
@@ -1,4 +1,4 @@
-/*
+/*-
  * ----------------------------------------------------------------------------
  * "THE BEER-WARE LICENSE" (Revision 42):
  * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
@@ -8,21 +8,29 @@
  *
  * $FreeBSD: src/tools/tools/recoverdisk/recoverdisk.c,v 1.4.4.1 2005/12/15 03:50:03 sobomax Exp $
  */
+#include <stdbool.h>
 #include <stdio.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <err.h>
 #include <errno.h>
 #include <fcntl.h>
+#include <signal.h>
+#include <string.h>
+#include <sysexits.h>
 #include <time.h>
 #include <unistd.h>
 #include <sys/queue.h>
 #include <sys/disk.h>
 #include <sys/stat.h>
 
-#define BIGSIZE		(1024 * 1024)
-#define MEDIUMSIZE	(64 * 1024)
-#define MINSIZE		(512)
+#define	MIN(a,b) (((a)<(b))?(a):(b))
+
+static bool aborting = false;
+
+static size_t bigsize = 1024 * 1024;
+static size_t medsize = 64 * 1024;
+static size_t minsize = 512;
 
 struct lump {
 	off_t			start;
@@ -33,7 +41,6 @@
 
 static TAILQ_HEAD(, lump) lumps = TAILQ_HEAD_INITIALIZER(lumps);
 
-
 static void
 new_lump(off_t start, off_t len, int state)
 {
@@ -48,29 +55,128 @@
 	TAILQ_INSERT_TAIL(&lumps, lp, list);
 }
 
+static struct lump *lp;
+static char *wworklist = NULL;
+static char *rworklist = NULL;
+
+/* Save the worklist if -w was given */
+static void
+save_worklist(void)
+{
+	FILE *file;
+
+	if (wworklist != NULL) {
+		(void)fprintf(stderr, "\nSaving worklist ...");
+		fflush(stderr);
+
+		file = fopen(wworklist, "w");
+		if (file == NULL)
+			err(1, "Error opening file %s", wworklist);
+
+		for (;;) {
+			lp = TAILQ_FIRST(&lumps);
+			if (lp == NULL)
+				break;
+			fprintf(file, "%jd %jd %d\n",
+			    (intmax_t)lp->start, (intmax_t)lp->len, lp->state);
+			TAILQ_REMOVE(&lumps, lp, list);
+		}
+		(void)fprintf(stderr, " done.\n");
+	}
+	exit(0);
+}
+
+/* Read the worklist if -r was given */
+static off_t
+read_worklist(off_t t)
+{
+	off_t s, l, d;
+	int state, lines;
+	FILE *file;
+	
+	(void)fprintf(stderr, "Reading worklist ...");
+	fflush(stderr);
+	file = fopen(rworklist, "r");
+	if (file == NULL)
+		err(1, "Error opening file %s", rworklist);
+
+	lines = 0;
+	d = t;
+	for (;;) {
+		++lines;
+		if (3 != fscanf(file, "%jd %jd %d\n", &s, &l, &state)) {
+			if (!feof(file))
+				err(1, "Error parsing file %s at line %d",
+				    rworklist, lines);
+			else
+				break;
+		}
+
+		new_lump(s, l, state);
+		d -= l;
+	}
+	(void)fprintf(stderr, " done.\n");
+
+	/* 
+	 * Return the number of bytes already read
+	 * (at least not in worklist).
+	 */
+	return (d);
+}
+
+static void
+usage(void)
+{
+	(void)fprintf(stderr,
+    "usage: recoverdisk [-r worklist] [-w worklist] source-drive [destination]\n");
+	exit(EX_USAGE);
+}
+
+static void
+sighandler(__unused int sig)
+{
+	aborting = true;
+	signal(SIGINT, SIG_DFL);
+}
+
 int
-main(int argc, const char **argv)
+main(int argc, char * const argv[])
 {
+	int ch;
 	int fdr, fdw;
-	struct lump *lp;
-	off_t 	t, d;
+	off_t t, d;
 	size_t i, j;
 	int error, flags;
 	u_char *buf;
-	u_int sectorsize, minsize;
+	u_int sectorsize;
 	time_t t1, t2;
 	struct stat sb;
 
+	while ((ch = getopt(argc, argv, "r:w:")) != -1) {
+		switch (ch) {
+			case 'w':
+				wworklist = strdup(optarg);
+				if (wworklist == NULL)
+					err(1, "Cannot allocate enough memory");
+				break;
+			case 'r':
+				rworklist = strdup(optarg);
+				if (rworklist == NULL)
+					err(1, "Cannot allocate enough memory");
+				break;
+			default:
+				usage();
+		}
+	}
+	argc -= optind;
+	argv += optind;
 
-	if (argc < 2)
-		errx(1, "Usage: %s source-drive [destination]", argv[0]);
+	if (argc < 1 || argc > 2)
+		usage();
 
-	buf = malloc(BIGSIZE);
-	if (buf == NULL)
-		err(1, "Cannot allocate %d bytes buffer", BIGSIZE);
-	fdr = open(argv[1], O_RDONLY);
+	fdr = open(argv[0], O_RDONLY);
 	if (fdr < 0)
-		err(1, "Cannot open read descriptor %s", argv[1]);
+		err(1, "Cannot open read descriptor %s", argv[0]);
 
 	error = fstat(fdr, &sb);
 	if (error < 0)
@@ -80,46 +186,66 @@
 		error = ioctl(fdr, DIOCGSECTORSIZE, &sectorsize);
 		if (error < 0)
 			err(1, "DIOCGSECTORSIZE failed");
+
+		/*
+		 * Make medsize roughly 64kB, depending on native sector
+		 * size. bigsize has to be a multiple of medsize.
+		 * For media with 2352 sectors, this will
+		 * result in 2352, 63504, and 1016064 bytes.
+		 */
 		minsize = sectorsize;
+		medsize = (medsize / sectorsize) * sectorsize;
+		bigsize = medsize * 16;
 
 		error = ioctl(fdr, DIOCGMEDIASIZE, &t);
 		if (error < 0)
 			err(1, "DIOCGMEDIASIZE failed");
 	} else {
-		sectorsize = 1;
 		t = sb.st_size;
-		minsize = MINSIZE;
 		flags |= O_CREAT | O_TRUNC;
 	}
 
-	if (argc > 2) {
-		fdw = open(argv[2], flags, DEFFILEMODE);
+	buf = malloc(bigsize);
+	if (buf == NULL)
+		err(1, "Cannot allocate %jd bytes buffer", (intmax_t)bigsize);
+
+	if (argc > 1) {
+		fdw = open(argv[1], flags, DEFFILEMODE);
 		if (fdw < 0)
-			err(1, "Cannot open write descriptor %s", argv[2]);
+			err(1, "Cannot open write descriptor %s", argv[1]);
 	} else {
 		fdw = -1;
 	}
 
-	new_lump(0, t, 0);
-	d = 0;
+	if (rworklist != NULL) {
+		d = read_worklist(t);
+	} else {
+		new_lump(0, t, 0);
+		d = 0;
+	}
+
+	if (wworklist != NULL) {
+		signal(SIGINT, sighandler);
+		/* Has this any effect on pread/pwrite at all? */
+		siginterrupt(SIGINT, 1);
+	}
 
 	t1 = 0;
+	printf("%13s %7s %13s %5s %13s %13s %9s\n",
+	    "start", "size", "len", "state", "done", "remaining", "% done");
 	for (;;) {
 		lp = TAILQ_FIRST(&lumps);
 		if (lp == NULL)
 			break;
-		TAILQ_REMOVE(&lumps, lp, list);
-		while (lp->len > 0) {
-			i = BIGSIZE;
-			if (lp->len < BIGSIZE)
-				i = lp->len;
+		while (lp->len > 0 && !aborting) {
+			i = MIN(lp->len, bigsize);
 			if (lp->state == 1)
-				i = MEDIUMSIZE;
+				i = MIN(lp->len, medsize);
 			if (lp->state > 1)
-				i = minsize;
+				i = MIN(lp->len, minsize);
 			time(&t2);
-			if (t1 != t2 || lp->len < BIGSIZE) {
-				printf("\r%13jd %7zu %13jd %3d %13jd %13jd %.8f",
+			if (t1 != t2 || lp->len < bigsize) {
+				printf("\r%13jd %7zu %13jd %5d %13jd %13jd %.7f",
 				    (intmax_t)lp->start,
 				    i, 
 				    (intmax_t)lp->len,
@@ -152,9 +278,13 @@
 			lp->start += i;
 			lp->len -= i;
 		}
+		
+		if (aborting)
+			save_worklist();
+		
+		TAILQ_REMOVE(&lumps, lp, list);
 		free(lp);
 	}
 	printf("\nCompleted\n");
-	exit (0);
+	return (0);
 }
-
Comment 3 Maxim Konovalov freebsd_committer freebsd_triage 2006-05-06 20:52:49 UTC
State Changed
From-To: open->patched

Committed to HEAD with minor modifications.  Thanks for the 
submission! 


Comment 4 Maxim Konovalov freebsd_committer freebsd_triage 2006-05-06 20:52:49 UTC
Responsible Changed
From-To: freebsd-bugs->maxim

Feedbacks trap.
Comment 5 Maxim Konovalov freebsd_committer freebsd_triage 2006-06-04 11:50:05 UTC
State Changed
From-To: patched->closed

Merged to RELENG_6.
Comment 6 commit-hook freebsd_committer freebsd_triage 2015-01-13 12:42:13 UTC
A commit references this bug:

Author: vanilla
Date: Tue Jan 13 12:41:28 UTC 2015
New revision: 376922
URL: https://svnweb.freebsd.org/changeset/ports/376922

Log:
  1: Add www/pecl-http1, repo copy from www/pecl-http
  2: add CONFLICTS.

  PR:		ports/96677
  Submitted by:	Bernard Spil <spil.oss at gmail.com>

Changes:
  head/www/Makefile
  head/www/pecl-http/Makefile
  head/www/pecl-http1/
  head/www/pecl-http1/Makefile
  head/www/pecl-http1/distinfo
  head/www/pecl-http1/pkg-descr