--- join.c Mon Mar 7 23:14:49 2005 +++ join.c Mon Mar 7 23:52:19 2005 @@ -105,7 +105,8 @@ static wchar_t default_tabchar[] = L" \t"; wchar_t *tabchar = default_tabchar;/* delimiter characters (-t) */ -int cmp(LINE *, u_long, LINE *, u_long); +int cmp(LINE *, u_long, LINE *, u_long, int); +int cmpnum(long long, long long); void fieldarg(char *); void joinlines(INPUT *, INPUT *); int mbscoll(const char *, const char *); @@ -114,7 +115,7 @@ void outfield(LINE *, u_long, int); void outoneline(INPUT *, LINE *); void outtwoline(INPUT *, LINE *, INPUT *, LINE *); -void slurp(INPUT *); +void slurp(INPUT *, int); wchar_t *towcs(const char *); void usage(void); @@ -122,7 +123,7 @@ main(int argc, char *argv[]) { INPUT *F1, *F2; - int aflag, ch, cval, vflag; + int aflag, ch, cval, nflag, vflag; char *end; setlocale(LC_ALL, ""); @@ -130,9 +131,9 @@ F1 = &input1; F2 = &input2; - aflag = vflag = 0; + aflag = nflag = vflag = 0; obsolete(argv); - while ((ch = getopt(argc, argv, "\01a:e:j:1:2:o:t:v:")) != -1) { + while ((ch = getopt(argc, argv, "\01na:e:j:1:2:o:t:v:")) != -1) { switch (ch) { case '\01': /* See comment in obsolete(). */ aflag = 1; @@ -180,6 +181,9 @@ --F1->joinf; --F2->joinf; break; + case 'n': + nflag = 1; + break; case 'o': fieldarg(optarg); break; @@ -234,26 +238,26 @@ if (F1->fp == stdin && F2->fp == stdin) errx(1, "only one input file may be stdin"); - slurp(F1); - slurp(F2); + slurp(F1, nflag); + slurp(F2, nflag); while (F1->setcnt && F2->setcnt) { - cval = cmp(F1->set, F1->joinf, F2->set, F2->joinf); + cval = cmp(F1->set, F1->joinf, F2->set, F2->joinf, nflag); if (cval == 0) { /* Oh joy, oh rapture, oh beauty divine! */ if (joinout) joinlines(F1, F2); - slurp(F1); - slurp(F2); + slurp(F1, nflag); + slurp(F2, nflag); } else if (cval < 0) { /* File 1 takes the lead... */ if (F1->unpair) joinlines(F1, NULL); - slurp(F1); + slurp(F1, nflag); } else { /* File 2 takes the lead... */ if (F2->unpair) joinlines(F2, NULL); - slurp(F2); + slurp(F2, nflag); } } @@ -264,18 +268,18 @@ if (F1->unpair) while (F1->setcnt) { joinlines(F1, NULL); - slurp(F1); + slurp(F1, nflag); } if (F2->unpair) while (F2->setcnt) { joinlines(F2, NULL); - slurp(F2); + slurp(F2, nflag); } exit(0); } void -slurp(INPUT *F) +slurp(INPUT *F, int nflag) { LINE *lp, *lastlp, tmp; size_t len; @@ -355,7 +359,7 @@ } /* See if the join field value has changed. */ - if (lastlp != NULL && cmp(lp, F->joinf, lastlp, F->joinf)) { + if (lastlp != NULL && cmp(lp, F->joinf, lastlp, F->joinf, nflag)) { F->pushbool = 1; F->pushback = F->setcnt; break; @@ -393,13 +397,25 @@ } int -cmp(LINE *lp1, u_long fieldno1, LINE *lp2, u_long fieldno2) +cmpnum(long long a, long long b) +{ + if (a < b) + return (-1); + else if (a == b) + return 0; + else + return 1; +} + +int +cmp(LINE *lp1, u_long fieldno1, LINE *lp2, u_long fieldno2, int nflag) { if (lp1->fieldcnt <= fieldno1) return (lp2->fieldcnt <= fieldno2 ? 0 : 1); if (lp2->fieldcnt <= fieldno2) return (-1); - return (mbscoll(lp1->fields[fieldno1], lp2->fields[fieldno2])); + return (nflag ? cmpnum(atoll(lp1->fields[fieldno1]), atoll(lp2->fields[fieldno2])): + mbscoll(lp1->fields[fieldno1], lp2->fields[fieldno2])); } int @@ -664,6 +680,6 @@ (void)fprintf(stderr, "%s %s\n%s\n", "usage: join [-a fileno | -v fileno ] [-e string] [-1 field]", "[-2 field]", - " [-o list] [-t char] file1 file2"); + " [-o list] [-n] [-t char] file1 file2"); exit(1); } --- join.1 Mon Mar 7 23:39:17 2005 +++ join.1 Tue Mar 8 00:00:27 2005 @@ -50,6 +50,7 @@ .Op Fl o Ar list .Bk -words .Ek +.Op Fl n .Op Fl t Ar char .Op Fl \&1 Ar field .Op Fl \&2 Ar field @@ -93,6 +94,8 @@ .It Fl e Ar string Replace empty output fields with .Ar string . +.It Fl n +Assume numerically sorted input files. .It Fl o Ar list The .Fl o @@ -158,6 +161,13 @@ without the .Fl b option. +When the option +.Fl n +is used, the files to be joined should be ordered as with +.Xr sort 1 +with +.Fl n +option. .Pp If one of the arguments .Ar file1 @@ -211,6 +221,11 @@ .Nm command conforms to .St -p1003.1-2001 . +The +.Fl n +option is a non-standard +.Fx +extension. .Sh SEE ALSO .Xr awk 1 , .Xr comm 1 ,