View | Details | Raw Unified | Return to bug 13935
Collapse All | Expand All

(-)grep/AUTHORS (-1 / +13 lines)
Lines 20-29 Link Here
20
to James Woods.  He also contributed some code to early versions of
20
to James Woods.  He also contributed some code to early versions of
21
GNU grep.
21
GNU grep.
22
22
23
Finally, I would like to thank Andrew Hume for many fascinating discussions
23
Mike Haertel would like to thank Andrew Hume for many fascinating discussions
24
of string searching issues over the years.  Hume & Sunday's excellent
24
of string searching issues over the years.  Hume & Sunday's excellent
25
paper on fast string searching (AT&T Bell Laboratories CSTR #156)
25
paper on fast string searching (AT&T Bell Laboratories CSTR #156)
26
describes some of the history of the subject, as well as providing
26
describes some of the history of the subject, as well as providing
27
exhaustive performance analysis of various implementation alternatives.
27
exhaustive performance analysis of various implementation alternatives.
28
The inner loop of GNU grep is similar to Hume & Sunday's recommended
28
The inner loop of GNU grep is similar to Hume & Sunday's recommended
29
"Tuned Boyer Moore" inner loop.
29
"Tuned Boyer Moore" inner loop.
30
31
More work was done on regex.[ch] by Ulrich Drepper and Arnold
32
Robbins.  Regex is now part of the GNU C library; see that package
33
for complete details and credits.
34
35
Arnold Robbins contributed improvements to dfa.[ch].  In fact,
36
it came straight from gawk-3.0.3 with minor edits and fixes.
37
38
Many folks contributed; see THANKS.  If I omitted someone, please
39
send me email.
40
41
Alain Magloire is the current maintainer.
(-)grep/COPYING (-5 / +6 lines)
Lines 2-8 Link Here
2
		       Version 2, June 1991
2
		       Version 2, June 1991
3
3
4
 Copyright (C) 1989, 1991 Free Software Foundation, Inc.
4
 Copyright (C) 1989, 1991 Free Software Foundation, Inc.
5
                          675 Mass Ave, Cambridge, MA 02139, USA
5
     59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
6
 Everyone is permitted to copy and distribute verbatim copies
6
 Everyone is permitted to copy and distribute verbatim copies
7
 of this license document, but changing it is not allowed.
7
 of this license document, but changing it is not allowed.
8
8
Lines 279-285 Link Here
279
279
280
		     END OF TERMS AND CONDITIONS
280
		     END OF TERMS AND CONDITIONS
281
281
282
	Appendix: How to Apply These Terms to Your New Programs
282
	    How to Apply These Terms to Your New Programs
283
283
284
  If you develop a new program, and you want it to be of the greatest
284
  If you develop a new program, and you want it to be of the greatest
285
possible use to the public, the best way to achieve this is to make it
285
possible use to the public, the best way to achieve this is to make it
Lines 291-297 Link Here
291
the "copyright" line and a pointer to where the full notice is found.
291
the "copyright" line and a pointer to where the full notice is found.
292
292
293
    <one line to give the program's name and a brief idea of what it does.>
293
    <one line to give the program's name and a brief idea of what it does.>
294
    Copyright (C) 19yy  <name of author>
294
    Copyright (C) <year>  <name of author>
295
295
296
    This program is free software; you can redistribute it and/or modify
296
    This program is free software; you can redistribute it and/or modify
297
    it under the terms of the GNU General Public License as published by
297
    it under the terms of the GNU General Public License as published by
Lines 305-318 Link Here
305
305
306
    You should have received a copy of the GNU General Public License
306
    You should have received a copy of the GNU General Public License
307
    along with this program; if not, write to the Free Software
307
    along with this program; if not, write to the Free Software
308
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
308
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
309
309
310
310
Also add information on how to contact you by electronic and paper mail.
311
Also add information on how to contact you by electronic and paper mail.
311
312
312
If the program is interactive, make it output a short notice like this
313
If the program is interactive, make it output a short notice like this
313
when it starts in an interactive mode:
314
when it starts in an interactive mode:
314
315
315
    Gnomovision version 69, Copyright (C) 19yy name of author
316
    Gnomovision version 69, Copyright (C) year  name of author
316
    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317
    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317
    This is free software, and you are welcome to redistribute it
318
    This is free software, and you are welcome to redistribute it
318
    under certain conditions; type `show c' for details.
319
    under certain conditions; type `show c' for details.
(-)grep/Makefile (-2 / +3 lines)
Lines 4-13 Link Here
4
GREP_FTS=  YES
4
GREP_FTS=  YES
5
5
6
PROG=	grep
6
PROG=	grep
7
SRCS=   dfa.c grep.c getopt.c kwset.c obstack.c search.c
7
SRCS=   dfa.c grep.c getopt.c getopt1.c kwset.c obstack.c search.c memchr.c \
8
	savedir.c grepmat.c stpcpy.c
8
CFLAGS+=-DGREP -DHAVE_STRING_H=1 -DHAVE_SYS_PARAM_H=1 -DHAVE_UNISTD_H=1 \
9
CFLAGS+=-DGREP -DHAVE_STRING_H=1 -DHAVE_SYS_PARAM_H=1 -DHAVE_UNISTD_H=1 \
9
	-DHAVE_GETPAGESIZE=1 -DHAVE_MEMCHR=1 -DHAVE_STRERROR=1 \
10
	-DHAVE_GETPAGESIZE=1 -DHAVE_MEMCHR=1 -DHAVE_STRERROR=1 \
10
	-DHAVE_VALLOC=1
11
	-DHAVE_VALLOC=1 -DHAVE_DIRENT_H=1 -DVERSION=\"2.3\"
11
12
12
LINKS+= ${BINDIR}/grep ${BINDIR}/egrep \
13
LINKS+= ${BINDIR}/grep ${BINDIR}/egrep \
13
	${BINDIR}/grep ${BINDIR}/fgrep
14
	${BINDIR}/grep ${BINDIR}/fgrep
(-)grep/NEWS (+64 lines)
Lines 1-3 Link Here
1
Version 2.3:
2
3
  - When searching a binary file FOO, grep now just reports
4
    `Binary file FOO matches' instead of outputting binary data.
5
    This is typically more useful than the old behavior,
6
    and it is also more consistent with other utilities like `diff'.
7
    A file is considered to be binary if it contains a NUL (i.e. zero) byte.
8
9
    The new -a or --text option causes `grep' to assume that all
10
    input is text.  (This option has the same meaning as with `diff'.)
11
    Use it if you want binary data in your output.
12
13
  - `grep' now searches directories just like ordinary files; it no longer
14
    silently skips directories.  This is the traditional behavior of
15
    Unix text utilities (in particular, of traditional `grep').
16
    Hence `grep PATTERN DIRECTORY' should report
17
    `grep: DIRECTORY: Is a directory' on hosts where the operating system
18
    does not permit programs to read directories directly, and
19
    `grep: DIRECTORY: Binary file matches' (or nothing) otherwise.
20
21
    The new -d ACTION or --directories=ACTION option affects directory handling.
22
    `-d skip' causes `grep' to silently skip directories, as in grep 2.2;
23
    `-d read' (the default) causes `grep' to read directories if possible,
24
    as in earlier versions of grep.
25
26
  - The MS-DOS and Microsoft Windows ports now behave identically to the
27
    GNU and Unix ports with respect to binary files and directories.
28
29
Version 2.2:
30
31
Bug fix release.
32
33
  - Status error number fix.
34
  - Skipping directories removed.
35
  - Many typo fixes.
36
  - -f /dev/null fix (not to be treated as an empty pattern).
37
  - Checks for wctype/wchar.
38
  - -E was using the wrong matcher fix.
39
  - bug in regex char class fix
40
  - Fixes for DJGPP
41
42
Version 2.1:
43
44
This is a bug fix release (see Changelog), i.e. no new features.
45
46
  - More compliance to GNU standard.
47
  - Long options.
48
  - Internationalisation.
49
  - Use automake/autoconf.
50
  - Directory hierarchy change.
51
  - Sigvec with -e on Linux corrected.
52
  - Sigvec with -f on Linux corrected.
53
  - Sigvec with the mmap() corrected.
54
  - Bug in kwset corrected.
55
  - -q, -L and -l stop on first match.
56
  - New and improved regex.[ch] from Ulrich Drepper.
57
  - New and improved dfa.[ch] from Arnold Robbins.
58
  - Prototypes for overzealous C compilers.
59
  - Not scanning a file, if it's a directory
60
    (cause problems on Sun).
61
  - Ported to MS-DOS/MS-Windows with DJGPP tools.
62
63
See Changelog for the full story and proper credits.
64
1
Version 2.0:
65
Version 2.0:
2
66
3
The most important user visible change is that egrep and fgrep have
67
The most important user visible change is that egrep and fgrep have
(-)grep/PROJECTS (-15 lines)
Lines 1-15 Link Here
1
Write Texinfo documentation for grep.  The manual page would be a good
2
place to start, but Info documents are also supposed to contain a
3
tutorial and examples.
4
5
Fix the DFA matcher to never use exponential space.  (Fortunately, these
6
cases are rare.)
7
8
Improve the performance of the regex backtracking matcher.  This matcher
9
is agonizingly slow, and is responsible for grep sometimes being slower
10
than Unix grep when backreferences are used.
11
12
Provide support for the Posix [= =] and [. .] constructs.  This is
13
difficult because it requires locale-dependent details of the character
14
set and collating sequence, but Posix does not standardize any method
15
for accessing this information!
(-)grep/alloca.c (+504 lines)
Line 0 Link Here
1
/* alloca.c -- allocate automatically reclaimed memory
2
   (Mostly) portable public-domain implementation -- D A Gwyn
3
4
   This implementation of the PWB library alloca function,
5
   which is used to allocate space off the run-time stack so
6
   that it is automatically reclaimed upon procedure exit,
7
   was inspired by discussions with J. Q. Johnson of Cornell.
8
   J.Otto Tennant <jot@cray.com> contributed the Cray support.
9
10
   There are some preprocessor constants that can
11
   be defined when compiling for your specific system, for
12
   improved efficiency; however, the defaults should be okay.
13
14
   The general concept of this implementation is to keep
15
   track of all alloca-allocated blocks, and reclaim any
16
   that are found to be deeper in the stack than the current
17
   invocation.  This heuristic does not reclaim storage as
18
   soon as it becomes invalid, but it will do so eventually.
19
20
   As a special case, alloca(0) reclaims storage without
21
   allocating any.  It is a good idea to use alloca(0) in
22
   your main control loop, etc. to force garbage collection.  */
23
24
#ifdef HAVE_CONFIG_H
25
#include <config.h>
26
#endif
27
28
#ifdef HAVE_STRING_H
29
#include <string.h>
30
#endif
31
#ifdef HAVE_STDLIB_H
32
#include <stdlib.h>
33
#endif
34
35
#ifdef emacs
36
#include "blockinput.h"
37
#endif
38
39
/* If compiling with GCC 2, this file's not needed.  */
40
#if !defined (__GNUC__) || __GNUC__ < 2
41
42
/* If someone has defined alloca as a macro,
43
   there must be some other way alloca is supposed to work.  */
44
#ifndef alloca
45
46
#ifdef emacs
47
#ifdef static
48
/* actually, only want this if static is defined as ""
49
   -- this is for usg, in which emacs must undefine static
50
   in order to make unexec workable
51
   */
52
#ifndef STACK_DIRECTION
53
you
54
lose
55
-- must know STACK_DIRECTION at compile-time
56
#endif /* STACK_DIRECTION undefined */
57
#endif /* static */
58
#endif /* emacs */
59
60
/* If your stack is a linked list of frames, you have to
61
   provide an "address metric" ADDRESS_FUNCTION macro.  */
62
63
#if defined (CRAY) && defined (CRAY_STACKSEG_END)
64
long i00afunc ();
65
#define ADDRESS_FUNCTION(arg) (char *) i00afunc (&(arg))
66
#else
67
#define ADDRESS_FUNCTION(arg) &(arg)
68
#endif
69
70
#if __STDC__
71
typedef void *pointer;
72
#else
73
typedef char *pointer;
74
#endif
75
76
#ifndef NULL
77
#define	NULL	0
78
#endif
79
80
/* Different portions of Emacs need to call different versions of
81
   malloc.  The Emacs executable needs alloca to call xmalloc, because
82
   ordinary malloc isn't protected from input signals.  On the other
83
   hand, the utilities in lib-src need alloca to call malloc; some of
84
   them are very simple, and don't have an xmalloc routine.
85
86
   Non-Emacs programs expect this to call xmalloc.
87
88
   Callers below should use malloc.  */
89
90
#ifndef emacs
91
#define malloc xmalloc
92
#endif
93
extern pointer malloc ();
94
95
/* Define STACK_DIRECTION if you know the direction of stack
96
   growth for your system; otherwise it will be automatically
97
   deduced at run-time.
98
99
   STACK_DIRECTION > 0 => grows toward higher addresses
100
   STACK_DIRECTION < 0 => grows toward lower addresses
101
   STACK_DIRECTION = 0 => direction of growth unknown  */
102
103
#ifndef STACK_DIRECTION
104
#define	STACK_DIRECTION	0	/* Direction unknown.  */
105
#endif
106
107
#if STACK_DIRECTION != 0
108
109
#define	STACK_DIR	STACK_DIRECTION	/* Known at compile-time.  */
110
111
#else /* STACK_DIRECTION == 0; need run-time code.  */
112
113
static int stack_dir;		/* 1 or -1 once known.  */
114
#define	STACK_DIR	stack_dir
115
116
static void
117
find_stack_direction ()
118
{
119
  static char *addr = NULL;	/* Address of first `dummy', once known.  */
120
  auto char dummy;		/* To get stack address.  */
121
122
  if (addr == NULL)
123
    {				/* Initial entry.  */
124
      addr = ADDRESS_FUNCTION (dummy);
125
126
      find_stack_direction ();	/* Recurse once.  */
127
    }
128
  else
129
    {
130
      /* Second entry.  */
131
      if (ADDRESS_FUNCTION (dummy) > addr)
132
	stack_dir = 1;		/* Stack grew upward.  */
133
      else
134
	stack_dir = -1;		/* Stack grew downward.  */
135
    }
136
}
137
138
#endif /* STACK_DIRECTION == 0 */
139
140
/* An "alloca header" is used to:
141
   (a) chain together all alloca'ed blocks;
142
   (b) keep track of stack depth.
143
144
   It is very important that sizeof(header) agree with malloc
145
   alignment chunk size.  The following default should work okay.  */
146
147
#ifndef	ALIGN_SIZE
148
#define	ALIGN_SIZE	sizeof(double)
149
#endif
150
151
typedef union hdr
152
{
153
  char align[ALIGN_SIZE];	/* To force sizeof(header).  */
154
  struct
155
    {
156
      union hdr *next;		/* For chaining headers.  */
157
      char *deep;		/* For stack depth measure.  */
158
    } h;
159
} header;
160
161
static header *last_alloca_header = NULL;	/* -> last alloca header.  */
162
163
/* Return a pointer to at least SIZE bytes of storage,
164
   which will be automatically reclaimed upon exit from
165
   the procedure that called alloca.  Originally, this space
166
   was supposed to be taken from the current stack frame of the
167
   caller, but that method cannot be made to work for some
168
   implementations of C, for example under Gould's UTX/32.  */
169
170
pointer
171
alloca (size)
172
     unsigned size;
173
{
174
  auto char probe;		/* Probes stack depth: */
175
  register char *depth = ADDRESS_FUNCTION (probe);
176
177
#if STACK_DIRECTION == 0
178
  if (STACK_DIR == 0)		/* Unknown growth direction.  */
179
    find_stack_direction ();
180
#endif
181
182
  /* Reclaim garbage, defined as all alloca'd storage that
183
     was allocated from deeper in the stack than currently.  */
184
185
  {
186
    register header *hp;	/* Traverses linked list.  */
187
188
#ifdef emacs
189
    BLOCK_INPUT;
190
#endif
191
192
    for (hp = last_alloca_header; hp != NULL;)
193
      if ((STACK_DIR > 0 && hp->h.deep > depth)
194
	  || (STACK_DIR < 0 && hp->h.deep < depth))
195
	{
196
	  register header *np = hp->h.next;
197
198
	  free ((pointer) hp);	/* Collect garbage.  */
199
200
	  hp = np;		/* -> next header.  */
201
	}
202
      else
203
	break;			/* Rest are not deeper.  */
204
205
    last_alloca_header = hp;	/* -> last valid storage.  */
206
207
#ifdef emacs
208
    UNBLOCK_INPUT;
209
#endif
210
  }
211
212
  if (size == 0)
213
    return NULL;		/* No allocation required.  */
214
215
  /* Allocate combined header + user data storage.  */
216
217
  {
218
    register pointer new = malloc (sizeof (header) + size);
219
    /* Address of header.  */
220
221
    if (new == 0)
222
      abort();
223
224
    ((header *) new)->h.next = last_alloca_header;
225
    ((header *) new)->h.deep = depth;
226
227
    last_alloca_header = (header *) new;
228
229
    /* User storage begins just after header.  */
230
231
    return (pointer) ((char *) new + sizeof (header));
232
  }
233
}
234
235
#if defined (CRAY) && defined (CRAY_STACKSEG_END)
236
237
#ifdef DEBUG_I00AFUNC
238
#include <stdio.h>
239
#endif
240
241
#ifndef CRAY_STACK
242
#define CRAY_STACK
243
#ifndef CRAY2
244
/* Stack structures for CRAY-1, CRAY X-MP, and CRAY Y-MP */
245
struct stack_control_header
246
  {
247
    long shgrow:32;		/* Number of times stack has grown.  */
248
    long shaseg:32;		/* Size of increments to stack.  */
249
    long shhwm:32;		/* High water mark of stack.  */
250
    long shsize:32;		/* Current size of stack (all segments).  */
251
  };
252
253
/* The stack segment linkage control information occurs at
254
   the high-address end of a stack segment.  (The stack
255
   grows from low addresses to high addresses.)  The initial
256
   part of the stack segment linkage control information is
257
   0200 (octal) words.  This provides for register storage
258
   for the routine which overflows the stack.  */
259
260
struct stack_segment_linkage
261
  {
262
    long ss[0200];		/* 0200 overflow words.  */
263
    long sssize:32;		/* Number of words in this segment.  */
264
    long ssbase:32;		/* Offset to stack base.  */
265
    long:32;
266
    long sspseg:32;		/* Offset to linkage control of previous
267
				   segment of stack.  */
268
    long:32;
269
    long sstcpt:32;		/* Pointer to task common address block.  */
270
    long sscsnm;		/* Private control structure number for
271
				   microtasking.  */
272
    long ssusr1;		/* Reserved for user.  */
273
    long ssusr2;		/* Reserved for user.  */
274
    long sstpid;		/* Process ID for pid based multi-tasking.  */
275
    long ssgvup;		/* Pointer to multitasking thread giveup.  */
276
    long sscray[7];		/* Reserved for Cray Research.  */
277
    long ssa0;
278
    long ssa1;
279
    long ssa2;
280
    long ssa3;
281
    long ssa4;
282
    long ssa5;
283
    long ssa6;
284
    long ssa7;
285
    long sss0;
286
    long sss1;
287
    long sss2;
288
    long sss3;
289
    long sss4;
290
    long sss5;
291
    long sss6;
292
    long sss7;
293
  };
294
295
#else /* CRAY2 */
296
/* The following structure defines the vector of words
297
   returned by the STKSTAT library routine.  */
298
struct stk_stat
299
  {
300
    long now;			/* Current total stack size.  */
301
    long maxc;			/* Amount of contiguous space which would
302
				   be required to satisfy the maximum
303
				   stack demand to date.  */
304
    long high_water;		/* Stack high-water mark.  */
305
    long overflows;		/* Number of stack overflow ($STKOFEN) calls.  */
306
    long hits;			/* Number of internal buffer hits.  */
307
    long extends;		/* Number of block extensions.  */
308
    long stko_mallocs;		/* Block allocations by $STKOFEN.  */
309
    long underflows;		/* Number of stack underflow calls ($STKRETN).  */
310
    long stko_free;		/* Number of deallocations by $STKRETN.  */
311
    long stkm_free;		/* Number of deallocations by $STKMRET.  */
312
    long segments;		/* Current number of stack segments.  */
313
    long maxs;			/* Maximum number of stack segments so far.  */
314
    long pad_size;		/* Stack pad size.  */
315
    long current_address;	/* Current stack segment address.  */
316
    long current_size;		/* Current stack segment size.  This
317
				   number is actually corrupted by STKSTAT to
318
				   include the fifteen word trailer area.  */
319
    long initial_address;	/* Address of initial segment.  */
320
    long initial_size;		/* Size of initial segment.  */
321
  };
322
323
/* The following structure describes the data structure which trails
324
   any stack segment.  I think that the description in 'asdef' is
325
   out of date.  I only describe the parts that I am sure about.  */
326
327
struct stk_trailer
328
  {
329
    long this_address;		/* Address of this block.  */
330
    long this_size;		/* Size of this block (does not include
331
				   this trailer).  */
332
    long unknown2;
333
    long unknown3;
334
    long link;			/* Address of trailer block of previous
335
				   segment.  */
336
    long unknown5;
337
    long unknown6;
338
    long unknown7;
339
    long unknown8;
340
    long unknown9;
341
    long unknown10;
342
    long unknown11;
343
    long unknown12;
344
    long unknown13;
345
    long unknown14;
346
  };
347
348
#endif /* CRAY2 */
349
#endif /* not CRAY_STACK */
350
351
#ifdef CRAY2
352
/* Determine a "stack measure" for an arbitrary ADDRESS.
353
   I doubt that "lint" will like this much.  */
354
355
static long
356
i00afunc (long *address)
357
{
358
  struct stk_stat status;
359
  struct stk_trailer *trailer;
360
  long *block, size;
361
  long result = 0;
362
363
  /* We want to iterate through all of the segments.  The first
364
     step is to get the stack status structure.  We could do this
365
     more quickly and more directly, perhaps, by referencing the
366
     $LM00 common block, but I know that this works.  */
367
368
  STKSTAT (&status);
369
370
  /* Set up the iteration.  */
371
372
  trailer = (struct stk_trailer *) (status.current_address
373
				    + status.current_size
374
				    - 15);
375
376
  /* There must be at least one stack segment.  Therefore it is
377
     a fatal error if "trailer" is null.  */
378
379
  if (trailer == 0)
380
    abort ();
381
382
  /* Discard segments that do not contain our argument address.  */
383
384
  while (trailer != 0)
385
    {
386
      block = (long *) trailer->this_address;
387
      size = trailer->this_size;
388
      if (block == 0 || size == 0)
389
	abort ();
390
      trailer = (struct stk_trailer *) trailer->link;
391
      if ((block <= address) && (address < (block + size)))
392
	break;
393
    }
394
395
  /* Set the result to the offset in this segment and add the sizes
396
     of all predecessor segments.  */
397
398
  result = address - block;
399
400
  if (trailer == 0)
401
    {
402
      return result;
403
    }
404
405
  do
406
    {
407
      if (trailer->this_size <= 0)
408
	abort ();
409
      result += trailer->this_size;
410
      trailer = (struct stk_trailer *) trailer->link;
411
    }
412
  while (trailer != 0);
413
414
  /* We are done.  Note that if you present a bogus address (one
415
     not in any segment), you will get a different number back, formed
416
     from subtracting the address of the first block.  This is probably
417
     not what you want.  */
418
419
  return (result);
420
}
421
422
#else /* not CRAY2 */
423
/* Stack address function for a CRAY-1, CRAY X-MP, or CRAY Y-MP.
424
   Determine the number of the cell within the stack,
425
   given the address of the cell.  The purpose of this
426
   routine is to linearize, in some sense, stack addresses
427
   for alloca.  */
428
429
static long
430
i00afunc (long address)
431
{
432
  long stkl = 0;
433
434
  long size, pseg, this_segment, stack;
435
  long result = 0;
436
437
  struct stack_segment_linkage *ssptr;
438
439
  /* Register B67 contains the address of the end of the
440
     current stack segment.  If you (as a subprogram) store
441
     your registers on the stack and find that you are past
442
     the contents of B67, you have overflowed the segment.
443
444
     B67 also points to the stack segment linkage control
445
     area, which is what we are really interested in.  */
446
447
  stkl = CRAY_STACKSEG_END ();
448
  ssptr = (struct stack_segment_linkage *) stkl;
449
450
  /* If one subtracts 'size' from the end of the segment,
451
     one has the address of the first word of the segment.
452
453
     If this is not the first segment, 'pseg' will be
454
     nonzero.  */
455
456
  pseg = ssptr->sspseg;
457
  size = ssptr->sssize;
458
459
  this_segment = stkl - size;
460
461
  /* It is possible that calling this routine itself caused
462
     a stack overflow.  Discard stack segments which do not
463
     contain the target address.  */
464
465
  while (!(this_segment <= address && address <= stkl))
466
    {
467
#ifdef DEBUG_I00AFUNC
468
      fprintf (stderr, "%011o %011o %011o\n", this_segment, address, stkl);
469
#endif
470
      if (pseg == 0)
471
	break;
472
      stkl = stkl - pseg;
473
      ssptr = (struct stack_segment_linkage *) stkl;
474
      size = ssptr->sssize;
475
      pseg = ssptr->sspseg;
476
      this_segment = stkl - size;
477
    }
478
479
  result = address - this_segment;
480
481
  /* If you subtract pseg from the current end of the stack,
482
     you get the address of the previous stack segment's end.
483
     This seems a little convoluted to me, but I'll bet you save
484
     a cycle somewhere.  */
485
486
  while (pseg != 0)
487
    {
488
#ifdef DEBUG_I00AFUNC
489
      fprintf (stderr, "%011o %011o\n", pseg, size);
490
#endif
491
      stkl = stkl - pseg;
492
      ssptr = (struct stack_segment_linkage *) stkl;
493
      size = ssptr->sssize;
494
      pseg = ssptr->sspseg;
495
      result += size;
496
    }
497
  return (result);
498
}
499
500
#endif /* not CRAY2 */
501
#endif /* CRAY */
502
503
#endif /* no alloca */
504
#endif /* not GCC version 2 */
(-)grep/btowc.c (+52 lines)
Line 0 Link Here
1
/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc.
2
   This file is part of the GNU C Library.
3
   Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>
4
5
   The GNU C Library is free software; you can redistribute it and/or
6
   modify it under the terms of the GNU Library General Public License as
7
   published by the Free Software Foundation; either version 2 of the
8
   License, or (at your option) any later version.
9
10
   The GNU C Library is distributed in the hope that it will be useful,
11
   but WITHOUT ANY WARRANTY; without even the implied warranty of
12
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
   Library General Public License for more details.
14
15
   You should have received a copy of the GNU Library General Public
16
   License along with the GNU C Library; see the file COPYING.LIB.  If not,
17
   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18
   Boston, MA 02111-1307, USA.  */
19
20
#ifdef HAVE_CONFIG_H
21
#include <config.h>
22
#endif
23
24
#include <stdio.h>
25
26
#if ! defined(HAVE_WCHAR_H) || defined(__CYGWIN__)
27
typedef unsigned int wint_t;
28
# undef WEOF
29
# define WEOF ((wint_t)-1)
30
#else
31
#include <wchar.h>
32
#endif
33
34
#ifndef weak_alias
35
#  define __btowc btowc
36
#endif
37
38
/* We use UTF8 encoding for multibyte strings and therefore a valid
39
   one-byte multibyte string can only have a value from 0 to 0x7f.  */
40
wint_t
41
__btowc (c)
42
     int c;
43
{
44
  if (WEOF != (wint_t) EOF || c < 0 || c > 0x7f)
45
    return WEOF;
46
  else
47
    return (wint_t) c;
48
}
49
50
#ifdef weak_alias
51
weak_alias (__btowc, btowc)
52
#endif
(-)grep/dfa.c (-183 / +236 lines)
Lines 1-5 Link Here
1
/* dfa.c - deterministic extended regexp routines for GNU
1
/* dfa.c - deterministic extended regexp routines for GNU
2
   Copyright (C) 1988 Free Software Foundation, Inc.
2
   Copyright (C) 1988, 1998 Free Software Foundation, Inc.
3
3
4
   This program is free software; you can redistribute it and/or modify
4
   This program is free software; you can redistribute it and/or modify
5
   it under the terms of the GNU General Public License as published by
5
   it under the terms of the GNU General Public License as published by
Lines 13-31 Link Here
13
13
14
   You should have received a copy of the GNU General Public License
14
   You should have received a copy of the GNU General Public License
15
   along with this program; if not, write to the Free Software
15
   along with this program; if not, write to the Free Software
16
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
16
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA */
17
17
18
/* Written June, 1988 by Mike Haertel
18
/* Written June, 1988 by Mike Haertel
19
   Modified July, 1988 by Arthur David Olson to assist BMG speedups  */
19
   Modified July, 1988 by Arthur David Olson to assist BMG speedups  */
20
20
21
#ifdef HAVE_CONFIG_H
22
#include <config.h>
23
#endif
24
21
#include <assert.h>
25
#include <assert.h>
22
#include <ctype.h>
26
#include <ctype.h>
23
#include <stdio.h>
27
#include <stdio.h>
24
28
29
#include <sys/types.h>
25
#ifdef STDC_HEADERS
30
#ifdef STDC_HEADERS
26
#include <stdlib.h>
31
#include <stdlib.h>
27
#else
32
#else
28
#include <sys/types.h>
29
extern char *calloc(), *malloc(), *realloc();
33
extern char *calloc(), *malloc(), *realloc();
30
extern void free();
34
extern void free();
31
#endif
35
#endif
Lines 38-96 Link Here
38
#include <strings.h>
42
#include <strings.h>
39
#endif
43
#endif
40
44
45
#ifndef DEBUG	/* use the same approach as regex.c */
46
#undef assert
47
#define assert(e)
48
#endif /* DEBUG */
49
41
#ifndef isgraph
50
#ifndef isgraph
42
#define isgraph(C) (isprint((unsigned char)C) && !isspace((unsigned char)C))
51
#define isgraph(C) (isprint(C) && !isspace(C))
43
#endif
52
#endif
44
53
45
#define ISALPHA(C) isalpha((unsigned char)C)
54
#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
46
#define ISUPPER(C) isupper((unsigned char)C)
55
#define ISALPHA(C) isalpha(C)
47
#define ISLOWER(C) islower((unsigned char)C)
56
#define ISUPPER(C) isupper(C)
48
#define ISDIGIT(C) isdigit((unsigned char)C)
57
#define ISLOWER(C) islower(C)
49
#define ISXDIGIT(C) isxdigit((unsigned char)C)
58
#define ISDIGIT(C) isdigit(C)
50
#define ISSPACE(C) isspace((unsigned char)C)
59
#define ISXDIGIT(C) isxdigit(C)
51
#define ISPUNCT(C) ispunct((unsigned char)C)
60
#define ISSPACE(C) isspace(C)
52
#define ISALNUM(C) isalnum((unsigned char)C)
61
#define ISPUNCT(C) ispunct(C)
53
#define ISPRINT(C) isprint((unsigned char)C)
62
#define ISALNUM(C) isalnum(C)
54
#define ISGRAPH(C) isgraph((unsigned char)C)
63
#define ISPRINT(C) isprint(C)
55
#define ISCNTRL(C) iscntrl((unsigned char)C)
64
#define ISGRAPH(C) isgraph(C)
65
#define ISCNTRL(C) iscntrl(C)
66
#else
67
#define ISALPHA(C) (isascii(C) && isalpha(C))
68
#define ISUPPER(C) (isascii(C) && isupper(C))
69
#define ISLOWER(C) (isascii(C) && islower(C))
70
#define ISDIGIT(C) (isascii(C) && isdigit(C))
71
#define ISXDIGIT(C) (isascii(C) && isxdigit(C))
72
#define ISSPACE(C) (isascii(C) && isspace(C))
73
#define ISPUNCT(C) (isascii(C) && ispunct(C))
74
#define ISALNUM(C) (isascii(C) && isalnum(C))
75
#define ISPRINT(C) (isascii(C) && isprint(C))
76
#define ISGRAPH(C) (isascii(C) && isgraph(C))
77
#define ISCNTRL(C) (isascii(C) && iscntrl(C))
78
#endif
79
80
/* If we (don't) have I18N.  */
81
/* glibc defines _ */
82
#ifndef _
83
# ifdef HAVE_LIBINTL_H
84
#  include <libintl.h>
85
#  ifndef _
86
#   define _(Str) gettext (Str)
87
#  endif
88
# else
89
#  define _(Str) (Str)
90
# endif
91
#endif
56
92
93
#include "regex.h"
57
#include "dfa.h"
94
#include "dfa.h"
58
#include <gnuregex.h>
59
95
60
#if __STDC__
96
/* HPUX, define those as macros in sys/param.h */
61
typedef void *ptr_t;
97
#ifdef setbit
62
#else
98
# undef setbit
63
typedef char *ptr_t;
99
#endif
100
#ifdef clrbit
101
# undef clrbit
64
#endif
102
#endif
65
103
66
static void	dfamust();
104
static void dfamust PARAMS ((struct dfa *dfa));
67
105
68
#ifdef __FreeBSD__
106
static ptr_t xcalloc PARAMS ((size_t n, size_t s));
69
static int collate_range_cmp (a, b)
107
static ptr_t xmalloc PARAMS ((size_t n));
70
	int a, b;
108
static ptr_t xrealloc PARAMS ((ptr_t p, size_t n));
71
{
109
#ifdef DEBUG
72
	int r;
110
static void prtok PARAMS ((token t));
73
	static char s[2][2];
74
75
	if ((unsigned char)a == (unsigned char)b)
76
		return 0;
77
	s[0][0] = a;
78
	s[1][0] = b;
79
	if ((r = strcoll(s[0], s[1])) == 0)
80
		r = (unsigned char)a - (unsigned char)b;
81
	return r;
82
}
83
#endif
111
#endif
112
static int tstbit PARAMS ((int b, charclass c));
113
static void setbit PARAMS ((int b, charclass c));
114
static void clrbit PARAMS ((int b, charclass c));
115
static void copyset PARAMS ((charclass src, charclass dst));
116
static void zeroset PARAMS ((charclass s));
117
static void notset PARAMS ((charclass s));
118
static int equal PARAMS ((charclass s1, charclass s2));
119
static int charclass_index PARAMS ((charclass s));
120
static int looking_at PARAMS ((const char *s));
121
static token lex PARAMS ((void));
122
static void addtok PARAMS ((token t));
123
static void atom PARAMS ((void));
124
static int nsubtoks PARAMS ((int tindex));
125
static void copytoks PARAMS ((int tindex, int ntokens));
126
static void closure PARAMS ((void));
127
static void branch PARAMS ((void));
128
static void regexp PARAMS ((int toplevel));
129
static void copy PARAMS ((position_set *src, position_set *dst));
130
static void insert PARAMS ((position p, position_set *s));
131
static void merge PARAMS ((position_set *s1, position_set *s2, position_set *m));
132
static void delete PARAMS ((position p, position_set *s));
133
static int state_index PARAMS ((struct dfa *d, position_set *s,
134
			  int newline, int letter));
135
static void build_state PARAMS ((int s, struct dfa *d));
136
static void build_state_zero PARAMS ((struct dfa *d));
137
static char *icatalloc PARAMS ((char *old, char *new));
138
static char *icpyalloc PARAMS ((char *string));
139
static char *istrstr PARAMS ((char *lookin, char *lookfor));
140
static void ifree PARAMS ((char *cp));
141
static void freelist PARAMS ((char **cpp));
142
static char **enlist PARAMS ((char **cpp, char *new, size_t len));
143
static char **comsubs PARAMS ((char *left, char *right));
144
static char **addlists PARAMS ((char **old, char **new));
145
static char **inboth PARAMS ((char **left, char **right));
84
146
85
static ptr_t
147
static ptr_t
86
xcalloc(n, s)
148
xcalloc(n, s)
87
     int n;
149
     size_t n;
88
     size_t s;
150
     size_t s;
89
{
151
{
90
  ptr_t r = calloc(n, s);
152
  ptr_t r = calloc(n, s);
91
153
92
  if (!r)
154
  if (!r)
93
    dfaerror("Memory exhausted");
155
    dfaerror(_("Memory exhausted"));
94
  return r;
156
  return r;
95
}
157
}
96
158
Lines 102-108 Link Here
102
164
103
  assert(n != 0);
165
  assert(n != 0);
104
  if (!r)
166
  if (!r)
105
    dfaerror("Memory exhausted");
167
    dfaerror(_("Memory exhausted"));
106
  return r;
168
  return r;
107
}
169
}
108
170
Lines 115-125 Link Here
115
177
116
  assert(n != 0);
178
  assert(n != 0);
117
  if (!r)
179
  if (!r)
118
    dfaerror("Memory exhausted");
180
    dfaerror(_("Memory exhausted"));
119
  return r;
181
  return r;
120
}
182
}
121
183
122
#define CALLOC(p, t, n) ((p) = (t *) xcalloc((n), sizeof (t)))
184
#define CALLOC(p, t, n) ((p) = (t *) xcalloc((size_t)(n), sizeof (t)))
123
#define MALLOC(p, t, n) ((p) = (t *) xmalloc((n) * sizeof (t)))
185
#define MALLOC(p, t, n) ((p) = (t *) xmalloc((n) * sizeof (t)))
124
#define REALLOC(p, t, n) ((p) = (t *) xrealloc((ptr_t) (p), (n) * sizeof (t)))
186
#define REALLOC(p, t, n) ((p) = (t *) xrealloc((ptr_t) (p), (n) * sizeof (t)))
125
187
Lines 261-267 Link Here
261
}
323
}
262
324
263
/* Syntax bits controlling the behavior of the lexical analyzer. */
325
/* Syntax bits controlling the behavior of the lexical analyzer. */
264
static int syntax_bits, syntax_bits_set;
326
static reg_syntax_t syntax_bits, syntax_bits_set;
265
327
266
/* Flag for case-folding letters into sets. */
328
/* Flag for case-folding letters into sets. */
267
static int case_fold;
329
static int case_fold;
Lines 269-275 Link Here
269
/* Entry point to set syntax options. */
331
/* Entry point to set syntax options. */
270
void
332
void
271
dfasyntax(bits, fold)
333
dfasyntax(bits, fold)
272
     int bits;
334
     reg_syntax_t bits;
273
     int fold;
335
     int fold;
274
{
336
{
275
  syntax_bits_set = 1;
337
  syntax_bits_set = 1;
Lines 284-290 Link Here
284
346
285
static char *lexstart;		/* Pointer to beginning of input string. */
347
static char *lexstart;		/* Pointer to beginning of input string. */
286
static char *lexptr;		/* Pointer to next input character. */
348
static char *lexptr;		/* Pointer to next input character. */
287
static lexleft;			/* Number of characters remaining. */
349
static int lexleft;		/* Number of characters remaining. */
288
static token lasttok;		/* Previous token returned; initially END. */
350
static token lasttok;		/* Previous token returned; initially END. */
289
static int laststart;		/* True if we're separated from beginning or (, |
351
static int laststart;		/* True if we're separated from beginning or (, |
290
				   only by zero-width characters. */
352
				   only by zero-width characters. */
Lines 298-309 Link Here
298
      if (eoferr != 0)	   	      \
360
      if (eoferr != 0)	   	      \
299
	dfaerror(eoferr);  	      \
361
	dfaerror(eoferr);  	      \
300
      else		   	      \
362
      else		   	      \
301
	return END;	   	      \
363
	return lasttok = END;	      \
302
    (c) = (unsigned char) *lexptr++;  \
364
    (c) = (unsigned char) *lexptr++;  \
303
    --lexleft;		   	      \
365
    --lexleft;		   	      \
304
  }
366
  }
305
367
368
#ifdef __STDC__
369
#define FUNC(F, P) static int F(int c) { return P(c); }
370
#else
306
#define FUNC(F, P) static int F(c) int c; { return P(c); }
371
#define FUNC(F, P) static int F(c) int c; { return P(c); }
372
#endif
307
373
308
FUNC(is_alpha, ISALPHA)
374
FUNC(is_alpha, ISALPHA)
309
FUNC(is_upper, ISUPPER)
375
FUNC(is_upper, ISUPPER)
Lines 317-348 Link Here
317
FUNC(is_graph, ISGRAPH)
383
FUNC(is_graph, ISGRAPH)
318
FUNC(is_cntrl, ISCNTRL)
384
FUNC(is_cntrl, ISCNTRL)
319
385
386
static int is_blank(c)
387
int c;
388
{
389
   return (c == ' ' || c == '\t');
390
}
391
320
/* The following list maps the names of the Posix named character classes
392
/* The following list maps the names of the Posix named character classes
321
   to predicate functions that determine whether a given character is in
393
   to predicate functions that determine whether a given character is in
322
   the class.  The leading [ has already been eaten by the lexical analyzer. */
394
   the class.  The leading [ has already been eaten by the lexical analyzer. */
323
static struct {
395
static struct {
324
  char *name;
396
  const char *name;
325
  int (*pred)();
397
  int (*pred) PARAMS ((int));
326
} prednames[] = {
398
} prednames[] = {
327
  ":alpha:]", is_alpha,
399
  { ":alpha:]", is_alpha },
328
  ":upper:]", is_upper,
400
  { ":upper:]", is_upper },
329
  ":lower:]", is_lower,
401
  { ":lower:]", is_lower },
330
  ":digit:]", is_digit,
402
  { ":digit:]", is_digit },
331
  ":xdigit:]", is_xdigit,
403
  { ":xdigit:]", is_xdigit },
332
  ":space:]", is_space,
404
  { ":space:]", is_space },
333
  ":punct:]", is_punct,
405
  { ":punct:]", is_punct },
334
  ":alnum:]", is_alnum,
406
  { ":alnum:]", is_alnum },
335
  ":print:]", is_print,
407
  { ":print:]", is_print },
336
  ":graph:]", is_graph,
408
  { ":graph:]", is_graph },
337
  ":cntrl:]", is_cntrl,
409
  { ":cntrl:]", is_cntrl },
338
  0
410
  { ":blank:]", is_blank },
411
  { 0 }
339
};
412
};
340
413
414
/* Return non-zero if C is a `word-constituent' byte; zero otherwise.  */
415
#define IS_WORD_CONSTITUENT(C) (ISALNUM(C) || (C) == '_')
416
341
static int
417
static int
342
looking_at(s)
418
looking_at(s)
343
     char *s;
419
     const char *s;
344
{
420
{
345
  int len;
421
  size_t len;
346
422
347
  len = strlen(s);
423
  len = strlen(s);
348
  if (lexleft < len)
424
  if (lexleft < len)
Lines 373-379 Link Here
373
	  if (backslash)
449
	  if (backslash)
374
	    goto normal_char;
450
	    goto normal_char;
375
	  if (lexleft == 0)
451
	  if (lexleft == 0)
376
	    dfaerror("Unfinished \\ escape");
452
	    dfaerror(_("Unfinished \\ escape"));
377
	  backslash = 1;
453
	  backslash = 1;
378
	  break;
454
	  break;
379
455
Lines 419-441 Link Here
419
	    }
495
	    }
420
	  goto normal_char;
496
	  goto normal_char;
421
497
498
	case '`':
499
	  if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
500
	    return lasttok = BEGLINE;	/* FIXME: should be beginning of string */
501
	  goto normal_char;
502
503
	case '\'':
504
	  if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
505
	    return lasttok = ENDLINE;	/* FIXME: should be end of string */
506
	  goto normal_char;
507
422
	case '<':
508
	case '<':
423
	  if (backslash)
509
	  if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
424
	    return lasttok = BEGWORD;
510
	    return lasttok = BEGWORD;
425
	  goto normal_char;
511
	  goto normal_char;
426
512
427
	case '>':
513
	case '>':
428
	  if (backslash)
514
	  if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
429
	    return lasttok = ENDWORD;
515
	    return lasttok = ENDWORD;
430
	  goto normal_char;
516
	  goto normal_char;
431
517
432
	case 'b':
518
	case 'b':
433
	  if (backslash)
519
	  if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
434
	    return lasttok = LIMWORD;
520
	    return lasttok = LIMWORD;
435
	  goto normal_char;
521
	  goto normal_char;
436
522
437
	case 'B':
523
	case 'B':
438
	  if (backslash)
524
	  if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
439
	    return lasttok = NOTLIMWORD;
525
	    return lasttok = NOTLIMWORD;
440
	  goto normal_char;
526
	  goto normal_char;
441
527
Lines 475-498 Link Here
475
	     {M,} - minimum count, maximum is infinity
561
	     {M,} - minimum count, maximum is infinity
476
	     {,M} - 0 through M
562
	     {,M} - 0 through M
477
	     {M,N} - M through N */
563
	     {M,N} - M through N */
478
	  FETCH(c, "unfinished repeat count");
564
	  FETCH(c, _("unfinished repeat count"));
479
	  if (ISDIGIT(c))
565
	  if (ISDIGIT(c))
480
	    {
566
	    {
481
	      minrep = c - '0';
567
	      minrep = c - '0';
482
	      for (;;)
568
	      for (;;)
483
		{
569
		{
484
		  FETCH(c, "unfinished repeat count");
570
		  FETCH(c, _("unfinished repeat count"));
485
		  if (!ISDIGIT(c))
571
		  if (!ISDIGIT(c))
486
		    break;
572
		    break;
487
		  minrep = 10 * minrep + c - '0';
573
		  minrep = 10 * minrep + c - '0';
488
		}
574
		}
489
	    }
575
	    }
490
	  else if (c != ',')
576
	  else if (c != ',')
491
	    dfaerror("malformed repeat count");
577
	    dfaerror(_("malformed repeat count"));
492
	  if (c == ',')
578
	  if (c == ',')
493
	    for (;;)
579
	    for (;;)
494
	      {
580
	      {
495
		FETCH(c, "unfinished repeat count");
581
		FETCH(c, _("unfinished repeat count"));
496
		if (!ISDIGIT(c))
582
		if (!ISDIGIT(c))
497
		  break;
583
		  break;
498
		maxrep = 10 * maxrep + c - '0';
584
		maxrep = 10 * maxrep + c - '0';
Lines 502-512 Link Here
502
	  if (!(syntax_bits & RE_NO_BK_BRACES))
588
	  if (!(syntax_bits & RE_NO_BK_BRACES))
503
	    {
589
	    {
504
	      if (c != '\\')
590
	      if (c != '\\')
505
		dfaerror("malformed repeat count");
591
		dfaerror(_("malformed repeat count"));
506
	      FETCH(c, "unfinished repeat count");
592
	      FETCH(c, _("unfinished repeat count"));
507
	    }
593
	    }
508
	  if (c != '}')
594
	  if (c != '}')
509
	    dfaerror("malformed repeat count");
595
	    dfaerror(_("malformed repeat count"));
510
	  laststart = 0;
596
	  laststart = 0;
511
	  return lasttok = REPMN;
597
	  return lasttok = REPMN;
512
598
Lines 556-566 Link Here
556
642
557
	case 'w':
643
	case 'w':
558
	case 'W':
644
	case 'W':
559
	  if (!backslash)
645
	  if (!backslash || (syntax_bits & RE_NO_GNU_OPS))
560
	    goto normal_char;
646
	    goto normal_char;
561
	  zeroset(ccl);
647
	  zeroset(ccl);
562
	  for (c2 = 0; c2 < NOTCHAR; ++c2)
648
	  for (c2 = 0; c2 < NOTCHAR; ++c2)
563
	    if (ISALNUM(c2))
649
	    if (IS_WORD_CONSTITUENT(c2))
564
	      setbit(c2, ccl);
650
	      setbit(c2, ccl);
565
	  if (c == 'W')
651
	  if (c == 'W')
566
	    notset(ccl);
652
	    notset(ccl);
Lines 571-580 Link Here
571
	  if (backslash)
657
	  if (backslash)
572
	    goto normal_char;
658
	    goto normal_char;
573
	  zeroset(ccl);
659
	  zeroset(ccl);
574
	  FETCH(c, "Unbalanced [");
660
	  FETCH(c, _("Unbalanced ["));
575
	  if (c == '^')
661
	  if (c == '^')
576
	    {
662
	    {
577
	      FETCH(c, "Unbalanced [");
663
	      FETCH(c, _("Unbalanced ["));
578
	      invert = 1;
664
	      invert = 1;
579
	    }
665
	    }
580
	  else
666
	  else
Lines 591-610 Link Here
591
		for (c1 = 0; prednames[c1].name; ++c1)
677
		for (c1 = 0; prednames[c1].name; ++c1)
592
		  if (looking_at(prednames[c1].name))
678
		  if (looking_at(prednames[c1].name))
593
		    {
679
		    {
680
			int (*pred)() = prednames[c1].pred;
681
			if (case_fold
682
			    && (pred == is_upper || pred == is_lower))
683
				pred = is_alpha;
684
594
		      for (c2 = 0; c2 < NOTCHAR; ++c2)
685
		      for (c2 = 0; c2 < NOTCHAR; ++c2)
595
			if ((*prednames[c1].pred)(c2))
686
			if ((*pred)(c2))
596
			  setbit(c2, ccl);
687
			  setbit(c2, ccl);
597
		      lexptr += strlen(prednames[c1].name);
688
		      lexptr += strlen(prednames[c1].name);
598
		      lexleft -= strlen(prednames[c1].name);
689
		      lexleft -= strlen(prednames[c1].name);
599
		      FETCH(c1, "Unbalanced [");
690
		      FETCH(c1, _("Unbalanced ["));
600
		      goto skip;
691
		      goto skip;
601
		    }
692
		    }
602
	      if (c == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
693
	      if (c == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
603
		FETCH(c, "Unbalanced [");
694
		FETCH(c, _("Unbalanced ["));
604
	      FETCH(c1, "Unbalanced [");
695
	      FETCH(c1, _("Unbalanced ["));
605
	      if (c1 == '-')
696
	      if (c1 == '-')
606
		{
697
		{
607
		  FETCH(c2, "Unbalanced [");
698
		  FETCH(c2, _("Unbalanced ["));
608
		  if (c2 == ']')
699
		  if (c2 == ']')
609
		    {
700
		    {
610
		      /* In the case [x-], the - is an ordinary hyphen,
701
		      /* In the case [x-], the - is an ordinary hyphen,
Lines 617-660 Link Here
617
		    {
708
		    {
618
		      if (c2 == '\\'
709
		      if (c2 == '\\'
619
			  && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
710
			  && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
620
			FETCH(c2, "Unbalanced [");
711
			FETCH(c2, _("Unbalanced ["));
621
		      FETCH(c1, "Unbalanced [");
712
		      FETCH(c1, _("Unbalanced ["));
622
		    }
713
		    }
623
		}
714
		}
624
	      else
715
	      else
625
		c2 = c;
716
		c2 = c;
626
#ifdef __FreeBSD__
627
	      { token c3;
628
629
		if (collate_range_cmp(c, c2) > 0) {
630
		  FETCH(c2, "Invalid range");
631
		  goto skip;
632
		}
633
634
		for (c3 = 0; c3 < NOTCHAR; ++c3)
635
		  if (   collate_range_cmp(c, c3) <= 0
636
		      && collate_range_cmp(c3, c2) <= 0
637
		     ) {
638
		    setbit(c3, ccl);
639
		    if (case_fold)
640
		      if (ISUPPER(c3))
641
			setbit(tolower((unsigned char)c3), ccl);
642
		      else if (ISLOWER(c3))
643
			setbit(toupper((unsigned char)c3), ccl);
644
		  }
645
	      }
646
#else
647
	      while (c <= c2)
717
	      while (c <= c2)
648
		{
718
		{
649
		  setbit(c, ccl);
719
		  setbit(c, ccl);
650
		  if (case_fold)
720
		  if (case_fold)
651
		    if (ISUPPER(c))
721
		    if (ISUPPER(c))
652
		      setbit(tolower((unsigned char)c), ccl);
722
		      setbit(tolower(c), ccl);
653
		    else if (ISLOWER(c))
723
		    else if (ISLOWER(c))
654
		      setbit(toupper((unsigned char)c), ccl);
724
		      setbit(toupper(c), ccl);
655
		  ++c;
725
		  ++c;
656
		}
726
		}
657
#endif
658
	    skip:
727
	    skip:
659
	      ;
728
	      ;
660
	    }
729
	    }
Lines 675-684 Link Here
675
	    {
744
	    {
676
	      zeroset(ccl);
745
	      zeroset(ccl);
677
	      setbit(c, ccl);
746
	      setbit(c, ccl);
678
	      if (isupper((unsigned char)c))
747
	      if (isupper(c))
679
		setbit(tolower((unsigned char)c), ccl);
748
		setbit(tolower(c), ccl);
680
	      else
749
	      else
681
		setbit(toupper((unsigned char)c), ccl);
750
		setbit(toupper(c), ccl);
682
	      return lasttok = CSET + charclass_index(ccl);
751
	      return lasttok = CSET + charclass_index(ccl);
683
	    }
752
	    }
684
	  return c;
753
	  return c;
Lines 688-699 Link Here
688
  /* The above loop should consume at most a backslash
757
  /* The above loop should consume at most a backslash
689
     and some other character. */
758
     and some other character. */
690
  abort();
759
  abort();
760
  return END;	/* keeps pedantic compilers happy. */
691
}
761
}
692
762
693
/* Recursive descent parser for regular expressions. */
763
/* Recursive descent parser for regular expressions. */
694
764
695
static token tok;		/* Lookahead token. */
765
static token tok;		/* Lookahead token. */
696
static depth;			/* Current depth of a hypothetical stack
766
static int depth;		/* Current depth of a hypothetical stack
697
				   holding deferred productions.  This is
767
				   holding deferred productions.  This is
698
				   used to determine the depth that will be
768
				   used to determine the depth that will be
699
				   required of the real stack later on in
769
				   required of the real stack later on in
Lines 761-772 Link Here
761
831
762
   The parser builds a parse tree in postfix form in an array of tokens. */
832
   The parser builds a parse tree in postfix form in an array of tokens. */
763
833
764
#if __STDC__
765
static void regexp(int);
766
#else
767
static void regexp();
768
#endif
769
770
static void
834
static void
771
atom()
835
atom()
772
{
836
{
Lines 782-788 Link Here
782
      tok = lex();
846
      tok = lex();
783
      regexp(0);
847
      regexp(0);
784
      if (tok != RPAREN)
848
      if (tok != RPAREN)
785
	dfaerror("Unbalanced (");
849
	dfaerror(_("Unbalanced ("));
786
      tok = lex();
850
      tok = lex();
787
    }
851
    }
788
  else
852
  else
Lines 792-797 Link Here
792
/* Return the number of tokens in the given subexpression. */
856
/* Return the number of tokens in the given subexpression. */
793
static int
857
static int
794
nsubtoks(tindex)
858
nsubtoks(tindex)
859
int tindex;
795
{
860
{
796
  int ntoks1;
861
  int ntoks1;
797
862
Lines 902-908 Link Here
902
  parens = 0;
967
  parens = 0;
903
968
904
  if (! syntax_bits_set)
969
  if (! syntax_bits_set)
905
    dfaerror("No syntax specified");
970
    dfaerror(_("No syntax specified"));
906
971
907
  tok = lex();
972
  tok = lex();
908
  depth = d->depth;
973
  depth = d->depth;
Lines 910-916 Link Here
910
  regexp(1);
975
  regexp(1);
911
976
912
  if (tok != END)
977
  if (tok != END)
913
    dfaerror("Unbalanced )");
978
    dfaerror(_("Unbalanced )"));
914
979
915
  addtok(END - d->nregexps);
980
  addtok(END - d->nregexps);
916
  addtok(CAT);
981
  addtok(CAT);
Lines 949-955 Link Here
949
  position t1, t2;
1014
  position t1, t2;
950
1015
951
  for (i = 0; i < s->nelem && p.index < s->elems[i].index; ++i)
1016
  for (i = 0; i < s->nelem && p.index < s->elems[i].index; ++i)
952
    ;
1017
    continue;
953
  if (i < s->nelem && p.index == s->elems[i].index)
1018
  if (i < s->nelem && p.index == s->elems[i].index)
954
    s->elems[i].constraint |= p.constraint;
1019
    s->elems[i].constraint |= p.constraint;
955
  else
1020
  else
Lines 1082-1088 Link Here
1082
   that position with the elements of its follow labeled with an appropriate
1147
   that position with the elements of its follow labeled with an appropriate
1083
   constraint.  Repeat exhaustively until no funny positions are left.
1148
   constraint.  Repeat exhaustively until no funny positions are left.
1084
   S->elems must be large enough to hold the result. */
1149
   S->elems must be large enough to hold the result. */
1085
void
1150
static void epsclosure PARAMS ((position_set *s, struct dfa *d));
1151
1152
static void
1086
epsclosure(s, d)
1153
epsclosure(s, d)
1087
     position_set *s;
1154
     position_set *s;
1088
     struct dfa *d;
1155
     struct dfa *d;
Lines 1484-1490 Link Here
1484
  int state_newline;		/* New state on a newline transition. */
1551
  int state_newline;		/* New state on a newline transition. */
1485
  int wants_letter;		/* New state wants to know letter context. */
1552
  int wants_letter;		/* New state wants to know letter context. */
1486
  int state_letter;		/* New state on a letter transition. */
1553
  int state_letter;		/* New state on a letter transition. */
1487
  static initialized;		/* Flag for static initialization. */
1554
  static int initialized;	/* Flag for static initialization. */
1488
  int i, j, k;
1555
  int i, j, k;
1489
1556
1490
  /* Initialize the set of letters, if necessary. */
1557
  /* Initialize the set of letters, if necessary. */
Lines 1492-1498 Link Here
1492
    {
1559
    {
1493
      initialized = 1;
1560
      initialized = 1;
1494
      for (i = 0; i < NOTCHAR; ++i)
1561
      for (i = 0; i < NOTCHAR; ++i)
1495
	if (ISALNUM(i))
1562
	if (IS_WORD_CONSTITUENT(i))
1496
	  setbit(i, letters);
1563
	  setbit(i, letters);
1497
      setbit('\n', newline);
1564
      setbit('\n', newline);
1498
    }
1565
    }
Lines 1531-1537 Link Here
1531
1598
1532
	  /* If there are no characters left, there's no point in going on. */
1599
	  /* If there are no characters left, there's no point in going on. */
1533
	  for (j = 0; j < CHARCLASS_INTS && !matches[j]; ++j)
1600
	  for (j = 0; j < CHARCLASS_INTS && !matches[j]; ++j)
1534
	    ;
1601
	    continue;
1535
	  if (j == CHARCLASS_INTS)
1602
	  if (j == CHARCLASS_INTS)
1536
	    continue;
1603
	    continue;
1537
	}
1604
	}
Lines 1549-1555 Link Here
1549
	     matches. */
1616
	     matches. */
1550
	  intersectf = 0;
1617
	  intersectf = 0;
1551
	  for (k = 0; k < CHARCLASS_INTS; ++k)
1618
	  for (k = 0; k < CHARCLASS_INTS; ++k)
1552
	    (intersect[k] = matches[k] & labels[j][k]) ? intersectf = 1 : 0;
1619
	    (intersect[k] = matches[k] & labels[j][k]) ? (intersectf = 1) : 0;
1553
	  if (! intersectf)
1620
	  if (! intersectf)
1554
	    continue;
1621
	    continue;
1555
1622
Lines 1560-1567 Link Here
1560
	      /* Even an optimizing compiler can't know this for sure. */
1627
	      /* Even an optimizing compiler can't know this for sure. */
1561
	      int match = matches[k], label = labels[j][k];
1628
	      int match = matches[k], label = labels[j][k];
1562
1629
1563
	      (leftovers[k] = ~match & label) ? leftoversf = 1 : 0;
1630
	      (leftovers[k] = ~match & label) ? (leftoversf = 1) : 0;
1564
	      (matches[k] = match & ~label) ? matchesf = 1 : 0;
1631
	      (matches[k] = match & ~label) ? (matchesf = 1) : 0;
1565
	    }
1632
	    }
1566
1633
1567
	  /* If there were leftovers, create a new group labeled with them. */
1634
	  /* If there were leftovers, create a new group labeled with them. */
Lines 1625-1636 Link Here
1625
      else
1692
      else
1626
	state_letter = state;
1693
	state_letter = state;
1627
      for (i = 0; i < NOTCHAR; ++i)
1694
      for (i = 0; i < NOTCHAR; ++i)
1628
	if (i == '\n')
1695
	trans[i] = (IS_WORD_CONSTITUENT(i)) ? state_letter : state;
1629
	  trans[i] = state_newline;
1696
      trans['\n'] = state_newline;
1630
	else if (ISALNUM(i))
1631
	  trans[i] = state_letter;
1632
	else
1633
	  trans[i] = state;
1634
    }
1697
    }
1635
  else
1698
  else
1636
    for (i = 0; i < NOTCHAR; ++i)
1699
    for (i = 0; i < NOTCHAR; ++i)
Lines 1688-1694 Link Here
1688
1751
1689
	      if (c == '\n')
1752
	      if (c == '\n')
1690
		trans[c] = state_newline;
1753
		trans[c] = state_newline;
1691
	      else if (ISALNUM(c))
1754
	      else if (IS_WORD_CONSTITUENT(c))
1692
		trans[c] = state_letter;
1755
		trans[c] = state_letter;
1693
	      else if (c < NOTCHAR)
1756
	      else if (c < NOTCHAR)
1694
		trans[c] = state;
1757
		trans[c] = state;
Lines 1822-1833 Link Here
1822
     int *count;
1885
     int *count;
1823
     int *backref;
1886
     int *backref;
1824
{
1887
{
1825
  register s, s1, tmp;		/* Current state. */
1888
  register int s, s1, tmp;	/* Current state. */
1826
  register unsigned char *p;	/* Current input character. */
1889
  register unsigned char *p;	/* Current input character. */
1827
  register **trans, *t;		/* Copy of d->trans so it can be optimized
1890
  register int **trans, *t;	/* Copy of d->trans so it can be optimized
1828
				   into a register. */
1891
				   into a register. */
1829
  static sbit[NOTCHAR];	/* Table for anding with d->success. */
1892
  static int sbit[NOTCHAR];	/* Table for anding with d->success. */
1830
  static sbit_init;
1893
  static int sbit_init;
1831
1894
1832
  if (! sbit_init)
1895
  if (! sbit_init)
1833
    {
1896
    {
Lines 1835-1846 Link Here
1835
1898
1836
      sbit_init = 1;
1899
      sbit_init = 1;
1837
      for (i = 0; i < NOTCHAR; ++i)
1900
      for (i = 0; i < NOTCHAR; ++i)
1838
	if (i == '\n')
1901
	sbit[i] = (IS_WORD_CONSTITUENT(i)) ? 2 : 1;
1839
	  sbit[i] = 4;
1902
      sbit['\n'] = 4;
1840
	else if (ISALNUM(i))
1841
	  sbit[i] = 2;
1842
	else
1843
	  sbit[i] = 1;
1844
    }
1903
    }
1845
1904
1846
  if (! d->tralloc)
1905
  if (! d->tralloc)
Lines 1853-1882 Link Here
1853
1912
1854
  for (;;)
1913
  for (;;)
1855
    {
1914
    {
1856
      /* The dreaded inner loop. */
1915
      while ((t = trans[s]) != 0) { /* hand-optimized loop */
1857
      if ((t = trans[s]) != 0)
1916
	s1 = t[*p++];
1858
	do
1917
        if ((t = trans[s1]) == 0) {
1859
	  {
1918
           tmp = s ; s = s1 ; s1 = tmp ; /* swap */
1860
	    s1 = t[*p++];
1919
           break;
1861
	    if (! (t = trans[s1]))
1920
        }
1862
	      goto last_was_s;
1921
	s = t[*p++];
1863
	    s = t[*p++];
1922
      }
1864
	  }
1865
        while ((t = trans[s]) != 0);
1866
      goto last_was_s1;
1867
    last_was_s:
1868
      tmp = s, s = s1, s1 = tmp;
1869
    last_was_s1:
1870
1923
1871
      if (s >= 0 && p <= (unsigned char *) end && d->fails[s])
1924
      if (s >= 0 && p <= (unsigned char *) end && d->fails[s])
1872
	{
1925
	{
1873
	  if (d->success[s] & sbit[*p])
1926
	  if (d->success[s] & sbit[*p])
1874
	    {
1927
	    {
1875
	      if (backref)
1928
	      if (backref)
1876
		if (d->states[s].backref)
1929
		*backref = (d->states[s].backref != 0);
1877
		  *backref = 1;
1878
		else
1879
		  *backref = 0;
1880
	      return (char *) p;
1930
	      return (char *) p;
1881
	    }
1931
	    }
1882
1932
Lines 1940-1963 Link Here
1940
{
1990
{
1941
  if (case_fold)	/* dummy folding in service of dfamust() */
1991
  if (case_fold)	/* dummy folding in service of dfamust() */
1942
    {
1992
    {
1943
      char *copy;
1993
      char *lcopy;
1944
      int i;
1994
      int i;
1945
1995
1946
      copy = malloc(len);
1996
      lcopy = malloc(len);
1947
      if (!copy)
1997
      if (!lcopy)
1948
	dfaerror("out of memory");
1998
	dfaerror(_("out of memory"));
1949
1999
1950
      /* This is a kludge. */
2000
      /* This is a kludge. */
1951
      case_fold = 0;
2001
      case_fold = 0;
1952
      for (i = 0; i < len; ++i)
2002
      for (i = 0; i < len; ++i)
1953
	if (ISUPPER(s[i]))
2003
	if (ISUPPER ((unsigned char) s[i]))
1954
	  copy[i] = tolower((unsigned char)s[i]);
2004
	  lcopy[i] = tolower ((unsigned char) s[i]);
1955
	else
2005
	else
1956
	  copy[i] = s[i];
2006
	  lcopy[i] = s[i];
1957
2007
1958
      dfainit(d);
2008
      dfainit(d);
1959
      dfaparse(copy, len, d);
2009
      dfaparse(lcopy, len, d);
1960
      free(copy);
2010
      free(lcopy);
1961
      dfamust(d);
2011
      dfamust(d);
1962
      d->cindex = d->tindex = d->depth = d->nleaves = d->nregexps = 0;
2012
      d->cindex = d->tindex = d->depth = d->nleaves = d->nregexps = 0;
1963
      case_fold = 1;
2013
      case_fold = 1;
Lines 1995-2003 Link Here
1995
      free((ptr_t) d->trans[i]);
2045
      free((ptr_t) d->trans[i]);
1996
    else if (d->fails[i])
2046
    else if (d->fails[i])
1997
      free((ptr_t) d->fails[i]);
2047
      free((ptr_t) d->fails[i]);
1998
  free((ptr_t) d->realtrans);
2048
  if (d->realtrans) free((ptr_t) d->realtrans);
1999
  free((ptr_t) d->fails);
2049
  if (d->fails) free((ptr_t) d->fails);
2000
  free((ptr_t) d->newlines);
2050
  if (d->newlines) free((ptr_t) d->newlines);
2051
  if (d->success) free((ptr_t) d->success);
2001
  for (dm = d->musts; dm; dm = ndm)
2052
  for (dm = d->musts; dm; dm = ndm)
2002
    {
2053
    {
2003
      ndm = dm->next;
2054
      ndm = dm->next;
Lines 2092-2098 Link Here
2092
     char *new;
2143
     char *new;
2093
{
2144
{
2094
  char *result;
2145
  char *result;
2095
  int oldsize, newsize;
2146
  size_t oldsize, newsize;
2096
2147
2097
  newsize = (new == NULL) ? 0 : strlen(new);
2148
  newsize = (new == NULL) ? 0 : strlen(new);
2098
  if (old == NULL)
2149
  if (old == NULL)
Lines 2122-2128 Link Here
2122
     char *lookfor;
2173
     char *lookfor;
2123
{
2174
{
2124
  char *cp;
2175
  char *cp;
2125
  int len;
2176
  size_t len;
2126
2177
2127
  len = strlen(lookfor);
2178
  len = strlen(lookfor);
2128
  for (cp = lookin; *cp != '\0'; ++cp)
2179
  for (cp = lookin; *cp != '\0'; ++cp)
Lines 2158-2164 Link Here
2158
enlist(cpp, new, len)
2209
enlist(cpp, new, len)
2159
     char **cpp;
2210
     char **cpp;
2160
     char *new;
2211
     char *new;
2161
     int len;
2212
     size_t len;
2162
{
2213
{
2163
  int i, j;
2214
  int i, j;
2164
2215
Lines 2210-2216 Link Here
2210
  char **cpp;
2261
  char **cpp;
2211
  char *lcp;
2262
  char *lcp;
2212
  char *rcp;
2263
  char *rcp;
2213
  int i, len;
2264
  size_t i, len;
2214
2265
2215
  if (left == NULL || right == NULL)
2266
  if (left == NULL || right == NULL)
2216
    return NULL;
2267
    return NULL;
Lines 2225-2231 Link Here
2225
      while (rcp != NULL)
2276
      while (rcp != NULL)
2226
	{
2277
	{
2227
	  for (i = 1; lcp[i] != '\0' && lcp[i] == rcp[i]; ++i)
2278
	  for (i = 1; lcp[i] != '\0' && lcp[i] == rcp[i]; ++i)
2228
	    ;
2279
	    continue;
2229
	  if (i > len)
2280
	  if (i > len)
2230
	    len = i;
2281
	    len = i;
2231
	  rcp = index(rcp + 1, *lcp);
2282
	  rcp = index(rcp + 1, *lcp);
Lines 2285-2290 Link Here
2285
	    }
2336
	    }
2286
	  both = addlists(both, temp);
2337
	  both = addlists(both, temp);
2287
	  freelist(temp);
2338
	  freelist(temp);
2339
	  free(temp);
2288
	  if (both == NULL)
2340
	  if (both == NULL)
2289
	    return NULL;
2341
	    return NULL;
2290
	}
2342
	}
Lines 2321-2328 Link Here
2321
  token t;
2373
  token t;
2322
  static must must0;
2374
  static must must0;
2323
  struct dfamust *dm;
2375
  struct dfamust *dm;
2376
  static char empty_string[] = "";
2324
2377
2325
  result = "";
2378
  result = empty_string;
2326
  exact = 0;
2379
  exact = 0;
2327
  musts = (must *) malloc((dfa->tindex + 1) * sizeof *musts);
2380
  musts = (must *) malloc((dfa->tindex + 1) * sizeof *musts);
2328
  if (musts == NULL)
2381
  if (musts == NULL)
Lines 2509-2515 Link Here
2509
	      resetmust(mp);
2562
	      resetmust(mp);
2510
	      mp->is[0] = mp->left[0] = mp->right[0] = t;
2563
	      mp->is[0] = mp->left[0] = mp->right[0] = t;
2511
	      mp->is[1] = mp->left[1] = mp->right[1] = '\0';
2564
	      mp->is[1] = mp->left[1] = mp->right[1] = '\0';
2512
	      mp->in = enlist(mp->in, mp->is, 1);
2565
	      mp->in = enlist(mp->in, mp->is, (size_t)1);
2513
	      if (mp->in == NULL)
2566
	      if (mp->in == NULL)
2514
		goto done;
2567
		goto done;
2515
	    }
2568
	    }
(-)grep/dfa.h (-19 / +30 lines)
Lines 1-5 Link Here
1
/* dfa.h - declarations for GNU deterministic regexp compiler
1
/* dfa.h - declarations for GNU deterministic regexp compiler
2
   Copyright (C) 1988 Free Software Foundation, Inc.
2
   Copyright (C) 1988, 1998 Free Software Foundation, Inc.
3
3
4
   This program is free software; you can redistribute it and/or modify
4
   This program is free software; you can redistribute it and/or modify
5
   it under the terms of the GNU General Public License as published by
5
   it under the terms of the GNU General Public License as published by
Lines 13-19 Link Here
13
13
14
   You should have received a copy of the GNU General Public License
14
   You should have received a copy of the GNU General Public License
15
   along with this program; if not, write to the Free Software
15
   along with this program; if not, write to the Free Software
16
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
16
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA */
17
17
18
/* Written June, 1988 by Mike Haertel */
18
/* Written June, 1988 by Mike Haertel */
19
19
Lines 22-35 Link Here
22
   In addition to clobbering modularity, we eat up valuable
22
   In addition to clobbering modularity, we eat up valuable
23
   name space. */
23
   name space. */
24
24
25
# undef PARAMS
26
#if __STDC__
27
# ifndef _PTR_T
28
# define _PTR_T
29
  typedef void * ptr_t;
30
# endif
31
# define PARAMS(x) x
32
#else
33
# ifndef _PTR_T
34
# define _PTR_T
35
  typedef char * ptr_t;
36
# endif
37
# define PARAMS(x) ()
38
#endif
39
25
/* Number of bits in an unsigned char. */
40
/* Number of bits in an unsigned char. */
41
#ifndef CHARBITS
26
#define CHARBITS 8
42
#define CHARBITS 8
43
#endif
27
44
28
/* First integer value that is greater than any character code. */
45
/* First integer value that is greater than any character code. */
29
#define NOTCHAR (1 << CHARBITS)
46
#define NOTCHAR (1 << CHARBITS)
30
47
31
/* INTBITS need not be exact, just a lower bound. */
48
/* INTBITS need not be exact, just a lower bound. */
49
#ifndef INTBITS
32
#define INTBITS (CHARBITS * sizeof (int))
50
#define INTBITS (CHARBITS * sizeof (int))
51
#endif
33
52
34
/* Number of ints required to hold a bit for every character. */
53
/* Number of ints required to hold a bit for every character. */
35
#define CHARCLASS_INTS ((NOTCHAR + INTBITS - 1) / INTBITS)
54
#define CHARCLASS_INTS ((NOTCHAR + INTBITS - 1) / INTBITS)
Lines 301-316 Link Here
301
320
302
/* Entry points. */
321
/* Entry points. */
303
322
304
#if __STDC__
305
306
/* dfasyntax() takes two arguments; the first sets the syntax bits described
323
/* dfasyntax() takes two arguments; the first sets the syntax bits described
307
   earlier in this file, and the second sets the case-folding flag. */
324
   earlier in this file, and the second sets the case-folding flag. */
308
extern void dfasyntax(int, int);
325
extern void dfasyntax PARAMS ((reg_syntax_t, int));
309
326
310
/* Compile the given string of the given length into the given struct dfa.
327
/* Compile the given string of the given length into the given struct dfa.
311
   Final argument is a flag specifying whether to build a searching or an
328
   Final argument is a flag specifying whether to build a searching or an
312
   exact matcher. */
329
   exact matcher. */
313
extern void dfacomp(char *, size_t, struct dfa *, int);
330
extern void dfacomp PARAMS ((char *, size_t, struct dfa *, int));
314
331
315
/* Execute the given struct dfa on the buffer of characters.  The
332
/* Execute the given struct dfa on the buffer of characters.  The
316
   first char * points to the beginning, and the second points to the
333
   first char * points to the beginning, and the second points to the
Lines 324-349 Link Here
324
   order to verify backreferencing; otherwise the flag will be cleared.
341
   order to verify backreferencing; otherwise the flag will be cleared.
325
   Returns NULL if no match is found, or a pointer to the first
342
   Returns NULL if no match is found, or a pointer to the first
326
   character after the first & shortest matching string in the buffer. */
343
   character after the first & shortest matching string in the buffer. */
327
extern char *dfaexec(struct dfa *, char *, char *, int, int *, int *);
344
extern char *dfaexec PARAMS ((struct dfa *, char *, char *, int, int *, int *));
328
345
329
/* Free the storage held by the components of a struct dfa. */
346
/* Free the storage held by the components of a struct dfa. */
330
extern void dfafree(struct dfa *);
347
extern void dfafree PARAMS ((struct dfa *));
331
348
332
/* Entry points for people who know what they're doing. */
349
/* Entry points for people who know what they're doing. */
333
350
334
/* Initialize the components of a struct dfa. */
351
/* Initialize the components of a struct dfa. */
335
extern void dfainit(struct dfa *);
352
extern void dfainit PARAMS ((struct dfa *));
336
353
337
/* Incrementally parse a string of given length into a struct dfa. */
354
/* Incrementally parse a string of given length into a struct dfa. */
338
extern void dfaparse(char *, size_t, struct dfa *);
355
extern void dfaparse PARAMS ((char *, size_t, struct dfa *));
339
356
340
/* Analyze a parsed regexp; second argument tells whether to build a searching
357
/* Analyze a parsed regexp; second argument tells whether to build a searching
341
   or an exact matcher. */
358
   or an exact matcher. */
342
extern void dfaanalyze(struct dfa *, int);
359
extern void dfaanalyze PARAMS ((struct dfa *, int));
343
360
344
/* Compute, for each possible character, the transitions out of a given
361
/* Compute, for each possible character, the transitions out of a given
345
   state, storing them in an array of integers. */
362
   state, storing them in an array of integers. */
346
extern void dfastate(int, struct dfa *, int []);
363
extern void dfastate PARAMS ((int, struct dfa *, int []));
347
364
348
/* Error handling. */
365
/* Error handling. */
349
366
Lines 351-360 Link Here
351
   takes a single argument, a NUL-terminated string describing the error.
368
   takes a single argument, a NUL-terminated string describing the error.
352
   The default dfaerror() prints the error message to stderr and exits.
369
   The default dfaerror() prints the error message to stderr and exits.
353
   The user can provide a different dfaerror() if so desired. */
370
   The user can provide a different dfaerror() if so desired. */
354
extern void dfaerror(char *);
371
extern void dfaerror PARAMS ((const char *));
355
356
#else /* ! __STDC__ */
357
extern void dfasyntax(), dfacomp(), dfafree(), dfainit(), dfaparse();
358
extern void dfaanalyze(), dfastate(), dfaerror();
359
extern char *dfaexec();
360
#endif /* ! __STDC__ */
(-)grep/dosbuf.c (+186 lines)
Line 0 Link Here
1
/* Messy DOS-specific code for correctly treating binary, Unix text
2
   and DOS text files.
3
4
   This has several aspects:
5
6
     * Guessing the file type (unless the user tells us);
7
     * Stripping CR characters from DOS text files (otherwise regex
8
       functions won't work correctly);
9
     * Reporting correct byte count with -b for any kind of file.
10
11
*/
12
13
typedef enum {
14
  UNKNOWN, DOS_BINARY, DOS_TEXT, UNIX_TEXT
15
} File_type;
16
17
struct dos_map {
18
  off_t pos;	/* position in buffer passed to matcher */
19
  off_t add;	/* how much to add when reporting char position */
20
};
21
22
static int       dos_report_unix_offset = 0;
23
24
static File_type dos_file_type     = UNKNOWN;
25
static File_type dos_use_file_type = UNKNOWN;
26
static off_t     dos_stripped_crs  = 0;
27
static struct dos_map *dos_pos_map;
28
static int       dos_pos_map_size  = 0;
29
static int       dos_pos_map_used  = 0;
30
static int       inp_map_idx = 0, out_map_idx = 1;
31
32
/* Guess DOS file type by looking at its contents.  */
33
static inline File_type
34
guess_type(buf, buflen)
35
	char *buf;
36
	register size_t buflen;
37
{
38
  int crlf_seen = 0;
39
  register char *bp = buf;
40
41
  while (buflen--)
42
    {
43
      /* Treat a file as binary if it has a NUL character.  */
44
      if (!*bp)
45
        return DOS_BINARY;
46
47
      /* CR before LF means DOS text file (unless we later see
48
         binary characters).  */
49
      else if (*bp == '\r' && buflen && bp[1] == '\n')
50
        crlf_seen = 1;
51
52
      bp++;
53
    }
54
55
  return crlf_seen ? DOS_TEXT : UNIX_TEXT;
56
}
57
58
/* Convert external DOS file representation to internal.
59
   Return the count of characters left in the buffer.
60
   Build table to map character positions when reporting byte counts.  */
61
static inline int
62
undossify_input(buf, buflen)
63
	register char *buf;
64
	size_t buflen;
65
{
66
  int chars_left = 0;
67
68
  if (totalcc == 0)
69
    {
70
      /* New file: forget everything we knew about character
71
         position mapping table and file type.  */
72
      inp_map_idx = 0;
73
      out_map_idx = 1;
74
      dos_pos_map_used = 0;
75
      dos_stripped_crs = 0;
76
      dos_file_type = dos_use_file_type;
77
    }
78
79
  /* Guess if this file is binary, unless we already know that.  */
80
  if (dos_file_type == UNKNOWN)
81
    dos_file_type = guess_type(buf, buflen);
82
83
  /* If this file is to be treated as DOS Text, strip the CR characters
84
     and maybe build the table for character position mapping on output.  */
85
  if (dos_file_type == DOS_TEXT)
86
    {
87
      char   *destp   = buf;
88
89
      while (buflen--)
90
        {
91
          if (*buf != '\r')
92
            {
93
              *destp++ = *buf++;
94
              chars_left++;
95
            }
96
          else
97
            {
98
              buf++;
99
              if (out_byte && !dos_report_unix_offset)
100
                {
101
                  dos_stripped_crs++;
102
                  while (buflen && *buf == '\r')
103
                    {
104
                      dos_stripped_crs++;
105
                      buflen--;
106
                      buf++;
107
                    }
108
                  if (inp_map_idx >= dos_pos_map_size - 1)
109
                    {
110
                      dos_pos_map_size = inp_map_idx ? inp_map_idx * 2 : 1000;
111
                      dos_pos_map =
112
                        (struct dos_map *)xrealloc((char *)dos_pos_map,
113
						   dos_pos_map_size *
114
						   sizeof(struct dos_map));
115
                    }
116
117
                  if (!inp_map_idx)
118
                    {
119
                      /* Add sentinel entry.  */
120
                      dos_pos_map[inp_map_idx].pos = 0;
121
                      dos_pos_map[inp_map_idx++].add = 0;
122
123
                      /* Initialize first real entry.  */
124
                      dos_pos_map[inp_map_idx].add = 0;
125
                    }
126
127
                  /* Put the new entry.  If the stripped CR characters
128
                     precede a Newline (the usual case), pretend that
129
                     they were found *after* the Newline.  This makes
130
                     displayed byte offsets more reasonable in some
131
                     cases, and fits better the intuitive notion that
132
                     the line ends *before* the CR, not *after* it.  */
133
                  inp_map_idx++;
134
                  dos_pos_map[inp_map_idx-1].pos =
135
                    (*buf == '\n' ? destp + 1 : destp ) - bufbeg + totalcc;
136
                  dos_pos_map[inp_map_idx].add = dos_stripped_crs;
137
                  dos_pos_map_used = inp_map_idx;
138
139
                  /* The following will be updated on the next pass.  */
140
                  dos_pos_map[inp_map_idx].pos = destp - bufbeg + totalcc + 1;
141
                }
142
            }
143
        }
144
145
      return chars_left;
146
    }
147
148
  return buflen;
149
}
150
151
/* Convert internal byte count into external.  */
152
static inline off_t
153
dossified_pos (byteno)
154
	off_t byteno;
155
{
156
  off_t pos_lo;
157
  off_t pos_hi;
158
159
  if (dos_file_type != DOS_TEXT || dos_report_unix_offset)
160
    return byteno;
161
162
  /* Optimization: usually the file will be scanned sequentially.
163
     So in most cases, this byte position will be found in the
164
     table near the previous one, as recorded in `out_map_idx'.  */
165
  pos_lo = dos_pos_map[out_map_idx-1].pos;
166
  pos_hi = dos_pos_map[out_map_idx].pos;
167
168
  /* If the initial guess failed, search up or down, as
169
     appropriate, beginning with the previous place.  */
170
  if (byteno >= pos_hi)
171
    {
172
      out_map_idx++;
173
      while (out_map_idx < dos_pos_map_used &&
174
             byteno >= dos_pos_map[out_map_idx].pos)
175
        out_map_idx++;
176
    }
177
178
  else if (byteno < pos_lo)
179
    {
180
      out_map_idx--;
181
      while (out_map_idx > 1 && byteno < dos_pos_map[out_map_idx-1].pos)
182
        out_map_idx--;
183
    }
184
185
  return byteno + dos_pos_map[out_map_idx].add;
186
}
(-)grep/egrepmat.c (+6 lines)
Line 0 Link Here
1
#ifdef HAVE_CONFIG_H
2
# include <config.h>
3
#endif
4
#include "system.h"
5
#include "grep.h"
6
char const default_matcher[] = "egrep";
(-)grep/fgrepmat.c (+6 lines)
Line 0 Link Here
1
#ifdef HAVE_CONFIG_H
2
# include <config.h>
3
#endif
4
#include "system.h"
5
#include "grep.h"
6
char const default_matcher[] = "fgrep";
(-)grep/getopt.c (-173 / +442 lines)
Lines 3-54 Link Here
3
   "Keep this file name-space clean" means, talk to roland@gnu.ai.mit.edu
3
   "Keep this file name-space clean" means, talk to roland@gnu.ai.mit.edu
4
   before changing it!
4
   before changing it!
5
5
6
   Copyright (C) 1987, 88, 89, 90, 91, 92, 1993
6
   Copyright (C) 1987, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97
7
   	Free Software Foundation, Inc.
7
   	Free Software Foundation, Inc.
8
8
9
   This program is free software; you can redistribute it and/or modify it
9
   This file is part of the GNU C Library.  Its master source is NOT part of
10
   under the terms of the GNU General Public License as published by the
10
   the C library, however.  The master source lives in /gd/gnu/lib.
11
   Free Software Foundation; either version 2, or (at your option) any
12
   later version.
13
11
14
   This program is distributed in the hope that it will be useful,
12
   The GNU C Library is free software; you can redistribute it and/or
13
   modify it under the terms of the GNU Library General Public License as
14
   published by the Free Software Foundation; either version 2 of the
15
   License, or (at your option) any later version.
16
17
   The GNU C Library is distributed in the hope that it will be useful,
15
   but WITHOUT ANY WARRANTY; without even the implied warranty of
18
   but WITHOUT ANY WARRANTY; without even the implied warranty of
16
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
   GNU General Public License for more details.
20
   Library General Public License for more details.
18
21
19
   You should have received a copy of the GNU General Public License
22
   You should have received a copy of the GNU Library General Public
20
   along with this program; if not, write to the Free Software
23
   License along with the GNU C Library; see the file COPYING.LIB.  If not,
21
   Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
24
   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
25
   Boston, MA 02111-1307, USA.  */
22
26
23
/* NOTE!!!  AIX requires this to be the first thing in the file.
27
/* This tells Alpha OSF/1 not to define a getopt prototype in <stdio.h>.
24
   Do not put ANYTHING before it!  */
28
   Ditto for AIX 3.2 and <stdlib.h>.  */
25
#if !defined (__GNUC__) && defined (_AIX)
29
#ifndef _NO_PROTO
26
 #pragma alloca
30
#define _NO_PROTO
27
#endif
31
#endif
28
32
29
#ifdef HAVE_CONFIG_H
33
#ifdef HAVE_CONFIG_H
30
#include "config.h"
34
#include <config.h>
31
#endif
32
33
#ifdef __GNUC__
34
#define alloca __builtin_alloca
35
#else /* not __GNUC__ */
36
#if defined (HAVE_ALLOCA_H) || (defined(sparc) && (defined(sun) || (!defined(USG) && !defined(SVR4) && !defined(__svr4__))))
37
#include <alloca.h>
38
#else
39
#ifndef _AIX
40
char *alloca ();
41
#endif
35
#endif
42
#endif /* alloca.h */
43
#endif /* not __GNUC__ */
44
36
45
#if !__STDC__ && !defined(const) && IN_GCC
37
#if !defined (__STDC__) || !__STDC__
38
/* This is a separate conditional since some stdc systems
39
   reject `defined (const)'.  */
40
#ifndef const
46
#define const
41
#define const
47
#endif
42
#endif
48
49
/* This tells Alpha OSF/1 not to define a getopt prototype in <stdio.h>.  */
50
#ifndef _NO_PROTO
51
#define _NO_PROTO
52
#endif
43
#endif
53
44
54
#include <stdio.h>
45
#include <stdio.h>
Lines 61-84 Link Here
61
   program understand `configure --with-gnu-libc' and omit the object files,
52
   program understand `configure --with-gnu-libc' and omit the object files,
62
   it is simpler to just do this in the source for each such file.  */
53
   it is simpler to just do this in the source for each such file.  */
63
54
64
#if defined (_LIBC) || !defined (__GNU_LIBRARY__)
55
#define GETOPT_INTERFACE_VERSION 2
56
#if !defined (_LIBC) && defined (__GLIBC__) && __GLIBC__ >= 2
57
#include <gnu-versions.h>
58
#if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION
59
#define ELIDE_CODE
60
#endif
61
#endif
62
63
#ifndef ELIDE_CODE
65
64
66
65
67
/* This needs to come after some library #include
66
/* This needs to come after some library #include
68
   to get __GNU_LIBRARY__ defined.  */
67
   to get __GNU_LIBRARY__ defined.  */
69
#ifdef	__GNU_LIBRARY__
68
#ifdef	__GNU_LIBRARY__
70
#undef	alloca
71
/* Don't include stdlib.h for non-GNU C libraries because some of them
69
/* Don't include stdlib.h for non-GNU C libraries because some of them
72
   contain conflicting prototypes for getopt.  */
70
   contain conflicting prototypes for getopt.  */
73
#include <stdlib.h>
71
#include <stdlib.h>
74
#else	/* Not GNU C library.  */
72
#include <unistd.h>
75
#define	__alloca	alloca
76
#endif	/* GNU C library.  */
73
#endif	/* GNU C library.  */
77
74
78
/* If GETOPT_COMPAT is defined, `+' as well as `--' can introduce a
75
#ifdef VMS
79
   long-named option.  Because this is not POSIX.2 compliant, it is
76
#include <unixlib.h>
80
   being phased out.  */
77
#if HAVE_STRING_H - 0
81
/* #define GETOPT_COMPAT */
78
#include <string.h>
79
#endif
80
#endif
81
82
#if defined (WIN32) && !defined (__CYGWIN32__)
83
/* It's not Unix, really.  See?  Capital letters.  */
84
#include <windows.h>
85
#define getpid() GetCurrentProcessId()
86
#endif
87
88
#ifndef _
89
/* This is for other GNU distributions with internationalized messages.
90
   When compiling libc, the _ macro is predefined.  */
91
#ifdef HAVE_LIBINTL_H
92
# include <libintl.h>
93
# define _(msgid)	gettext (msgid)
94
#else
95
# define _(msgid)	(msgid)
96
#endif
97
#endif
82
98
83
/* This version of `getopt' appears to the caller like standard Unix `getopt'
99
/* This version of `getopt' appears to the caller like standard Unix `getopt'
84
   but it behaves differently for the user, since it allows the user
100
   but it behaves differently for the user, since it allows the user
Lines 102-108 Link Here
102
   Also, when `ordering' is RETURN_IN_ORDER,
118
   Also, when `ordering' is RETURN_IN_ORDER,
103
   each non-option ARGV-element is returned here.  */
119
   each non-option ARGV-element is returned here.  */
104
120
105
char *optarg = 0;
121
char *optarg = NULL;
106
122
107
/* Index in ARGV of the next element to be scanned.
123
/* Index in ARGV of the next element to be scanned.
108
   This is used for communication to and from the caller
124
   This is used for communication to and from the caller
Lines 110-123 Link Here
110
126
111
   On entry to `getopt', zero means this is the first call; initialize.
127
   On entry to `getopt', zero means this is the first call; initialize.
112
128
113
   When `getopt' returns EOF, this is the index of the first of the
129
   When `getopt' returns -1, this is the index of the first of the
114
   non-option elements that the caller should itself scan.
130
   non-option elements that the caller should itself scan.
115
131
116
   Otherwise, `optind' communicates from one call to the next
132
   Otherwise, `optind' communicates from one call to the next
117
   how much of ARGV has been scanned so far.  */
133
   how much of ARGV has been scanned so far.  */
118
134
119
/* XXX 1003.2 says this must be 1 before any call.  */
135
/* 1003.2 says this must be 1 before any call.  */
120
int optind = 0;
136
int optind = 1;
137
138
/* Formerly, initialization of getopt depended on optind==0, which
139
   causes problems with re-calling getopt as programs generally don't
140
   know that. */
141
142
int __getopt_initialized = 0;
121
143
122
/* The next char to be scanned in the option-element
144
/* The next char to be scanned in the option-element
123
   in which the last option character we returned was found.
145
   in which the last option character we returned was found.
Lines 166-177 Link Here
166
188
167
   The special argument `--' forces an end of option-scanning regardless
189
   The special argument `--' forces an end of option-scanning regardless
168
   of the value of `ordering'.  In the case of RETURN_IN_ORDER, only
190
   of the value of `ordering'.  In the case of RETURN_IN_ORDER, only
169
   `--' can cause `getopt' to return EOF with `optind' != ARGC.  */
191
   `--' can cause `getopt' to return -1 with `optind' != ARGC.  */
170
192
171
static enum
193
static enum
172
{
194
{
173
  REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER
195
  REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER
174
} ordering;
196
} ordering;
197
198
/* Value of POSIXLY_CORRECT environment variable.  */
199
static char *posixly_correct;
175
200
176
#ifdef	__GNU_LIBRARY__
201
#ifdef	__GNU_LIBRARY__
177
/* We want to avoid inclusion of string.h with non-GNU libraries
202
/* We want to avoid inclusion of string.h with non-GNU libraries
Lines 180-186 Link Here
180
   in GCC.  */
205
   in GCC.  */
181
#include <string.h>
206
#include <string.h>
182
#define	my_index	strchr
207
#define	my_index	strchr
183
#define	my_bcopy(src, dst, n)	memcpy ((dst), (src), (n))
184
#else
208
#else
185
209
186
/* Avoid depending on library functions or files
210
/* Avoid depending on library functions or files
Lines 202-218 Link Here
202
  return 0;
226
  return 0;
203
}
227
}
204
228
205
static void
229
/* If using GCC, we can safely declare strlen this way.
206
my_bcopy (from, to, size)
230
   If not using GCC, it is ok not to declare it.  */
207
     const char *from;
231
#ifdef __GNUC__
208
     char *to;
232
/* Note that Motorola Delta 68k R3V7 comes with GCC but not stddef.h.
209
     int size;
233
   That was relevant to code that was here before.  */
210
{
234
#if !defined (__STDC__) || !__STDC__
211
  int i;
235
/* gcc with -traditional declares the built-in strlen to return int,
212
  for (i = 0; i < size; i++)
236
   and has done so at least since version 2.4.5. -- rms.  */
213
    to[i] = from[i];
237
extern int strlen (const char *);
214
}
238
#endif /* not __STDC__ */
215
#endif				/* GNU C library.  */
239
#endif /* __GNUC__ */
240
241
#endif /* not __GNU_LIBRARY__ */
216
242
217
/* Handle permutation of arguments.  */
243
/* Handle permutation of arguments.  */
218
244
Lines 223-228 Link Here
223
static int first_nonopt;
249
static int first_nonopt;
224
static int last_nonopt;
250
static int last_nonopt;
225
251
252
#ifdef _LIBC
253
/* Bash 2.0 gives us an environment variable containing flags
254
   indicating ARGV elements that should not be considered arguments.  */
255
256
static const char *nonoption_flags;
257
static int nonoption_flags_len;
258
259
static int original_argc;
260
static char *const *original_argv;
261
262
/* To make sure the environment variable bash 2.0 puts in the environment
263
   is valid for the getopt call, we must make sure that the ARGV passed
264
   to getopt is the one passed to the process.  */
265
static void store_args (int argc, char *const *argv) __attribute__ ((unused));
266
static void
267
store_args (int argc, char *const *argv)
268
{
269
  /* XXX This is not a good solution.  We should rather copy the args so
270
     that we can compare them later.  But we must not use malloc(3).  */
271
  original_argc = argc;
272
  original_argv = argv;
273
}
274
text_set_element (__libc_subinit, store_args);
275
#endif
276
226
/* Exchange two adjacent subsequences of ARGV.
277
/* Exchange two adjacent subsequences of ARGV.
227
   One subsequence is elements [first_nonopt,last_nonopt)
278
   One subsequence is elements [first_nonopt,last_nonopt)
228
   which contains all the non-options that have been skipped so far.
279
   which contains all the non-options that have been skipped so far.
Lines 232-258 Link Here
232
   `first_nonopt' and `last_nonopt' are relocated so that they describe
283
   `first_nonopt' and `last_nonopt' are relocated so that they describe
233
   the new indices of the non-options in ARGV after they are moved.  */
284
   the new indices of the non-options in ARGV after they are moved.  */
234
285
286
#if defined (__STDC__) && __STDC__
287
static void exchange (char **);
288
#endif
289
235
static void
290
static void
236
exchange (argv)
291
exchange (argv)
237
     char **argv;
292
     char **argv;
238
{
293
{
239
  int nonopts_size = (last_nonopt - first_nonopt) * sizeof (char *);
294
  int bottom = first_nonopt;
240
  char **temp = (char **) __alloca (nonopts_size);
295
  int middle = last_nonopt;
296
  int top = optind;
297
  char *tem;
298
299
  /* Exchange the shorter segment with the far end of the longer segment.
300
     That puts the shorter segment into the right place.
301
     It leaves the longer segment in the right place overall,
302
     but it consists of two parts that need to be swapped next.  */
241
303
242
  /* Interchange the two blocks of data in ARGV.  */
304
  while (top > middle && middle > bottom)
305
    {
306
      if (top - middle > middle - bottom)
307
	{
308
	  /* Bottom segment is the short one.  */
309
	  int len = middle - bottom;
310
	  register int i;
243
311
244
  my_bcopy ((char *) &argv[first_nonopt], (char *) temp, nonopts_size);
312
	  /* Swap it with the top part of the top segment.  */
245
  my_bcopy ((char *) &argv[last_nonopt], (char *) &argv[first_nonopt],
313
	  for (i = 0; i < len; i++)
246
	    (optind - last_nonopt) * sizeof (char *));
314
	    {
247
  my_bcopy ((char *) temp,
315
	      tem = argv[bottom + i];
248
	    (char *) &argv[first_nonopt + optind - last_nonopt],
316
	      argv[bottom + i] = argv[top - (middle - bottom) + i];
249
	    nonopts_size);
317
	      argv[top - (middle - bottom) + i] = tem;
318
	    }
319
	  /* Exclude the moved bottom segment from further swapping.  */
320
	  top -= len;
321
	}
322
      else
323
	{
324
	  /* Top segment is the short one.  */
325
	  int len = top - middle;
326
	  register int i;
327
328
	  /* Swap it with the bottom part of the bottom segment.  */
329
	  for (i = 0; i < len; i++)
330
	    {
331
	      tem = argv[bottom + i];
332
	      argv[bottom + i] = argv[middle + i];
333
	      argv[middle + i] = tem;
334
	    }
335
	  /* Exclude the moved top segment from further swapping.  */
336
	  bottom += len;
337
	}
338
    }
250
339
251
  /* Update records for the slots the non-options now occupy.  */
340
  /* Update records for the slots the non-options now occupy.  */
252
341
253
  first_nonopt += (optind - last_nonopt);
342
  first_nonopt += (optind - last_nonopt);
254
  last_nonopt = optind;
343
  last_nonopt = optind;
255
}
344
}
345
346
/* Initialize the internal data when the first call is made.  */
347
348
#if defined (__STDC__) && __STDC__
349
static const char *_getopt_initialize (int, char *const *, const char *);
350
#endif
351
static const char *
352
_getopt_initialize (argc, argv, optstring)
353
     int argc;
354
     char *const *argv;
355
     const char *optstring;
356
{
357
  /* Start processing options with ARGV-element 1 (since ARGV-element 0
358
     is the program name); the sequence of previously skipped
359
     non-option ARGV-elements is empty.  */
360
361
  first_nonopt = last_nonopt = optind = 1;
362
363
  nextchar = NULL;
364
365
  posixly_correct = getenv ("POSIXLY_CORRECT");
366
367
  /* Determine how to handle the ordering of options and nonoptions.  */
368
369
  if (optstring[0] == '-')
370
    {
371
      ordering = RETURN_IN_ORDER;
372
      ++optstring;
373
    }
374
  else if (optstring[0] == '+')
375
    {
376
      ordering = REQUIRE_ORDER;
377
      ++optstring;
378
    }
379
  else if (posixly_correct != NULL)
380
    ordering = REQUIRE_ORDER;
381
  else
382
    ordering = PERMUTE;
383
384
#ifdef _LIBC
385
  if (posixly_correct == NULL
386
      && argc == original_argc && argv == original_argv)
387
    {
388
      /* Bash 2.0 puts a special variable in the environment for each
389
	 command it runs, specifying which ARGV elements are the results of
390
	 file name wildcard expansion and therefore should not be
391
	 considered as options.  */
392
      char var[100];
393
      sprintf (var, "_%d_GNU_nonoption_argv_flags_", getpid ());
394
      nonoption_flags = getenv (var);
395
      if (nonoption_flags == NULL)
396
	nonoption_flags_len = 0;
397
      else
398
	nonoption_flags_len = strlen (nonoption_flags);
399
    }
400
  else
401
    nonoption_flags_len = 0;
402
#endif
403
404
  return optstring;
405
}
256
406
257
/* Scan elements of ARGV (whose length is ARGC) for option characters
407
/* Scan elements of ARGV (whose length is ARGC) for option characters
258
   given in OPTSTRING.
408
   given in OPTSTRING.
Lines 267-273 Link Here
267
   updating `optind' and `nextchar' so that the next call to `getopt' can
417
   updating `optind' and `nextchar' so that the next call to `getopt' can
268
   resume the scan with the following option character or ARGV-element.
418
   resume the scan with the following option character or ARGV-element.
269
419
270
   If there are no more option characters, `getopt' returns `EOF'.
420
   If there are no more option characters, `getopt' returns -1.
271
   Then `optind' is the index in ARGV of the first ARGV-element
421
   Then `optind' is the index in ARGV of the first ARGV-element
272
   that is not an option.  (The ARGV-elements have been permuted
422
   that is not an option.  (The ARGV-elements have been permuted
273
   so that those that are not options now come last.)
423
   so that those that are not options now come last.)
Lines 319-359 Link Here
319
     int *longind;
469
     int *longind;
320
     int long_only;
470
     int long_only;
321
{
471
{
322
  int option_index;
472
  optarg = NULL;
323
473
324
  optarg = 0;
474
  if (!__getopt_initialized || optind == 0)
325
326
  /* Initialize the internal data when the first call is made.
327
     Start processing options with ARGV-element 1 (since ARGV-element 0
328
     is the program name); the sequence of previously skipped
329
     non-option ARGV-elements is empty.  */
330
331
  if (optind == 0)
332
    {
475
    {
333
      first_nonopt = last_nonopt = optind = 1;
476
      optstring = _getopt_initialize (argc, argv, optstring);
334
477
      optind = 1;		/* Don't scan ARGV[0], the program name.  */
335
      nextchar = NULL;
478
      __getopt_initialized = 1;
336
337
      /* Determine how to handle the ordering of options and nonoptions.  */
338
339
      if (optstring[0] == '-')
340
	{
341
	  ordering = RETURN_IN_ORDER;
342
	  ++optstring;
343
	}
344
      else if (optstring[0] == '+')
345
	{
346
	  ordering = REQUIRE_ORDER;
347
	  ++optstring;
348
	}
349
      else if (getenv ("POSIXLY_CORRECT") != NULL)
350
	ordering = REQUIRE_ORDER;
351
      else
352
	ordering = PERMUTE;
353
    }
479
    }
354
480
481
  /* Test whether ARGV[optind] points to a non-option argument.
482
     Either it does not have option syntax, or there is an environment flag
483
     from the shell indicating it is not an option.  The latter information
484
     is only used when this code is used in the GNU libc.  */
485
#ifdef _LIBC
486
#define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0'	      \
487
		     || (optind < nonoption_flags_len			      \
488
			 && nonoption_flags[optind] == '1'))
489
#else
490
#define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0')
491
#endif
492
355
  if (nextchar == NULL || *nextchar == '\0')
493
  if (nextchar == NULL || *nextchar == '\0')
356
    {
494
    {
495
      /* Advance to the next ARGV-element.  */
496
497
      /* Give FIRST_NONOPT & LAST_NONOPT rational values if OPTIND has been
498
	 moved back by the user (who may also have changed the arguments).  */
499
      if (last_nonopt > optind)
500
	last_nonopt = optind;
501
      if (first_nonopt > optind)
502
	first_nonopt = optind;
503
357
      if (ordering == PERMUTE)
504
      if (ordering == PERMUTE)
358
	{
505
	{
359
	  /* If we have just processed some options following some non-options,
506
	  /* If we have just processed some options following some non-options,
Lines 364-384 Link Here
364
	  else if (last_nonopt != optind)
511
	  else if (last_nonopt != optind)
365
	    first_nonopt = optind;
512
	    first_nonopt = optind;
366
513
367
	  /* Now skip any additional non-options
514
	  /* Skip any additional non-options
368
	     and extend the range of non-options previously skipped.  */
515
	     and extend the range of non-options previously skipped.  */
369
516
370
	  while (optind < argc
517
	  while (optind < argc && NONOPTION_P)
371
		 && (argv[optind][0] != '-' || argv[optind][1] == '\0')
372
#ifdef GETOPT_COMPAT
373
		 && (longopts == NULL
374
		     || argv[optind][0] != '+' || argv[optind][1] == '\0')
375
#endif				/* GETOPT_COMPAT */
376
		 )
377
	    optind++;
518
	    optind++;
378
	  last_nonopt = optind;
519
	  last_nonopt = optind;
379
	}
520
	}
380
521
381
      /* Special ARGV-element `--' means premature end of options.
522
      /* The special ARGV-element `--' means premature end of options.
382
	 Skip it like a null option,
523
	 Skip it like a null option,
383
	 then exchange with previous non-options as if it were an option,
524
	 then exchange with previous non-options as if it were an option,
384
	 then skip everything else like a non-option.  */
525
	 then skip everything else like a non-option.  */
Lines 405-460 Link Here
405
	     that we previously skipped, so the caller will digest them.  */
546
	     that we previously skipped, so the caller will digest them.  */
406
	  if (first_nonopt != last_nonopt)
547
	  if (first_nonopt != last_nonopt)
407
	    optind = first_nonopt;
548
	    optind = first_nonopt;
408
	  return EOF;
549
	  return -1;
409
	}
550
	}
410
551
411
      /* If we have come to a non-option and did not permute it,
552
      /* If we have come to a non-option and did not permute it,
412
	 either stop the scan or describe it to the caller and pass it by.  */
553
	 either stop the scan or describe it to the caller and pass it by.  */
413
554
414
      if ((argv[optind][0] != '-' || argv[optind][1] == '\0')
555
      if (NONOPTION_P)
415
#ifdef GETOPT_COMPAT
416
	  && (longopts == NULL
417
	      || argv[optind][0] != '+' || argv[optind][1] == '\0')
418
#endif				/* GETOPT_COMPAT */
419
	  )
420
	{
556
	{
421
	  if (ordering == REQUIRE_ORDER)
557
	  if (ordering == REQUIRE_ORDER)
422
	    return EOF;
558
	    return -1;
423
	  optarg = argv[optind++];
559
	  optarg = argv[optind++];
424
	  return 1;
560
	  return 1;
425
	}
561
	}
426
562
427
      /* We have found another option-ARGV-element.
563
      /* We have found another option-ARGV-element.
428
	 Start decoding its characters.  */
564
	 Skip the initial punctuation.  */
429
565
430
      nextchar = (argv[optind] + 1
566
      nextchar = (argv[optind] + 1
431
		  + (longopts != NULL && argv[optind][1] == '-'));
567
		  + (longopts != NULL && argv[optind][1] == '-'));
432
    }
568
    }
433
569
570
  /* Decode the current option-ARGV-element.  */
571
572
  /* Check whether the ARGV-element is a long option.
573
574
     If long_only and the ARGV-element has the form "-f", where f is
575
     a valid short option, don't consider it an abbreviated form of
576
     a long option that starts with f.  Otherwise there would be no
577
     way to give the -f short option.
578
579
     On the other hand, if there's a long option "fubar" and
580
     the ARGV-element is "-fu", do consider that an abbreviation of
581
     the long option, just like "--fu", and not "-f" with arg "u".
582
583
     This distinction seems to be the most useful approach.  */
584
434
  if (longopts != NULL
585
  if (longopts != NULL
435
      && ((argv[optind][0] == '-'
586
      && (argv[optind][1] == '-'
436
	   && (argv[optind][1] == '-' || long_only))
587
	  || (long_only && (argv[optind][2] || !my_index (optstring, argv[optind][1])))))
437
#ifdef GETOPT_COMPAT
438
	  || argv[optind][0] == '+'
439
#endif				/* GETOPT_COMPAT */
440
	  ))
441
    {
588
    {
589
      char *nameend;
442
      const struct option *p;
590
      const struct option *p;
443
      char *s = nextchar;
591
      const struct option *pfound = NULL;
444
      int exact = 0;
592
      int exact = 0;
445
      int ambig = 0;
593
      int ambig = 0;
446
      const struct option *pfound = NULL;
594
      int indfound = -1;
447
      int indfound;
595
      int option_index;
448
596
449
      while (*s && *s != '=')
597
      for (nameend = nextchar; *nameend && *nameend != '='; nameend++)
450
	s++;
598
	/* Do nothing.  */ ;
451
599
452
      /* Test all options for either exact match or abbreviated matches.  */
600
      /* Test all long options for either exact match
453
      for (p = longopts, option_index = 0; p->name;
601
	 or abbreviated matches.  */
454
	   p++, option_index++)
602
      for (p = longopts, option_index = 0; p->name; p++, option_index++)
455
	if (!strncmp (p->name, nextchar, s - nextchar))
603
	if (!strncmp (p->name, nextchar, nameend - nextchar))
456
	  {
604
	  {
457
	    if (s - nextchar == strlen (p->name))
605
	    if ((unsigned int) (nameend - nextchar)
606
		== (unsigned int) strlen (p->name))
458
	      {
607
	      {
459
		/* Exact match found.  */
608
		/* Exact match found.  */
460
		pfound = p;
609
		pfound = p;
Lines 469-485 Link Here
469
		indfound = option_index;
618
		indfound = option_index;
470
	      }
619
	      }
471
	    else
620
	    else
472
	      /* Second nonexact match found.  */
621
	      /* Second or later nonexact match found.  */
473
	      ambig = 1;
622
	      ambig = 1;
474
	  }
623
	  }
475
624
476
      if (ambig && !exact)
625
      if (ambig && !exact)
477
	{
626
	{
478
	  if (opterr)
627
	  if (opterr)
479
	    fprintf (stderr, "%s: option `%s' is ambiguous\n",
628
	    fprintf (stderr, _("%s: option `%s' is ambiguous\n"),
480
		     argv[0], argv[optind]);
629
		     argv[0], argv[optind]);
481
	  nextchar += strlen (nextchar);
630
	  nextchar += strlen (nextchar);
482
	  optind++;
631
	  optind++;
632
	  optopt = 0;
483
	  return '?';
633
	  return '?';
484
	}
634
	}
485
635
Lines 487-514 Link Here
487
	{
637
	{
488
	  option_index = indfound;
638
	  option_index = indfound;
489
	  optind++;
639
	  optind++;
490
	  if (*s)
640
	  if (*nameend)
491
	    {
641
	    {
492
	      /* Don't test has_arg with >, because some C compilers don't
642
	      /* Don't test has_arg with >, because some C compilers don't
493
		 allow it to be used on enums.  */
643
		 allow it to be used on enums.  */
494
	      if (pfound->has_arg)
644
	      if (pfound->has_arg)
495
		optarg = s + 1;
645
		optarg = nameend + 1;
496
	      else
646
	      else
497
		{
647
		{
498
		  if (opterr)
648
		  if (opterr)
499
		    {
649
		   if (argv[optind - 1][1] == '-')
500
		      if (argv[optind - 1][1] == '-')
650
		    /* --option */
501
			/* --option */
651
		    fprintf (stderr,
502
			fprintf (stderr,
652
		     _("%s: option `--%s' doesn't allow an argument\n"),
503
				 "%s: option `--%s' doesn't allow an argument\n",
653
		     argv[0], pfound->name);
504
				 argv[0], pfound->name);
654
		   else
505
		      else
655
		    /* +option or -option */
506
			/* +option or -option */
656
		    fprintf (stderr,
507
			fprintf (stderr,
657
		     _("%s: option `%c%s' doesn't allow an argument\n"),
508
			     "%s: option `%c%s' doesn't allow an argument\n",
658
		     argv[0], argv[optind - 1][0], pfound->name);
509
			     argv[0], argv[optind - 1][0], pfound->name);
659
510
		    }
511
		  nextchar += strlen (nextchar);
660
		  nextchar += strlen (nextchar);
661
662
		  optopt = pfound->val;
512
		  return '?';
663
		  return '?';
513
		}
664
		}
514
	    }
665
	    }
Lines 519-527 Link Here
519
	      else
670
	      else
520
		{
671
		{
521
		  if (opterr)
672
		  if (opterr)
522
		    fprintf (stderr, "%s: option `%s' requires an argument\n",
673
		    fprintf (stderr,
523
			     argv[0], argv[optind - 1]);
674
			   _("%s: option `%s' requires an argument\n"),
675
			   argv[0], argv[optind - 1]);
524
		  nextchar += strlen (nextchar);
676
		  nextchar += strlen (nextchar);
677
		  optopt = pfound->val;
525
		  return optstring[0] == ':' ? ':' : '?';
678
		  return optstring[0] == ':' ? ':' : '?';
526
		}
679
		}
527
	    }
680
	    }
Lines 535-568 Link Here
535
	    }
688
	    }
536
	  return pfound->val;
689
	  return pfound->val;
537
	}
690
	}
691
538
      /* Can't find it as a long option.  If this is not getopt_long_only,
692
      /* Can't find it as a long option.  If this is not getopt_long_only,
539
	 or the option starts with '--' or is not a valid short
693
	 or the option starts with '--' or is not a valid short
540
	 option, then it's an error.
694
	 option, then it's an error.
541
	 Otherwise interpret it as a short option.  */
695
	 Otherwise interpret it as a short option.  */
542
      if (!long_only || argv[optind][1] == '-'
696
      if (!long_only || argv[optind][1] == '-'
543
#ifdef GETOPT_COMPAT
544
	  || argv[optind][0] == '+'
545
#endif				/* GETOPT_COMPAT */
546
	  || my_index (optstring, *nextchar) == NULL)
697
	  || my_index (optstring, *nextchar) == NULL)
547
	{
698
	{
548
	  if (opterr)
699
	  if (opterr)
549
	    {
700
	    {
550
	      if (argv[optind][1] == '-')
701
	      if (argv[optind][1] == '-')
551
		/* --option */
702
		/* --option */
552
		fprintf (stderr, "%s: unrecognized option `--%s'\n",
703
		fprintf (stderr, _("%s: unrecognized option `--%s'\n"),
553
			 argv[0], nextchar);
704
			 argv[0], nextchar);
554
	      else
705
	      else
555
		/* +option or -option */
706
		/* +option or -option */
556
		fprintf (stderr, "%s: unrecognized option `%c%s'\n",
707
		fprintf (stderr, _("%s: unrecognized option `%c%s'\n"),
557
			 argv[0], argv[optind][0], nextchar);
708
			 argv[0], argv[optind][0], nextchar);
558
	    }
709
	    }
559
	  nextchar = (char *) "";
710
	  nextchar = (char *) "";
560
	  optind++;
711
	  optind++;
712
	  optopt = 0;
561
	  return '?';
713
	  return '?';
562
	}
714
	}
563
    }
715
    }
564
716
565
  /* Look at and handle the next option-character.  */
717
  /* Look at and handle the next short option-character.  */
566
718
567
  {
719
  {
568
    char c = *nextchar++;
720
    char c = *nextchar++;
Lines 576-595 Link Here
576
      {
728
      {
577
	if (opterr)
729
	if (opterr)
578
	  {
730
	  {
579
#if 0
731
	    if (posixly_correct)
580
	    if (c < 040 || c >= 0177)
732
	      /* 1003.2 specifies the format of this message.  */
581
	      fprintf (stderr, "%s: unrecognized option, character code 0%o\n",
733
	      fprintf (stderr, _("%s: illegal option -- %c\n"),
582
		       argv[0], c);
734
		       argv[0], c);
583
	    else
735
	    else
584
	      fprintf (stderr, "%s: unrecognized option `-%c'\n", argv[0], c);
736
	      fprintf (stderr, _("%s: invalid option -- %c\n"),
585
#else
737
		       argv[0], c);
586
	    /* 1003.2 specifies the format of this message.  */
587
	    fprintf (stderr, "%s: illegal option -- %c\n", argv[0], c);
588
#endif
589
	  }
738
	  }
590
	optopt = c;
739
	optopt = c;
591
	return '?';
740
	return '?';
592
      }
741
      }
742
    /* Convenience. Treat POSIX -W foo same as long option --foo */
743
    if (temp[0] == 'W' && temp[1] == ';')
744
      {
745
	char *nameend;
746
	const struct option *p;
747
	const struct option *pfound = NULL;
748
	int exact = 0;
749
	int ambig = 0;
750
	int indfound = 0;
751
	int option_index;
752
753
	/* This is an option that requires an argument.  */
754
	if (*nextchar != '\0')
755
	  {
756
	    optarg = nextchar;
757
	    /* If we end this ARGV-element by taking the rest as an arg,
758
	       we must advance to the next element now.  */
759
	    optind++;
760
	  }
761
	else if (optind == argc)
762
	  {
763
	    if (opterr)
764
	      {
765
		/* 1003.2 specifies the format of this message.  */
766
		fprintf (stderr, _("%s: option requires an argument -- %c\n"),
767
			 argv[0], c);
768
	      }
769
	    optopt = c;
770
	    if (optstring[0] == ':')
771
	      c = ':';
772
	    else
773
	      c = '?';
774
	    return c;
775
	  }
776
	else
777
	  /* We already incremented `optind' once;
778
	     increment it again when taking next ARGV-elt as argument.  */
779
	  optarg = argv[optind++];
780
781
	/* optarg is now the argument, see if it's in the
782
	   table of longopts.  */
783
784
	for (nextchar = nameend = optarg; *nameend && *nameend != '='; nameend++)
785
	  /* Do nothing.  */ ;
786
787
	/* Test all long options for either exact match
788
	   or abbreviated matches.  */
789
	for (p = longopts, option_index = 0; p->name; p++, option_index++)
790
	  if (!strncmp (p->name, nextchar, nameend - nextchar))
791
	    {
792
	      if ((unsigned int) (nameend - nextchar) == strlen (p->name))
793
		{
794
		  /* Exact match found.  */
795
		  pfound = p;
796
		  indfound = option_index;
797
		  exact = 1;
798
		  break;
799
		}
800
	      else if (pfound == NULL)
801
		{
802
		  /* First nonexact match found.  */
803
		  pfound = p;
804
		  indfound = option_index;
805
		}
806
	      else
807
		/* Second or later nonexact match found.  */
808
		ambig = 1;
809
	    }
810
	if (ambig && !exact)
811
	  {
812
	    if (opterr)
813
	      fprintf (stderr, _("%s: option `-W %s' is ambiguous\n"),
814
		       argv[0], argv[optind]);
815
	    nextchar += strlen (nextchar);
816
	    optind++;
817
	    return '?';
818
	  }
819
	if (pfound != NULL)
820
	  {
821
	    option_index = indfound;
822
	    if (*nameend)
823
	      {
824
		/* Don't test has_arg with >, because some C compilers don't
825
		   allow it to be used on enums.  */
826
		if (pfound->has_arg)
827
		  optarg = nameend + 1;
828
		else
829
		  {
830
		    if (opterr)
831
		      fprintf (stderr, _("\
832
%s: option `-W %s' doesn't allow an argument\n"),
833
			       argv[0], pfound->name);
834
835
		    nextchar += strlen (nextchar);
836
		    return '?';
837
		  }
838
	      }
839
	    else if (pfound->has_arg == 1)
840
	      {
841
		if (optind < argc)
842
		  optarg = argv[optind++];
843
		else
844
		  {
845
		    if (opterr)
846
		      fprintf (stderr,
847
			       _("%s: option `%s' requires an argument\n"),
848
			       argv[0], argv[optind - 1]);
849
		    nextchar += strlen (nextchar);
850
		    return optstring[0] == ':' ? ':' : '?';
851
		  }
852
	      }
853
	    nextchar += strlen (nextchar);
854
	    if (longind != NULL)
855
	      *longind = option_index;
856
	    if (pfound->flag)
857
	      {
858
		*(pfound->flag) = pfound->val;
859
		return 0;
860
	      }
861
	    return pfound->val;
862
	  }
863
	  nextchar = NULL;
864
	  return 'W';	/* Let the application handle it.   */
865
      }
593
    if (temp[1] == ':')
866
    if (temp[1] == ':')
594
      {
867
      {
595
	if (temp[2] == ':')
868
	if (temp[2] == ':')
Lines 601-607 Link Here
601
		optind++;
874
		optind++;
602
	      }
875
	      }
603
	    else
876
	    else
604
	      optarg = 0;
877
	      optarg = NULL;
605
	    nextchar = NULL;
878
	    nextchar = NULL;
606
	  }
879
	  }
607
	else
880
	else
Lines 618-631 Link Here
618
	      {
891
	      {
619
		if (opterr)
892
		if (opterr)
620
		  {
893
		  {
621
#if 0
622
		    fprintf (stderr, "%s: option `-%c' requires an argument\n",
623
			     argv[0], c);
624
#else
625
		    /* 1003.2 specifies the format of this message.  */
894
		    /* 1003.2 specifies the format of this message.  */
626
		    fprintf (stderr, "%s: option requires an argument -- %c\n",
895
		    fprintf (stderr,
627
			     argv[0], c);
896
			   _("%s: option requires an argument -- %c\n"),
628
#endif
897
			   argv[0], c);
629
		  }
898
		  }
630
		optopt = c;
899
		optopt = c;
631
		if (optstring[0] == ':')
900
		if (optstring[0] == ':')
Lines 656-662 Link Here
656
			   0);
925
			   0);
657
}
926
}
658
927
659
#endif	/* _LIBC or not __GNU_LIBRARY__.  */
928
#endif	/* Not ELIDE_CODE.  */
660
929
661
#ifdef TEST
930
#ifdef TEST
662
931
Lines 676-682 Link Here
676
      int this_option_optind = optind ? optind : 1;
945
      int this_option_optind = optind ? optind : 1;
677
946
678
      c = getopt (argc, argv, "abc:d:0123456789");
947
      c = getopt (argc, argv, "abc:d:0123456789");
679
      if (c == EOF)
948
      if (c == -1)
680
	break;
949
	break;
681
950
682
      switch (c)
951
      switch (c)
(-)grep/getopt.h (-17 / +21 lines)
Lines 1-19 Link Here
1
/* Declarations for getopt.
1
/* Declarations for getopt.
2
   Copyright (C) 1989, 1990, 1991, 1992, 1993 Free Software Foundation, Inc.
2
   Copyright (C) 1989,90,91,92,93,94,96,97, 98 Free Software Foundation, Inc.
3
3
4
   This program is free software; you can redistribute it and/or modify it
4
   This file is part of the GNU C Library.  Its master source is NOT part of
5
   under the terms of the GNU General Public License as published by the
5
   the C library, however.  The master source lives in /gd/gnu/lib.
6
   Free Software Foundation; either version 2, or (at your option) any
7
   later version.
8
6
9
   This program is distributed in the hope that it will be useful,
7
   The GNU C Library is free software; you can redistribute it and/or
8
   modify it under the terms of the GNU Library General Public License as
9
   published by the Free Software Foundation; either version 2 of the
10
   License, or (at your option) any later version.
11
12
   The GNU C Library is distributed in the hope that it will be useful,
10
   but WITHOUT ANY WARRANTY; without even the implied warranty of
13
   but WITHOUT ANY WARRANTY; without even the implied warranty of
11
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
   GNU General Public License for more details.
15
   Library General Public License for more details.
13
16
14
   You should have received a copy of the GNU General Public License
17
   You should have received a copy of the GNU Library General Public
15
   along with this program; if not, write to the Free Software
18
   License along with the GNU C Library; see the file COPYING.LIB.  If not,
16
   Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
19
   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20
   Boston, MA 02111-1307, USA.  */
17
21
18
#ifndef _GETOPT_H
22
#ifndef _GETOPT_H
19
#define _GETOPT_H 1
23
#define _GETOPT_H 1
Lines 36-42 Link Here
36
40
37
   On entry to `getopt', zero means this is the first call; initialize.
41
   On entry to `getopt', zero means this is the first call; initialize.
38
42
39
   When `getopt' returns EOF, this is the index of the first of the
43
   When `getopt' returns -1, this is the index of the first of the
40
   non-option elements that the caller should itself scan.
44
   non-option elements that the caller should itself scan.
41
45
42
   Otherwise, `optind' communicates from one call to the next
46
   Otherwise, `optind' communicates from one call to the next
Lines 76-82 Link Here
76
80
77
struct option
81
struct option
78
{
82
{
79
#if	__STDC__
83
#if defined (__STDC__) && __STDC__
80
  const char *name;
84
  const char *name;
81
#else
85
#else
82
  char *name;
86
  char *name;
Lines 94-108 Link Here
94
#define required_argument	1
98
#define required_argument	1
95
#define optional_argument	2
99
#define optional_argument	2
96
100
97
#if __STDC__
101
#if defined (__STDC__) && __STDC__
98
#if defined(__GNU_LIBRARY__)
102
#ifdef __GNU_LIBRARY__
99
/* Many other libraries have conflicting prototypes for getopt, with
103
/* Many other libraries have conflicting prototypes for getopt, with
100
   differences in the consts, in stdlib.h.  To avoid compilation
104
   differences in the consts, in stdlib.h.  To avoid compilation
101
   errors, only prototype getopt for the GNU C library.  */
105
   errors, only prototype getopt for the GNU C library.  */
102
extern int getopt (int argc, char *const *argv, const char *shortopts);
106
extern int getopt (int argc, char *const *argv, const char *shortopts);
103
#else /* not __GNU_LIBRARY__ */
107
#else /* not __GNU_LIBRARY__ */
104
extern int getopt ();
108
extern int getopt ();
105
#endif /* not __GNU_LIBRARY__ */
109
#endif /* __GNU_LIBRARY__ */
106
extern int getopt_long (int argc, char *const *argv, const char *shortopts,
110
extern int getopt_long (int argc, char *const *argv, const char *shortopts,
107
		        const struct option *longopts, int *longind);
111
		        const struct option *longopts, int *longind);
108
extern int getopt_long_only (int argc, char *const *argv,
112
extern int getopt_long_only (int argc, char *const *argv,
Lines 120-126 Link Here
120
extern int getopt_long_only ();
124
extern int getopt_long_only ();
121
125
122
extern int _getopt_internal ();
126
extern int _getopt_internal ();
123
#endif /* not __STDC__ */
127
#endif /* __STDC__ */
124
128
125
#ifdef	__cplusplus
129
#ifdef	__cplusplus
126
}
130
}
(-)grep/getopt1.c (+189 lines)
Line 0 Link Here
1
/* getopt_long and getopt_long_only entry points for GNU getopt.
2
   Copyright (C) 1987,88,89,90,91,92,93,94,96,97, 98 Free Software Foundation, Inc.
3
4
   This file is part of the GNU C Library.  Its master source is NOT part of
5
   the C library, however.  The master source lives in /gd/gnu/lib.
6
7
   The GNU C Library is free software; you can redistribute it and/or
8
   modify it under the terms of the GNU Library General Public License as
9
   published by the Free Software Foundation; either version 2 of the
10
   License, or (at your option) any later version.
11
12
   The GNU C Library is distributed in the hope that it will be useful,
13
   but WITHOUT ANY WARRANTY; without even the implied warranty of
14
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
   Library General Public License for more details.
16
17
   You should have received a copy of the GNU Library General Public
18
   License along with the GNU C Library; see the file COPYING.LIB.  If not,
19
   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20
   Boston, MA 02111-1307, USA.  */
21
22
#ifdef HAVE_CONFIG_H
23
#include <config.h>
24
#endif
25
26
#include "getopt.h"
27
28
#if !defined (__STDC__) || !__STDC__
29
/* This is a separate conditional since some stdc systems
30
   reject `defined (const)'.  */
31
#ifndef const
32
#define const
33
#endif
34
#endif
35
36
#include <stdio.h>
37
38
/* Comment out all this code if we are using the GNU C Library, and are not
39
   actually compiling the library itself.  This code is part of the GNU C
40
   Library, but also included in many other GNU distributions.  Compiling
41
   and linking in this code is a waste when using the GNU C library
42
   (especially if it is a shared library).  Rather than having every GNU
43
   program understand `configure --with-gnu-libc' and omit the object files,
44
   it is simpler to just do this in the source for each such file.  */
45
46
#define GETOPT_INTERFACE_VERSION 2
47
#if !defined (_LIBC) && defined (__GLIBC__) && __GLIBC__ >= 2
48
#include <gnu-versions.h>
49
#if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION
50
#define ELIDE_CODE
51
#endif
52
#endif
53
54
#ifndef ELIDE_CODE
55
56
57
/* This needs to come after some library #include
58
   to get __GNU_LIBRARY__ defined.  */
59
#ifdef __GNU_LIBRARY__
60
#include <stdlib.h>
61
#endif
62
63
#ifndef	NULL
64
#define NULL 0
65
#endif
66
67
int
68
getopt_long (argc, argv, options, long_options, opt_index)
69
     int argc;
70
     char *const *argv;
71
     const char *options;
72
     const struct option *long_options;
73
     int *opt_index;
74
{
75
  return _getopt_internal (argc, argv, options, long_options, opt_index, 0);
76
}
77
78
/* Like getopt_long, but '-' as well as '--' can indicate a long option.
79
   If an option that starts with '-' (not '--') doesn't match a long option,
80
   but does match a short option, it is parsed as a short option
81
   instead.  */
82
83
int
84
getopt_long_only (argc, argv, options, long_options, opt_index)
85
     int argc;
86
     char *const *argv;
87
     const char *options;
88
     const struct option *long_options;
89
     int *opt_index;
90
{
91
  return _getopt_internal (argc, argv, options, long_options, opt_index, 1);
92
}
93
94
95
#endif	/* Not ELIDE_CODE.  */
96
97
#ifdef TEST
98
99
#include <stdio.h>
100
101
int
102
main (argc, argv)
103
     int argc;
104
     char **argv;
105
{
106
  int c;
107
  int digit_optind = 0;
108
109
  while (1)
110
    {
111
      int this_option_optind = optind ? optind : 1;
112
      int option_index = 0;
113
      static struct option long_options[] =
114
      {
115
	{"add", 1, 0, 0},
116
	{"append", 0, 0, 0},
117
	{"delete", 1, 0, 0},
118
	{"verbose", 0, 0, 0},
119
	{"create", 0, 0, 0},
120
	{"file", 1, 0, 0},
121
	{0, 0, 0, 0}
122
      };
123
124
      c = getopt_long (argc, argv, "abc:d:0123456789",
125
		       long_options, &option_index);
126
      if (c == -1)
127
	break;
128
129
      switch (c)
130
	{
131
	case 0:
132
	  printf (_("option %s"), long_options[option_index].name);
133
	  if (optarg)
134
	    printf (_(" with arg %s"), optarg);
135
	  printf ("\n");
136
	  break;
137
138
	case '0':
139
	case '1':
140
	case '2':
141
	case '3':
142
	case '4':
143
	case '5':
144
	case '6':
145
	case '7':
146
	case '8':
147
	case '9':
148
	  if (digit_optind != 0 && digit_optind != this_option_optind)
149
	    printf (_("digits occur in two different argv-elements.\n"));
150
	  digit_optind = this_option_optind;
151
	  printf (_("option %c\n"), c);
152
	  break;
153
154
	case 'a':
155
	  printf (_("option a\n"));
156
	  break;
157
158
	case 'b':
159
	  printf (_("option b\n"));
160
	  break;
161
162
	case 'c':
163
	  printf (_("option c with value `%s'\n"), optarg);
164
	  break;
165
166
	case 'd':
167
	  printf (_("option d with value `%s'\n"), optarg);
168
	  break;
169
170
	case '?':
171
	  break;
172
173
	default:
174
	  printf (_("?? getopt returned character code 0%o ??\n"), c);
175
	}
176
    }
177
178
  if (optind < argc)
179
    {
180
      printf (_("non-option ARGV-elements: "));
181
      while (optind < argc)
182
	printf ("%s ", argv[optind++]);
183
      printf ("\n");
184
    }
185
186
  exit (0);
187
}
188
189
#endif /* TEST */
(-)grep/getpagesize.h (-38 / +37 lines)
Lines 1-42 Link Here
1
#ifdef BSD
1
/* Emulate getpagesize on systems that lack it.  */
2
#ifndef BSD4_1
3
#define HAVE_GETPAGESIZE
4
#endif
5
#endif
6
2
7
#ifndef HAVE_GETPAGESIZE
3
#ifndef HAVE_GETPAGESIZE
8
4
9
#ifdef VMS
5
# ifdef VMS
10
#define getpagesize() 512
6
#  define getpagesize() 512
11
#endif
7
# endif
12
8
13
#ifdef HAVE_UNISTD_H
9
# ifdef HAVE_UNISTD_H
14
#include <unistd.h>
10
#  include <unistd.h>
15
#endif
11
# endif
16
12
17
#ifdef _SC_PAGESIZE
13
# ifdef _SC_PAGESIZE
18
#define getpagesize() sysconf(_SC_PAGESIZE)
14
#  define getpagesize() sysconf(_SC_PAGESIZE)
19
#else
15
# else /* no _SC_PAGESIZE */
20
16
#  ifdef HAVE_SYS_PARAM_H
21
#ifdef HAVE_SYS_PARAM_H
17
#   include <sys/param.h>
22
#include <sys/param.h>
18
#   ifdef EXEC_PAGESIZE
23
19
#    define getpagesize() EXEC_PAGESIZE
24
#ifdef EXEC_PAGESIZE
20
#   else /* no EXEC_PAGESIZE */
25
#define getpagesize() EXEC_PAGESIZE
21
#    ifdef NBPG
26
#else
22
#     define getpagesize() NBPG * CLSIZE
27
#ifdef NBPG
23
#     ifndef CLSIZE
28
#define getpagesize() NBPG * CLSIZE
24
#      define CLSIZE 1
29
#ifndef CLSIZE
25
#     endif /* no CLSIZE */
30
#define CLSIZE 1
26
#    else /* no NBPG */
31
#endif /* no CLSIZE */
27
#     ifdef NBPC
32
#else /* no NBPG */
28
#      define getpagesize() NBPC
33
#define getpagesize() NBPC
29
#     else /* no NBPC */
34
#endif /* no NBPG */
30
#      ifdef PAGESIZE
35
#endif /* no EXEC_PAGESIZE */
31
#       define getpagesize() PAGESIZE
36
#else /* !HAVE_SYS_PARAM_H */
32
#      endif /* PAGESIZE */
37
#define getpagesize() 8192	/* punt totally */
33
#     endif /* no NBPC */
38
#endif /* !HAVE_SYS_PARAM_H */
34
#    endif /* no NBPG */
39
#endif /* no _SC_PAGESIZE */
35
#   endif /* no EXEC_PAGESIZE */
40
36
#  else /* no HAVE_SYS_PARAM_H */
41
#endif /* not HAVE_GETPAGESIZE */
37
#   define getpagesize() 8192	/* punt totally */
38
#  endif /* no HAVE_SYS_PARAM_H */
39
# endif /* no _SC_PAGESIZE */
42
40
41
#endif /* no HAVE_GETPAGESIZE */
(-)grep/grep.c (-572 / +793 lines)
Lines 1-5 Link Here
1
/* grep.c - main driver file for grep.
1
/* grep.c - main driver file for grep.
2
   Copyright (C) 1992 Free Software Foundation, Inc.
2
   Copyright (C) 1992, 1997, 1998, 1999 Free Software Foundation, Inc.
3
3
4
   This program is free software; you can redistribute it and/or modify
4
   This program is free software; you can redistribute it and/or modify
5
   it under the terms of the GNU General Public License as published by
5
   it under the terms of the GNU General Public License as published by
Lines 13-272 Link Here
13
13
14
   You should have received a copy of the GNU General Public License
14
   You should have received a copy of the GNU General Public License
15
   along with this program; if not, write to the Free Software
15
   along with this program; if not, write to the Free Software
16
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
16
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
17
   02111-1307, USA.  */
17
18
18
   Written July 1992 by Mike Haertel.  
19
/* Written July 1992 by Mike Haertel.  */
19
20
20
   Recursive searching and builtin decompression (libz)	
21
#ifdef HAVE_CONFIG_H
21
   1996/1997 by Wolfram Schneider <wosch@FreeBSD.org>. */
22
# include <config.h>
22
23
#include <errno.h>
24
#include <stdio.h>
25
#ifdef __FreeBSD__
26
#include <locale.h>
27
#endif
23
#endif
28
29
#ifndef errno
30
extern int errno;
31
#endif
32
33
#ifdef STDC_HEADERS
34
#include <stdlib.h>
35
#else
36
#include <sys/types.h>
24
#include <sys/types.h>
37
extern char *malloc(), *realloc();
25
#include <sys/stat.h>
38
extern void free();
26
#if defined(HAVE_MMAP)
39
#endif
27
# include <sys/mman.h>
40
41
#if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
42
#include <string.h>
43
#ifdef NEED_MEMORY_H
44
#include <memory.h>
45
#endif
46
#else
47
#include <strings.h>
48
#ifdef __STDC__
49
extern void *memchr();
50
#else
51
extern char *memchr();
52
#endif
28
#endif
53
#define strrchr rindex
29
#if defined(HAVE_SETRLIMIT)
30
# include <sys/time.h>
31
# include <sys/resource.h>
54
#endif
32
#endif
55
33
#include <stdio.h>
56
#ifdef HAVE_UNISTD_H
34
#include "system.h"
57
#include <sys/types.h>
35
#include "getopt.h"
58
#include <fcntl.h>
59
#include <unistd.h>
60
#else
61
#define O_RDONLY 0
62
#define STDIN_FILENO 0
63
extern int open(), read(), close();
64
#endif
65
66
#include "getpagesize.h"
36
#include "getpagesize.h"
67
#include "grep.h"
37
#include "grep.h"
38
#include "savedir.h"
68
39
69
#undef MAX
40
#undef MAX
70
#define MAX(A,B) ((A) > (B) ? (A) : (B))
41
#define MAX(A,B) ((A) > (B) ? (A) : (B))
71
42
72
/* Provide missing ANSI features if necessary. */
43
struct stats
44
{
45
  struct stats *parent;
46
  struct stat stat;
47
};
73
48
74
#ifndef HAVE_STRERROR
49
/* base of chain of stat buffers, used to detect directory loops */
75
extern int sys_nerr;
50
static struct stats stats_base;
76
extern char *sys_errlist[];
77
#define strerror(E) ((E) < sys_nerr ? sys_errlist[(E)] : "bogus error number")
78
#endif
79
51
80
#ifndef HAVE_MEMCHR
52
/* if non-zero, display usage information and exit */
81
#ifdef __STDC__
53
static int show_help;
82
#define VOID void
83
#else
84
#define VOID char
85
#endif
86
VOID *
87
memchr(vp, c, n)
88
     VOID *vp;
89
     int c;
90
     size_t n;
91
{
92
  unsigned char *p;
93
94
  for (p = (unsigned char *) vp; n--; ++p)
95
    if (*p == c)
96
      return (VOID *) p;
97
  return 0;
98
}
99
#endif
100
54
101
/* traverse a file hierarchy library */
55
/* If non-zero, print the version on standard output and exit.  */
102
#if HAVE_FTS > 0
56
static int show_version;
103
#include <sys/types.h>
104
#include <sys/stat.h>
105
#include <fts.h>
106
#endif
107
57
108
/* don't search in binary files */
58
/* Long option equivalences. */
109
int aflag;
59
static struct option long_options[] =
60
{
61
  {"after-context", required_argument, NULL, 'A'},
62
  {"basic-regexp", no_argument, NULL, 'G'},
63
  {"before-context", required_argument, NULL, 'B'},
64
  {"byte-offset", no_argument, NULL, 'b'},
65
  {"context", optional_argument, NULL, 'C'},
66
  {"count", no_argument, NULL, 'c'},
67
  {"directories", required_argument, NULL, 'd'},
68
  {"extended-regexp", no_argument, NULL, 'E'},
69
  {"file", required_argument, NULL, 'f'},
70
  {"files-with-matches", no_argument, NULL, 'l'},
71
  {"files-without-match", no_argument, NULL, 'L'},
72
  {"fixed-regexp", no_argument, NULL, 'F'},
73
  {"fixed-strings", no_argument, NULL, 'F'},
74
  {"help", no_argument, &show_help, 1},
75
  {"ignore-case", no_argument, NULL, 'i'},
76
  {"line-number", no_argument, NULL, 'n'},
77
  {"line-regexp", no_argument, NULL, 'x'},
78
  {"no-filename", no_argument, NULL, 'h'},
79
  {"no-messages", no_argument, NULL, 's'},
80
  {"quiet", no_argument, NULL, 'q'},
81
  {"recursive", no_argument, NULL, 'r'},
82
  {"regexp", required_argument, NULL, 'e'},
83
  {"revert-match", no_argument, NULL, 'v'},
84
  {"silent", no_argument, NULL, 'q'},
85
  {"text", no_argument, NULL, 'a'},
86
#if O_BINARY
87
  {"binary", no_argument, NULL, 'U'},
88
  {"unix-byte-offsets", no_argument, NULL, 'u'},
89
#endif
90
  {"version", no_argument, NULL, 'V'},
91
  {"with-filename", no_argument, NULL, 'H'},
92
  {"word-regexp", no_argument, NULL, 'w'},
93
  {0, 0, 0, 0}
94
};
110
95
111
/* Define flags declared in grep.h. */
96
/* Define flags declared in grep.h. */
112
char *matcher;
97
char const *matcher;
113
int match_icase;
98
int match_icase;
114
int match_words;
99
int match_words;
115
int match_lines;
100
int match_lines;
116
101
117
/* Functions we'll use to search. */
118
static void (*compile)();
119
static char *(*execute)();
120
121
/* For error messages. */
102
/* For error messages. */
122
static char *prog;
103
static char *prog;
123
static char *filename;
104
static char const *filename;
124
static int errseen;
105
static int errseen;
125
106
107
/* How to handle directories.  */
108
static enum
109
  {
110
    READ_DIRECTORIES,
111
    RECURSE_DIRECTORIES,
112
    SKIP_DIRECTORIES
113
  } directories;
114
115
static int  ck_atoi PARAMS ((char const *, int *));
116
static void usage PARAMS ((int)) __attribute__((noreturn));
117
static void error PARAMS ((const char *, int));
118
static int  setmatcher PARAMS ((char const *));
119
static char *page_alloc PARAMS ((size_t, char **));
120
static int  reset PARAMS ((int, char const *, struct stats *));
121
static int  fillbuf PARAMS ((size_t, struct stats *));
122
static int  grepbuf PARAMS ((char *, char *));
123
static void prtext PARAMS ((char *, char *, int *));
124
static void prpending PARAMS ((char *));
125
static void prline PARAMS ((char *, char *, int));
126
static void print_offset_sep PARAMS ((off_t, int));
127
static void nlscan PARAMS ((char *));
128
static int  grep PARAMS ((int, char const *, struct stats *));
129
static int  grepdir PARAMS ((char const *, struct stats *));
130
static int  grepfile PARAMS ((char const *, struct stats *));
131
#if O_BINARY
132
static inline int undossify_input PARAMS ((register char *, size_t));
133
#endif
134
135
/* Functions we'll use to search. */
136
static void (*compile) PARAMS ((char *, size_t));
137
static char *(*execute) PARAMS ((char *, size_t, char **));
138
126
/* Print a message and possibly an error string.  Remember
139
/* Print a message and possibly an error string.  Remember
127
   that something awful happened. */
140
   that something awful happened. */
128
static void
141
static void
129
error(mesg, errnum)
142
error (mesg, errnum)
130
#ifdef __STDC__
143
     const char *mesg;
131
     const
132
#endif
133
     char *mesg;
134
     int errnum;
144
     int errnum;
135
{
145
{
136
  if (errnum)
146
  if (errnum)
137
    fprintf(stderr, "%s: %s: %s\n", prog, mesg, strerror(errnum));
147
    fprintf (stderr, "%s: %s: %s\n", prog, mesg, strerror (errnum));
138
  else
148
  else
139
    fprintf(stderr, "%s: %s\n", prog, mesg);
149
    fprintf (stderr, "%s: %s\n", prog, mesg);
140
  errseen = 1;
150
  errseen = 1;
141
}
151
}
142
152
143
/* Like error(), but die horribly after printing. */
153
/* Like error (), but die horribly after printing. */
144
void
154
void
145
fatal(mesg, errnum)
155
fatal (mesg, errnum)
146
#ifdef __STDC__
156
     const char *mesg;
147
     const
148
#endif
149
     char *mesg;
150
     int errnum;
157
     int errnum;
151
{
158
{
152
  error(mesg, errnum);
159
  error (mesg, errnum);
153
  exit(2);
160
  exit (2);
154
}
161
}
155
162
156
/* Interface to handle errors and fix library lossage. */
163
/* Interface to handle errors and fix library lossage. */
157
char *
164
char *
158
xmalloc(size)
165
xmalloc (size)
159
     size_t size;
166
     size_t size;
160
{
167
{
161
  char *result;
168
  char *result;
162
169
163
  result = malloc(size);
170
  result = malloc (size);
164
  if (size && !result)
171
  if (size && !result)
165
    fatal("memory exhausted", 0);
172
    fatal (_("memory exhausted"), 0);
166
  return result;
173
  return result;
167
}
174
}
168
175
169
/* Interface to handle errors and fix some library lossage. */
176
/* Interface to handle errors and fix some library lossage. */
170
char *
177
char *
171
xrealloc(ptr, size)
178
xrealloc (ptr, size)
172
     char *ptr;
179
     char *ptr;
173
     size_t size;
180
     size_t size;
174
{
181
{
175
  char *result;
182
  char *result;
176
183
177
  if (ptr)
184
  if (ptr)
178
    result = realloc(ptr, size);
185
    result = realloc (ptr, size);
179
  else
186
  else
180
    result = malloc(size);
187
    result = malloc (size);
181
  if (size && !result)
188
  if (size && !result)
182
    fatal("memory exhausted", 0);
189
    fatal (_("memory exhausted"), 0);
183
  return result;
190
  return result;
184
}
191
}
185
192
186
#if !defined(HAVE_VALLOC)
193
/* Convert STR to a positive integer, storing the result in *OUT.
187
#define valloc malloc
194
   If STR is not a valid integer, return -1 (otherwise 0). */
188
#else
195
static int
189
#ifdef __STDC__
196
ck_atoi (str, out)
190
extern void *valloc(size_t);
197
     char const *str;
191
#else
198
     int *out;
192
extern char *valloc();
199
{
193
#endif
200
  char const *p;
194
#endif
201
  for (p = str; *p; p++)
202
    if (*p < '0' || *p > '9')
203
      return -1;
204
205
  *out = atoi (optarg);
206
  return 0;
207
}
208
195
209
196
/* Hairy buffering mechanism for grep.  The intent is to keep
210
/* Hairy buffering mechanism for grep.  The intent is to keep
197
   all reads aligned on a page boundary and multiples of the
211
   all reads aligned on a page boundary and multiples of the
198
   page size. */
212
   page size. */
199
213
214
static char *ubuffer;		/* Unaligned base of buffer. */
200
static char *buffer;		/* Base of buffer. */
215
static char *buffer;		/* Base of buffer. */
201
static size_t bufsalloc;	/* Allocated size of buffer save region. */
216
static size_t bufsalloc;	/* Allocated size of buffer save region. */
202
static size_t bufalloc;		/* Total buffer size. */
217
static size_t bufalloc;		/* Total buffer size. */
203
static int bufdesc;		/* File descriptor. */
218
static int bufdesc;		/* File descriptor. */
204
static char *bufbeg;		/* Beginning of user-visible stuff. */
219
static char *bufbeg;		/* Beginning of user-visible stuff. */
205
static char *buflim;		/* Limit of user-visible stuff. */
220
static char *buflim;		/* Limit of user-visible stuff. */
221
static size_t pagesize;		/* alignment of memory pages */
206
222
207
#if defined(HAVE_WORKING_MMAP)
223
#if defined(HAVE_MMAP)
208
#include <sys/types.h>
209
#include <sys/stat.h>
210
#include <sys/mman.h>
211
212
static int bufmapped;		/* True for ordinary files. */
224
static int bufmapped;		/* True for ordinary files. */
213
static struct stat bufstat;	/* From fstat(). */
214
static off_t bufoffset;		/* What read() normally remembers. */
225
static off_t bufoffset;		/* What read() normally remembers. */
226
static off_t initial_bufoffset;	/* Initial value of bufoffset. */
215
#endif
227
#endif
216
228
217
#if HAVE_LIBZ > 0
229
/* Return VAL aligned to the next multiple of ALIGNMENT.  VAL can be
218
#include <zlib.h>
230
   an integer or a pointer.  Both args must be free of side effects.  */
219
static gzFile gzbufdesc;        /* zlib file descriptor. */
231
#define ALIGN_TO(val, alignment) \
220
static int  Zflag;              /* uncompress before searching */
232
  ((size_t) (val) % (alignment) == 0 \
233
   ? (val) \
234
   : (val) + ((alignment) - (size_t) (val) % (alignment)))
235
236
/* Return the address of a new page-aligned buffer of size SIZE.  Set
237
   *UP to the newly allocated (but possibly unaligned) buffer used to
238
   *build the aligned buffer.  To free the buffer, free (*UP).  */
239
static char *
240
page_alloc (size, up)
241
     size_t size;
242
     char **up;
243
{
244
  /* HAVE_WORKING_VALLOC means that valloc is properly declared, and
245
     you can free the result of valloc.  This symbol is not (yet)
246
     autoconfigured.  It can be useful to define HAVE_WORKING_VALLOC
247
     while debugging, since some debugging memory allocators might
248
     catch more bugs if this symbol is enabled.  */
249
#if HAVE_WORKING_VALLOC
250
  *up = valloc (size);
251
  return *up;
252
#else
253
  size_t asize = size + pagesize - 1;
254
  if (size <= asize)
255
    {
256
      *up = malloc (asize);
257
      if (*up)
258
	return ALIGN_TO (*up, pagesize);
259
    }
260
  return NULL;
221
#endif
261
#endif
262
}
222
263
223
/* Reset the buffer for a new file.  Initialize
264
/* Reset the buffer for a new file, returning zero if we should skip it.
224
   on the first time through. */
265
   Initialize on the first time through. */
225
void
266
static int
226
reset(fd)
267
reset (fd, file, stats)
227
     int fd;
268
     int fd;
269
     char const *file;
270
     struct stats *stats;
228
{
271
{
229
  static int initialized;
272
  if (pagesize == 0)
230
231
  if (!initialized)
232
    {
273
    {
233
      initialized = 1;
274
      size_t ubufsalloc;
275
      pagesize = getpagesize ();
276
      if (pagesize == 0)
277
	abort ();
234
#ifndef BUFSALLOC
278
#ifndef BUFSALLOC
235
      bufsalloc = MAX(8192, getpagesize());
279
      ubufsalloc = MAX (8192, pagesize);
236
#else
280
#else
237
      bufsalloc = BUFSALLOC;
281
      ubufsalloc = BUFSALLOC;
238
#endif
282
#endif
283
      bufsalloc = ALIGN_TO (ubufsalloc, pagesize);
239
      bufalloc = 5 * bufsalloc;
284
      bufalloc = 5 * bufsalloc;
240
      /* The 1 byte of overflow is a kludge for dfaexec(), which
285
      /* The 1 byte of overflow is a kludge for dfaexec(), which
241
	 inserts a sentinel newline at the end of the buffer
286
	 inserts a sentinel newline at the end of the buffer
242
	 being searched.  There's gotta be a better way... */
287
	 being searched.  There's gotta be a better way... */
243
      buffer = valloc(bufalloc + 1);
288
      if (bufsalloc < ubufsalloc
244
      if (!buffer)
289
	  || bufalloc / 5 != bufsalloc || bufalloc + 1 < bufalloc
245
	fatal("memory exhausted", 0);
290
	  || ! (buffer = page_alloc (bufalloc + 1, &ubuffer)))
291
	fatal (_("memory exhausted"), 0);
246
      bufbeg = buffer;
292
      bufbeg = buffer;
247
      buflim = buffer;
293
      buflim = buffer;
248
    }
294
    }
249
#if HAVE_LIBZ > 0
295
  bufdesc = fd;
250
  if (Zflag) {
296
251
    gzbufdesc = gzdopen(fd, "r");
252
    if (gzbufdesc == NULL) 
253
      fatal("memory exhausted", 0); 
254
  }
255
#endif
256
    bufdesc = fd;
257
#if defined(HAVE_WORKING_MMAP)
258
  if (
297
  if (
259
#if HAVE_LIBZ > 0
298
#if defined(HAVE_MMAP)
260
      Zflag || 
299
      1
300
#else
301
      directories != READ_DIRECTORIES
261
#endif
302
#endif
262
      fstat(fd, &bufstat) < 0 || !S_ISREG(bufstat.st_mode))
303
      )
304
    if (fstat (fd, &stats->stat) != 0)
305
      {
306
	error ("fstat", errno);
307
	return 0;
308
      }
309
  if (directories == SKIP_DIRECTORIES && S_ISDIR (stats->stat.st_mode))
310
    return 0;
311
#if defined(HAVE_MMAP)
312
  if (!S_ISREG (stats->stat.st_mode))
263
    bufmapped = 0;
313
    bufmapped = 0;
264
  else
314
  else
265
    {
315
    {
266
      bufmapped = 1;
316
      bufmapped = 1;
267
      bufoffset = lseek(fd, 0, 1);
317
      bufoffset = initial_bufoffset = file ? 0 : lseek (fd, 0, 1);
268
    }
318
    }
269
#endif
319
#endif
320
  return 1;
270
}
321
}
271
322
272
/* Read new stuff into the buffer, saving the specified
323
/* Read new stuff into the buffer, saving the specified
Lines 274-333 Link Here
274
   to the beginning of the buffer contents, and 'buflim'
325
   to the beginning of the buffer contents, and 'buflim'
275
   points just after the end.  Return count of new stuff. */
326
   points just after the end.  Return count of new stuff. */
276
static int
327
static int
277
fillbuf(save)
328
fillbuf (save, stats)
278
     size_t save;
329
     size_t save;
330
     struct stats *stats;
279
{
331
{
280
  char *nbuffer, *dp, *sp;
281
  int cc;
332
  int cc;
282
#if defined(HAVE_WORKING_MMAP)
333
#if defined(HAVE_MMAP)
283
  caddr_t maddr;
334
  caddr_t maddr;
284
#endif
335
#endif
285
  static int pagesize;
286
287
  if (pagesize == 0 && (pagesize = getpagesize()) == 0)
288
    abort();
289
336
290
  if (save > bufsalloc)
337
  if (save > bufsalloc)
291
    {
338
    {
339
      char *nubuffer;
340
      char *nbuffer;
341
292
      while (save > bufsalloc)
342
      while (save > bufsalloc)
293
	bufsalloc *= 2;
343
	bufsalloc *= 2;
294
      bufalloc = 5 * bufsalloc;
344
      bufalloc = 5 * bufsalloc;
295
      nbuffer = valloc(bufalloc + 1);
345
      if (bufalloc / 5 != bufsalloc || bufalloc + 1 < bufalloc
296
      if (!nbuffer)
346
	  || ! (nbuffer = page_alloc (bufalloc + 1, &nubuffer)))
297
	fatal("memory exhausted", 0);
347
	fatal (_("memory exhausted"), 0);
348
349
      bufbeg = nbuffer + bufsalloc - save;
350
      memcpy (bufbeg, buflim - save, save);
351
      free (ubuffer);
352
      ubuffer = nubuffer;
353
      buffer = nbuffer;
298
    }
354
    }
299
  else
355
  else
300
    nbuffer = buffer;
356
    {
301
357
      bufbeg = buffer + bufsalloc - save;
302
  sp = buflim - save;
358
      memcpy (bufbeg, buflim - save, save);
303
  dp = nbuffer + bufsalloc - save;
359
    }
304
  bufbeg = dp;
305
  while (save--)
306
    *dp++ = *sp++;
307
308
  /* We may have allocated a new, larger buffer.  Since
309
     there is no portable vfree(), we just have to forget
310
     about the old one.  Sorry. */
311
  buffer = nbuffer;
312
360
313
#if defined(HAVE_WORKING_MMAP)
361
#if defined(HAVE_MMAP)
314
  if (bufmapped && bufoffset % pagesize == 0
362
  if (bufmapped && bufoffset % pagesize == 0
315
      && bufstat.st_size - bufoffset >= bufalloc - bufsalloc)
363
      && stats->stat.st_size - bufoffset >= bufalloc - bufsalloc)
316
    {
364
    {
317
      maddr = buffer + bufsalloc;
365
      maddr = buffer + bufsalloc;
318
      maddr = mmap(maddr, bufalloc - bufsalloc, PROT_READ | PROT_WRITE,
366
      maddr = mmap (maddr, bufalloc - bufsalloc, PROT_READ | PROT_WRITE,
319
		   MAP_PRIVATE | MAP_FIXED, bufdesc, bufoffset);
367
		   MAP_PRIVATE | MAP_FIXED, bufdesc, bufoffset);
320
      if (maddr == (caddr_t) -1)
368
      if (maddr == (caddr_t) -1)
321
	{
369
	{
322
	  fprintf(stderr, "%s: warning: %s: %s\n", filename,
370
          /* This used to issue a warning, but on some hosts
323
		  strerror(errno));
371
             (e.g. Solaris 2.5) mmap can fail merely because some
372
             other process has an advisory read lock on the file.
373
             There's no point alarming the user about this misfeature.  */
374
#if 0
375
	  fprintf (stderr, _("%s: warning: %s: %s\n"), prog, filename,
376
		  strerror (errno));
377
#endif
324
	  goto tryread;
378
	  goto tryread;
325
	}
379
	}
326
#if 0
380
#if 0
327
      /* You might thing this (or MADV_WILLNEED) would help,
381
      /* You might thing this (or MADV_WILLNEED) would help,
328
	 but it doesn't, at least not on a Sun running 4.1.
382
	 but it doesn't, at least not on a Sun running 4.1.
329
	 In fact, it actually slows us down about 30%! */
383
	 In fact, it actually slows us down about 30%! */
330
      madvise(maddr, bufalloc - bufsalloc, MADV_SEQUENTIAL);
384
      madvise (maddr, bufalloc - bufsalloc, MADV_SEQUENTIAL);
331
#endif
385
#endif
332
      cc = bufalloc - bufsalloc;
386
      cc = bufalloc - bufsalloc;
333
      bufoffset += cc;
387
      bufoffset += cc;
Lines 341-362 Link Here
341
      if (bufmapped)
395
      if (bufmapped)
342
	{
396
	{
343
	  bufmapped = 0;
397
	  bufmapped = 0;
344
	  lseek(bufdesc, bufoffset, 0);
398
	  if (bufoffset != initial_bufoffset)
399
	    lseek (bufdesc, bufoffset, 0);
345
	}
400
	}
346
#if HAVE_LIBZ > 0
401
      cc = read (bufdesc, buffer + bufsalloc, bufalloc - bufsalloc);
347
      if (Zflag) 
348
    	cc = gzread(gzbufdesc, buffer + bufsalloc, bufalloc - bufsalloc);
349
      else	
350
#endif
351
        cc = read(bufdesc, buffer + bufsalloc, bufalloc - bufsalloc);
352
    }
402
    }
353
#else
403
#else
354
#if HAVE_LIBZ > 0
404
  cc = read (bufdesc, buffer + bufsalloc, bufalloc - bufsalloc);
355
  if (Zflag) 
405
#endif /*HAVE_MMAP*/
356
    cc = gzread(gzbufdesc, buffer + bufsalloc, bufalloc - bufsalloc);
406
#if O_BINARY
357
  else
407
  if (cc > 0)
358
#endif
408
    cc = undossify_input (buffer + bufsalloc, cc);
359
    cc = read(bufdesc, buffer + bufsalloc, bufalloc - bufsalloc);
360
#endif
409
#endif
361
  if (cc > 0)
410
  if (cc > 0)
362
    buflim = buffer + bufsalloc + cc;
411
    buflim = buffer + bufsalloc + cc;
Lines 366-371 Link Here
366
}
415
}
367
416
368
/* Flags controlling the style of output. */
417
/* Flags controlling the style of output. */
418
static int always_text;		/* Assume the input is always text. */
369
static int out_quiet;		/* Suppress all normal output. */
419
static int out_quiet;		/* Suppress all normal output. */
370
static int out_invert;		/* Print nonmatching stuff. */
420
static int out_invert;		/* Print nonmatching stuff. */
371
static int out_file;		/* Print filenames. */
421
static int out_file;		/* Print filenames. */
Lines 373-391 Link Here
373
static int out_byte;		/* Print byte offsets. */
423
static int out_byte;		/* Print byte offsets. */
374
static int out_before;		/* Lines of leading context. */
424
static int out_before;		/* Lines of leading context. */
375
static int out_after;		/* Lines of trailing context. */
425
static int out_after;		/* Lines of trailing context. */
376
static int count_matches;       /* print a count of matching lines */
426
static int count_matches;	/* Count matching lines.  */
427
static int list_files;		/* List matching files.  */
428
static int no_filenames;	/* Suppress file names.  */
429
static int suppress_errors;	/* Suppress diagnostics.  */
377
430
378
/* Internal variables to keep track of byte count, context, etc. */
431
/* Internal variables to keep track of byte count, context, etc. */
379
static size_t totalcc;		/* Total character count before bufbeg. */
432
static off_t totalcc;		/* Total character count before bufbeg. */
380
static char *lastnl;		/* Pointer after last newline counted. */
433
static char *lastnl;		/* Pointer after last newline counted. */
381
static char *lastout;		/* Pointer after last character output;
434
static char *lastout;		/* Pointer after last character output;
382
				   NULL if no character has been output
435
				   NULL if no character has been output
383
				   or if it's conceptually before bufbeg. */
436
				   or if it's conceptually before bufbeg. */
384
static size_t totalnl;		/* Total newline count before lastnl. */
437
static off_t totalnl;		/* Total newline count before lastnl. */
385
static int pending;		/* Pending lines of output. */
438
static int pending;		/* Pending lines of output. */
439
static int done_on_match;		/* Stop scanning file on first match */
440
441
#if O_BINARY
442
# include "dosbuf.c"
443
#endif
386
444
387
static void
445
static void
388
nlscan(lim)
446
nlscan (lim)
389
     char *lim;
447
     char *lim;
390
{
448
{
391
  char *beg;
449
  char *beg;
Lines 397-426 Link Here
397
}
455
}
398
456
399
static void
457
static void
400
prline(beg, lim, sep)
458
print_offset_sep (pos, sep)
459
     off_t pos;
460
     int sep;
461
{
462
  /* Do not rely on printf to print pos, since off_t may be longer than long,
463
     and long long is not portable.  */
464
465
  char buf[sizeof pos * CHAR_BIT];
466
  char *p = buf + sizeof buf - 1;
467
  *p = sep;
468
469
  do
470
    *--p = '0' + pos % 10;
471
  while ((pos /= 10) != 0);
472
473
  fwrite (p, 1, buf + sizeof buf - p, stdout);
474
}
475
476
static void
477
prline (beg, lim, sep)
401
     char *beg;
478
     char *beg;
402
     char *lim;
479
     char *lim;
403
     char sep;
480
     int sep;
404
{
481
{
405
  if (out_file)
482
  if (out_file)
406
    printf("%s%c", filename, sep);
483
    printf ("%s%c", filename, sep);
407
  if (out_line)
484
  if (out_line)
408
    {
485
    {
409
      nlscan(beg);
486
      nlscan (beg);
410
      printf("%d%c", ++totalnl, sep);
487
      print_offset_sep (++totalnl, sep);
411
      lastnl = lim;
488
      lastnl = lim;
412
    }
489
    }
413
  if (out_byte)
490
  if (out_byte)
414
    printf("%lu%c", (unsigned long)(totalcc + (beg - bufbeg)), sep);
491
    {
415
  fwrite(beg, 1, lim - beg, stdout);
492
      off_t pos = totalcc + (beg - bufbeg);
416
  if (ferror(stdout))
493
#if O_BINARY
417
    error("writing output", errno);
494
      pos = dossified_pos (pos);
495
#endif
496
      print_offset_sep (pos, sep);
497
    }
498
  fwrite (beg, 1, lim - beg, stdout);
499
  if (ferror (stdout))
500
    error (_("writing output"), errno);
418
  lastout = lim;
501
  lastout = lim;
419
}
502
}
420
503
421
/* Print pending lines of trailing context prior to LIM. */
504
/* Print pending lines of trailing context prior to LIM. */
422
static void
505
static void
423
prpending(lim)
506
prpending (lim)
424
     char *lim;
507
     char *lim;
425
{
508
{
426
  char *nl;
509
  char *nl;
Lines 430-447 Link Here
430
  while (pending > 0 && lastout < lim)
513
  while (pending > 0 && lastout < lim)
431
    {
514
    {
432
      --pending;
515
      --pending;
433
      if ((nl = memchr(lastout, '\n', lim - lastout)) != 0)
516
      if ((nl = memchr (lastout, '\n', lim - lastout)) != 0)
434
	++nl;
517
	++nl;
435
      else
518
      else
436
	nl = lim;
519
	nl = lim;
437
      prline(lastout, nl, '-');
520
      prline (lastout, nl, '-');
438
    }
521
    }
439
}
522
}
440
523
441
/* Print the lines between BEG and LIM.  Deal with context crap.
524
/* Print the lines between BEG and LIM.  Deal with context crap.
442
   If NLINESP is non-null, store a count of lines between BEG and LIM. */
525
   If NLINESP is non-null, store a count of lines between BEG and LIM. */
443
static void
526
static void
444
prtext(beg, lim, nlinesp)
527
prtext (beg, lim, nlinesp)
445
     char *beg;
528
     char *beg;
446
     char *lim;
529
     char *lim;
447
     int *nlinesp;
530
     int *nlinesp;
Lines 451-457 Link Here
451
  int i, n;
534
  int i, n;
452
535
453
  if (!out_quiet && pending > 0)
536
  if (!out_quiet && pending > 0)
454
    prpending(beg);
537
    prpending (beg);
455
538
456
  p = beg;
539
  p = beg;
457
540
Lines 469-480 Link Here
469
      /* We only print the "--" separator if our output is
552
      /* We only print the "--" separator if our output is
470
	 discontiguous from the last output in the file. */
553
	 discontiguous from the last output in the file. */
471
      if ((out_before || out_after) && used && p != lastout)
554
      if ((out_before || out_after) && used && p != lastout)
472
	puts("--");
555
	puts ("--");
473
556
474
      while (p < beg)
557
      while (p < beg)
475
	{
558
	{
476
	  nl = memchr(p, '\n', beg - p);
559
	  nl = memchr (p, '\n', beg - p);
477
	  prline(p, nl + 1, '-');
560
	  prline (p, nl + 1, '-');
478
	  p = nl + 1;
561
	  p = nl + 1;
479
	}
562
	}
480
    }
563
    }
Lines 484-502 Link Here
484
      /* Caller wants a line count. */
567
      /* Caller wants a line count. */
485
      for (n = 0; p < lim; ++n)
568
      for (n = 0; p < lim; ++n)
486
	{
569
	{
487
	  if ((nl = memchr(p, '\n', lim - p)) != 0)
570
	  if ((nl = memchr (p, '\n', lim - p)) != 0)
488
	    ++nl;
571
	    ++nl;
489
	  else
572
	  else
490
	    nl = lim;
573
	    nl = lim;
491
	  if (!out_quiet)
574
	  if (!out_quiet)
492
	    prline(p, nl, ':');
575
	    prline (p, nl, ':');
493
	  p = nl;
576
	  p = nl;
494
	}
577
	}
495
      *nlinesp = n;
578
      *nlinesp = n;
496
    }
579
    }
497
  else
580
  else
498
    if (!out_quiet)
581
    if (!out_quiet)
499
      prline(beg, lim, ':');
582
      prline (beg, lim, ':');
500
583
501
  pending = out_after;
584
  pending = out_after;
502
  used = 1;
585
  used = 1;
Lines 506-512 Link Here
506
   between matching lines if OUT_INVERT is true).  Return a count of
589
   between matching lines if OUT_INVERT is true).  Return a count of
507
   lines printed. */
590
   lines printed. */
508
static int
591
static int
509
grepbuf(beg, lim)
592
grepbuf (beg, lim)
510
     char *beg;
593
     char *beg;
511
     char *lim;
594
     char *lim;
512
{
595
{
Lines 523-581 Link Here
523
	break;
606
	break;
524
      if (!out_invert)
607
      if (!out_invert)
525
	{
608
	{
526
	  prtext(b, endp, (int *) 0);
609
	  prtext (b, endp, (int *) 0);
527
	  nlines += 1;
610
	  nlines += 1;
611
	  if (done_on_match)
612
	    return nlines;
528
	}
613
	}
529
      else if (p < b)
614
      else if (p < b)
530
	{
615
	{
531
	  prtext(p, b, &n);
616
	  prtext (p, b, &n);
532
	  nlines += n;
617
	  nlines += n;
533
	}
618
	}
534
      p = endp;
619
      p = endp;
535
    }
620
    }
536
  if (out_invert && p < lim)
621
  if (out_invert && p < lim)
537
    {
622
    {
538
      prtext(p, lim, &n);
623
      prtext (p, lim, &n);
539
      nlines += n;
624
      nlines += n;
540
    }
625
    }
541
  return nlines;
626
  return nlines;
542
}
627
}
543
628
544
629
/* Search a given file.  Normally, return a count of lines printed;
545
/*
630
   but if the file is a directory and we search it recursively, then
546
 * try to guess if buf belong to a binary file 
631
   return -2 if there was a match, and -1 otherwise.  */
547
 */
548
549
int isBinaryFile(buf, len)
550
     char *buf;
551
     int len;
552
{
553
#define BINARY_BUF_LEN 32
554
  int i;
555
556
  len = (len < BINARY_BUF_LEN ? len : BINARY_BUF_LEN);
557
558
  /* look for non-printable chars */
559
  for(i = 0; i < len; i++, buf++)
560
    if (!isprint(*buf) && !isspace(*buf))
561
      return(1);
562
  
563
  return(0);
564
}
565
566
567
568
/* Search a given file.  Return a count of lines printed. */
569
static int
632
static int
570
grep(fd)
633
grep (fd, file, stats)
571
     int fd;
634
     int fd;
635
     char const *file;
636
     struct stats *stats;
572
{
637
{
573
  int nlines, i;
638
  int nlines, i;
639
  int not_text;
574
  size_t residue, save;
640
  size_t residue, save;
575
  char *beg, *lim;
641
  char *beg, *lim;
576
  int first, cc;	
577
642
578
  reset(fd);
643
  if (!reset (fd, file, stats))
644
    return 0;
645
646
  if (file && directories == RECURSE_DIRECTORIES
647
      && S_ISDIR (stats->stat.st_mode))
648
    {
649
      /* Close fd now, so that we don't open a lot of file descriptors
650
	 when we recurse deeply.  */
651
      if (close (fd) != 0)
652
	error (file, errno);
653
      return grepdir (file, stats) - 2;
654
    }
579
655
580
  totalcc = 0;
656
  totalcc = 0;
581
  lastout = 0;
657
  lastout = 0;
Lines 585-606 Link Here
585
  nlines = 0;
661
  nlines = 0;
586
  residue = 0;
662
  residue = 0;
587
  save = 0;
663
  save = 0;
588
  first = 0;
589
  cc = 0;	
590
664
591
  for (;;)
665
  if (fillbuf (save, stats) < 0)
592
    {
666
    {
593
      if ((cc = fillbuf(save)) < 0)
667
      if (! (is_EISDIR (errno, file) && suppress_errors))
594
	{
668
	error (filename, errno);
595
	  error(filename, errno);
669
      return nlines;
596
	  return nlines;
670
    }
597
	}
598
671
599
	 /* skip binary files */
672
  not_text = (! (always_text | out_quiet)
600
      if (!first && aflag && isBinaryFile(bufbeg, cc))
673
	      && memchr (bufbeg, '\0', buflim - bufbeg));
601
         return(0);
674
  done_on_match += not_text;
602
      first++;
675
  out_quiet += not_text;
603
676
677
  for (;;)
678
    {
604
      lastnl = bufbeg;
679
      lastnl = bufbeg;
605
      if (lastout)
680
      if (lastout)
606
	lastout = bufbeg;
681
	lastout = bufbeg;
Lines 612-623 Link Here
612
      residue = buflim - lim;
687
      residue = buflim - lim;
613
      if (beg < lim)
688
      if (beg < lim)
614
	{
689
	{
615
	  nlines += grepbuf(beg, lim);
690
	  nlines += grepbuf (beg, lim);
616
	  if (pending)
691
	  if (pending)
617
	    prpending(lim);
692
	    prpending (lim);
618
	  /* optimization */
693
	  if (nlines && done_on_match && !out_invert)
619
	  if (nlines && out_quiet && !count_matches)
694
	    goto finish_grep;
620
	    return(nlines);
621
	}
695
	}
622
      i = 0;
696
      i = 0;
623
      beg = lim;
697
      beg = lim;
Lines 633-739 Link Here
633
      save = residue + lim - beg;
707
      save = residue + lim - beg;
634
      totalcc += buflim - bufbeg - save;
708
      totalcc += buflim - bufbeg - save;
635
      if (out_line)
709
      if (out_line)
636
	nlscan(beg);
710
	nlscan (beg);
711
      if (fillbuf (save, stats) < 0)
712
	{
713
	  if (! (is_EISDIR (errno, file) && suppress_errors))
714
	    error (filename, errno);
715
	  goto finish_grep;
716
	}
637
    }
717
    }
638
  if (residue)
718
  if (residue)
639
    {
719
    {
640
      nlines += grepbuf(bufbeg + save - residue, buflim);
720
      nlines += grepbuf (bufbeg + save - residue, buflim);
641
      if (pending)
721
      if (pending)
642
	prpending(buflim);
722
	prpending (buflim);
643
    }
723
    }
724
725
 finish_grep:
726
  done_on_match -= not_text;
727
  out_quiet -= not_text;
728
  if ((not_text & ~out_quiet) && nlines != 0)
729
    printf (_("Binary file %s matches\n"), filename);
644
  return nlines;
730
  return nlines;
645
}
731
}
646
732
647
static char version[] = "GNU grep version 2.0";
733
static int
648
	
734
grepfile (file, stats)
649
#define GETOPT_STD "0123456789A:B:CEFGLVX:abce:f:hilnqsvwxy"
735
     char const *file;
650
#if HAVE_FTS > 0
736
     struct stats *stats;
651
#define GETOPT_FTS "HPRS"
737
{
652
#else
738
  int desc;
653
#define GETOPT_FTS ""
739
  int count;
654
#endif
740
  int status;
655
#if HAVE_LIBZ > 0
741
656
#define GETOPT_Z "Z"
742
  if (! file)
657
#else
743
    {
658
#define GETOPT_Z ""
744
      desc = 0;
745
      filename = _("(standard input)");
746
    }
747
  else
748
    {
749
      desc = open (file, O_RDONLY);
750
751
      if (desc < 0)
752
	{
753
	  int e = errno;
754
	    
755
	  if (is_EISDIR (e, file) && directories == RECURSE_DIRECTORIES)
756
	    {
757
	      if (stat (file, &stats->stat) != 0)
758
		{
759
		  error (file, errno);
760
		  return 1;
761
		}
762
763
	      return grepdir (file, stats);
764
	    }
765
	      
766
	  if (!suppress_errors)
767
	    {
768
	      if (directories == SKIP_DIRECTORIES)
769
		switch (e)
770
		  {
771
#ifdef EISDIR
772
		  case EISDIR:
773
		    return 1;
774
#endif
775
		  case EACCES:
776
		    /* When skipping directories, don't worry about
777
		       directories that can't be opened.  */
778
		    if (stat (file, &stats->stat) == 0
779
			&& S_ISDIR (stats->stat.st_mode))
780
		      return 1;
781
		    break;
782
		  }
783
784
	      error (file, e);
785
	    }
786
787
	  return 1;
788
	}
789
790
      filename = file;
791
    }
792
793
#if O_BINARY
794
  /* Set input to binary mode.  Pipes are simulated with files
795
     on DOS, so this includes the case of "foo | grep bar".  */
796
  if (!isatty (desc))
797
    SET_BINARY (desc);
659
#endif
798
#endif
660
799
800
  count = grep (desc, file, stats);
801
  if (count < 0)
802
    status = count + 2;
803
  else
804
    {
805
      if (count_matches)
806
	{
807
	  if (out_file)
808
	    printf ("%s:", filename);
809
	  printf ("%d\n", count);
810
	}
811
812
      if (count)
813
	{
814
	  status = 0;
815
	  if (list_files == 1)
816
	    printf ("%s\n", filename);
817
	}
818
      else
819
	{
820
	  status = 1;
821
	  if (list_files == -1)
822
	    printf ("%s\n", filename);
823
	}
824
825
      if (file && close (desc) != 0)
826
	error (file, errno);
827
    }
828
829
  return status;
830
}
831
832
static int
833
grepdir (dir, stats)
834
     char const *dir;
835
     struct stats *stats;
836
{
837
  int status = 1;
838
  struct stats *ancestor;
839
  char *name_space;
840
841
  for (ancestor = stats;  (ancestor = ancestor->parent) != 0;  )
842
    if (! ((ancestor->stat.st_ino ^ stats->stat.st_ino)
843
	   | (ancestor->stat.st_dev ^ stats->stat.st_dev)))
844
      {
845
	if (!suppress_errors)
846
	  fprintf (stderr, _("%s: warning: %s: %s\n"), prog, dir,
847
		   _("recursive directory loop"));
848
	return 1;
849
      }
850
851
  name_space = savedir (dir, (unsigned) stats->stat.st_size);
852
853
  if (! name_space)
854
    {
855
      if (errno)
856
	{
857
	  if (!suppress_errors)
858
	    error (dir, errno);
859
	}
860
      else
861
	fatal (_("Memory exhausted"), 0);
862
    }
863
  else
864
    {
865
      size_t dirlen = strlen (dir);
866
      int needs_slash = ! (dirlen == FILESYSTEM_PREFIX_LEN (dir)
867
			   || IS_SLASH (dir[dirlen - 1]));
868
      char *file = NULL;
869
      char *namep = name_space;
870
      struct stats child;
871
      child.parent = stats;
872
      out_file += !no_filenames;
873
      while (*namep)
874
	{
875
	  size_t namelen = strlen (namep);
876
	  file = xrealloc (file, dirlen + 1 + namelen + 1);
877
	  strcpy (file, dir);
878
	  file[dirlen] = '/';
879
	  strcpy (file + dirlen + needs_slash, namep);
880
	  namep += namelen + 1;
881
	  status &= grepfile (file, &child);
882
	}
883
      out_file -= !no_filenames;
884
      if (file)
885
        free (file);
886
      free (name_space);
887
    }
888
889
  return status;
890
}
891
661
static void
892
static void
662
usage()
893
usage(status)
894
int status;
663
{
895
{
664
  fprintf(stderr, "usage: %s [-[AB] <num>] [-CEFGLVX%s%s%s", 
896
  if (status != 0)
665
	prog, GETOPT_FTS, GETOPT_Z,
897
    {
666
	"abchilnqsvwxy]\n       [-e <expr>] [-f file] [files ...]\n");
898
      fprintf (stderr, _("Usage: %s [OPTION]... PATTERN [FILE]...\n"), prog);
667
  exit(2);
899
      fprintf (stderr, _("Try `%s --help' for more information.\n"), prog);
900
    }
901
  else
902
    {
903
      printf (_("Usage: %s [OPTION]... PATTERN [FILE] ...\n"), prog);
904
      printf (_("\
905
Search for PATTERN in each FILE or standard input.\n\
906
\n\
907
Regexp selection and interpretation:\n\
908
  -E, --extended-regexp     PATTERN is an extended regular expression\n\
909
  -F, --fixed-regexp        PATTERN is a fixed string separated by newlines\n\
910
  -G, --basic-regexp        PATTERN is a basic regular expression\n\
911
  -e, --regexp=PATTERN      use PATTERN as a regular expression\n\
912
  -f, --file=FILE           obtain PATTERN from FILE\n\
913
  -i, --ignore-case         ignore case distinctions\n\
914
  -w, --word-regexp         force PATTERN to match only whole words\n\
915
  -x, --line-regexp         force PATTERN to match only whole lines\n"));
916
      printf (_("\
917
\n\
918
Miscellaneous:\n\
919
  -s, --no-messages         suppress error messages\n\
920
  -v, --revert-match        select non-matching lines\n\
921
  -V, --version             print version information and exit\n\
922
      --help                display this help and exit\n"));
923
      printf (_("\
924
\n\
925
Output control:\n\
926
  -b, --byte-offset         print the byte offset with output lines\n\
927
  -n, --line-number         print line number with output lines\n\
928
  -H, --with-filename       print the filename for each match\n\
929
  -h, --no-filename         suppress the prefixing filename on output\n\
930
  -q, --quiet, --silent     suppress all normal output\n\
931
  -a, --text                do not suppress binary output\n\
932
  -d, --directories=ACTION  how to handle directories\n\
933
                            ACTION is 'read', 'recurse', or 'skip'.\n\
934
  -r, --recursive           equivalent to --directories=recurse.\n\
935
  -L, --files-without-match only print FILE names containing no match\n\
936
  -l, --files-with-matches  only print FILE names containing matches\n\
937
  -c, --count               only print a count of matching lines per FILE\n"));
938
      printf (_("\
939
\n\
940
Context control:\n\
941
  -B, --before-context=NUM  print NUM lines of leading context\n\
942
  -A, --after-context=NUM   print NUM lines of trailing context\n\
943
  -C, --context[=NUM]       print NUM (default 2) lines of output context\n\
944
                            unless overriden by -A or -B\n\
945
  -NUM                      same as --context=NUM\n\
946
  -U, --binary              do not strip CR characters at EOL (MSDOS)\n\
947
  -u, --unix-byte-offsets   report offsets as if CRs were not there (MSDOS)\n\
948
\n\
949
If no -[GEF], then `egrep' assumes -E, `fgrep' -F, else -G.\n\
950
With no FILE, or when FILE is -, read standard input. If less than\n\
951
two FILEs given, assume -h. Exit with 0 if matches, with 1 if none.\n\
952
Exit with 2 if syntax errors or system errors.\n"));
953
      printf (_("\nReport bugs to <bug-gnu-utils@gnu.org>.\n"));
954
    }
955
  exit (status);
668
}
956
}
669
957
670
/* Go through the matchers vector and look for the specified matcher.
958
/* Go through the matchers vector and look for the specified matcher.
671
   If we find it, install it in compile and execute, and return 1.  */
959
   If we find it, install it in compile and execute, and return 1.  */
672
int
960
static int
673
setmatcher(name)
961
setmatcher (name)
674
     char *name;
962
     char const *name;
675
{
963
{
676
  int i;
964
  int i;
965
#ifdef HAVE_SETRLIMIT
966
  struct rlimit rlim;
967
#endif
677
968
678
  for (i = 0; matchers[i].name; ++i)
969
  for (i = 0; matchers[i].name; ++i)
679
    if (strcmp(name, matchers[i].name) == 0)
970
    if (strcmp (name, matchers[i].name) == 0)
680
      {
971
      {
681
	compile = matchers[i].compile;
972
	compile = matchers[i].compile;
682
	execute = matchers[i].execute;
973
	execute = matchers[i].execute;
974
#if HAVE_SETRLIMIT && defined(RLIMIT_STACK)
975
	/* I think every platform needs to do this, so that regex.c
976
	   doesn't overflow the stack.  The default value of
977
	   `re_max_failures' is too large for some platforms: it needs
978
	   more than 3MB-large stack.
979
980
	   The test for HAVE_SETRLIMIT should go into `configure'.  */
981
	if (!getrlimit (RLIMIT_STACK, &rlim))
982
	  {
983
	    long newlim;
984
	    extern long int re_max_failures; /* from regex.c */
985
986
	    /* Approximate the amount regex.c needs, plus some more.  */
987
	    newlim = re_max_failures * 2 * 20 * sizeof (char *);
988
	    if (newlim > rlim.rlim_max)
989
	      {
990
		newlim = rlim.rlim_max;
991
		re_max_failures = newlim / (2 * 20 * sizeof (char *));
992
	      }
993
	    if (rlim.rlim_cur < newlim)
994
	      rlim.rlim_cur = newlim;
995
996
	    setrlimit (RLIMIT_STACK, &rlim);
997
	  }
998
#endif
683
	return 1;
999
	return 1;
684
      }
1000
      }
685
  return 0;
1001
  return 0;
686
}
1002
}
687
1003
688
689
690
int
1004
int
691
main(argc, argv)
1005
main (argc, argv)
692
     int argc;
1006
     int argc;
693
     char *argv[];
1007
     char *argv[];
694
{
1008
{
695
  char *keys;
1009
  char *keys;
696
  size_t keycc, oldcc, keyalloc;
1010
  size_t keycc, oldcc, keyalloc;
697
  int keyfound, no_filenames, list_files, suppress_errors;
1011
  int with_filenames;
698
  int opt, cc, desc, count, status;
1012
  int opt, cc, status;
1013
  unsigned digit_args_val, default_context;
699
  FILE *fp;
1014
  FILE *fp;
700
  extern char *optarg;
1015
  extern char *optarg;
701
  extern int optind;
1016
  extern int optind;
702
#if HAVE_FTS > 0
703
  int Rflag, Hflag, Pflag, Lflag;
704
  FTS *ftsp;
705
  FTSENT *ftsent;
706
  int fts_options;
707
#endif
708
1017
709
#ifdef __FreeBSD__
1018
  initialize_main (&argc, &argv);
710
  (void) setlocale(LC_ALL, "");
711
#endif
712
  prog = argv[0];
1019
  prog = argv[0];
713
  if (prog && strrchr(prog, '/'))
1020
  if (prog && strrchr (prog, '/'))
714
    prog = strrchr(prog, '/') + 1;
1021
    prog = strrchr (prog, '/') + 1;
1022
1023
#if defined(__MSDOS__) || defined(_WIN32)
1024
  /* DOS and MS-Windows use backslashes as directory separators, and usually
1025
     have an .exe suffix.  They also have case-insensitive filesystems.  */
1026
  if (prog)
1027
    {
1028
      char *p = prog;
1029
      char *bslash = strrchr (argv[0], '\\');
1030
1031
      if (bslash && bslash >= prog) /* for mixed forward/backslash case */
1032
	prog = bslash + 1;
1033
      else if (prog == argv[0]
1034
	       && argv[0][0] && argv[0][1] == ':') /* "c:progname" */
1035
	prog = argv[0] + 2;
1036
1037
      /* Collapse the letter-case, so `strcmp' could be used hence.  */
1038
      for ( ; *p; p++)
1039
	if (*p >= 'A' && *p <= 'Z')
1040
	  *p += 'a' - 'A';
1041
1042
      /* Remove the .exe extension, if any.  */
1043
      if ((p = strrchr (prog, '.')) && strcmp (p, ".exe") == 0)
1044
	*p = '\0';
1045
    }
1046
#endif
715
1047
716
  keys = NULL;
1048
  keys = NULL;
717
  keycc = 0;
1049
  keycc = 0;
718
  keyfound = 0;
1050
  with_filenames = 0;
719
  count_matches = 0;
720
  no_filenames = 0;
721
  list_files = 0;
722
  suppress_errors = 0;
723
  matcher = NULL;
1051
  matcher = NULL;
724
  aflag = 0;
725
#if HAVE_FTS > 0
726
  Rflag = Hflag = Pflag = Lflag = 0;
727
#endif
728
#if HAVE_LIBZ > 0
729
    if (*prog == 'z') {
730
	prog++;
731
	Zflag = 1;
732
    }
733
#endif
734
1052
735
  while ((opt = getopt(argc, argv, 
1053
  /* The value -1 means to use DEFAULT_CONTEXT. */
736
      GETOPT_STD/**/GETOPT_FTS/**/GETOPT_Z)) != -1)
1054
  out_after = out_before = -1;
1055
  /* Default before/after context: changed by -C/-NUM options */
1056
  default_context = 0;
1057
  /* Accumulated value of individual digits in a -NUM option */
1058
  digit_args_val = 0;
1059
1060
1061
/* Internationalization. */
1062
#if HAVE_SETLOCALE
1063
  setlocale (LC_ALL, "");
1064
#endif
1065
#if ENABLE_NLS
1066
  bindtextdomain (PACKAGE, LOCALEDIR);
1067
  textdomain (PACKAGE);
1068
#endif
1069
1070
  while ((opt = getopt_long (argc, argv,
1071
#if O_BINARY
1072
         "0123456789A:B:C::EFGHVX:abcd:e:f:hiLlnqrsvwxyUu",
1073
#else
1074
         "0123456789A:B:C::EFGHVX:abcd:e:f:hiLlnqrsvwxy",
1075
#endif
1076
         long_options, NULL)) != EOF)
737
    switch (opt)
1077
    switch (opt)
738
      {
1078
      {
739
      case '0':
1079
      case '0':
Lines 746-821 Link Here
746
      case '7':
1086
      case '7':
747
      case '8':
1087
      case '8':
748
      case '9':
1088
      case '9':
749
	out_before = 10 * out_before + opt - '0';
1089
	digit_args_val = 10 * digit_args_val + opt - '0';
750
	out_after = 10 * out_after + opt - '0';
1090
	default_context = digit_args_val;
751
	break;
1091
	break;
752
      case 'A':
1092
      case 'A':
753
	out_after = atoi(optarg);
1093
	if (optarg)
754
	if (out_after < 0)
1094
	  {
755
	  usage();
1095
	    if (ck_atoi (optarg, &out_after))
1096
	      fatal (_("invalid context length argument"), 0);
1097
	  }
756
	break;
1098
	break;
757
      case 'B':
1099
      case 'B':
758
	out_before = atoi(optarg);
1100
	if (optarg)
759
	if (out_before < 0)
1101
	  {
760
	  usage();
1102
	    if (ck_atoi (optarg, &out_before))
1103
	      fatal (_("invalid context length argument"), 0);
1104
	  }
761
	break;
1105
	break;
762
      case 'C':
1106
      case 'C':
763
	out_before = out_after = 2;
1107
	/* Set output match context, but let any explicit leading or
1108
	   trailing amount specified with -A or -B stand. */
1109
	if (optarg)
1110
	  {
1111
	    if (ck_atoi (optarg, &default_context))
1112
	      fatal (_("invalid context length argument"), 0);
1113
	  }
1114
	else
1115
	  default_context = 2;
764
	break;
1116
	break;
765
      case 'E':
1117
      case 'E':
766
	if (matcher && strcmp(matcher, "egrep") != 0)
1118
	if (matcher && strcmp (matcher, "posix-egrep") != 0)
767
	  fatal("you may specify only one of -E, -F, or -G", 0);
1119
	  fatal (_("you may specify only one of -E, -F, or -G"), 0);
768
	matcher = "posix-egrep";
1120
	matcher = "posix-egrep";
769
	break;
1121
	break;
770
      case 'F':
1122
      case 'F':
771
	if (matcher && strcmp(matcher, "fgrep") != 0)
1123
	if (matcher && strcmp(matcher, "fgrep") != 0)
772
	  fatal("you may specify only one of -E, -F, or -G", 0);;
1124
	  fatal(_("you may specify only one of -E, -F, or -G"), 0);;
773
	matcher = "fgrep";
1125
	matcher = "fgrep";
774
	break;
1126
	break;
775
      case 'G':
1127
      case 'G':
776
	if (matcher && strcmp(matcher, "grep") != 0)
1128
	if (matcher && strcmp (matcher, "grep") != 0)
777
	  fatal("you may specify only one of -E, -F, or -G", 0);
1129
	  fatal (_("you may specify only one of -E, -F, or -G"), 0);
778
	matcher = "grep";
1130
	matcher = "grep";
779
	break;
1131
	break;
780
      case 'V':
1132
      case 'H':
781
	fprintf(stderr, "%s\n", version);
1133
	with_filenames = 1;
782
	break;
1134
	break;
783
      case 'X':
1135
#if O_BINARY
784
	if (matcher)
1136
      case 'U':
785
	  fatal("matcher already specified", 0);
1137
	dos_use_file_type = DOS_BINARY;
786
	matcher = optarg;
787
	break;
1138
	break;
788
#if HAVE_LIBZ > 0
1139
      case 'u':
789
      case 'Z':
1140
	dos_report_unix_offset = 1;
790
	Zflag = 1;
791
	break;
1141
	break;
792
#endif
1142
#endif
793
#if HAVE_FTS > 0
1143
      case 'V':
794
	/* symbolic links on the command line are followed */
1144
	show_version = 1;
795
      case 'H': 
796
	Hflag = 1;
797
	Lflag = Pflag = 0;
798
	break;
799
800
	/* no symbolic links are followed */
801
      case 'P':
802
	Pflag = 1;
803
	Hflag = Lflag = 0;
804
	break;
805
806
	/* traverse file hierarchies */
807
      case 'R':
808
	Rflag = 1;
809
	break;
1145
	break;
810
1146
      case 'X':
811
	/* all symbolic links are followed */
1147
	if (matcher)
812
      case 'S':
1148
	  fatal (_("matcher already specified"), 0);
813
	Lflag = 1;
1149
	matcher = optarg;
814
	Hflag = Pflag = 0;
815
	break;
1150
	break;
816
#endif
817
      case 'a':
1151
      case 'a':
818
	aflag = 1;
1152
	always_text = 1;
819
	break;
1153
	break;
820
      case 'b':
1154
      case 'b':
821
	out_byte = 1;
1155
	out_byte = 1;
Lines 824-861 Link Here
824
	out_quiet = 1;
1158
	out_quiet = 1;
825
	count_matches = 1;
1159
	count_matches = 1;
826
	break;
1160
	break;
1161
      case 'd':
1162
	if (strcmp (optarg, "read") == 0)
1163
	  directories = READ_DIRECTORIES;
1164
	else if (strcmp (optarg, "skip") == 0)
1165
	  directories = SKIP_DIRECTORIES;
1166
	else if (strcmp (optarg, "recurse") == 0)
1167
	  directories = RECURSE_DIRECTORIES;
1168
	else
1169
	  fatal (_("unknown directories method"), 0);
1170
	break;
827
      case 'e':
1171
      case 'e':
828
	cc = strlen(optarg);
1172
	cc = strlen (optarg);
829
	keys = xrealloc(keys, keycc + cc + 1);
1173
	keys = xrealloc (keys, keycc + cc + 1);
830
	if (keyfound)
1174
	strcpy (&keys[keycc], optarg);
831
	  keys[keycc++] = '\n';
832
	strcpy(&keys[keycc], optarg);
833
	keycc += cc;
1175
	keycc += cc;
834
	keyfound = 1;
1176
	keys[keycc++] = '\n';
835
	break;
1177
	break;
836
      case 'f':
1178
      case 'f':
837
	fp = strcmp(optarg, "-") != 0 ? fopen(optarg, "r") : stdin;
1179
	fp = strcmp (optarg, "-") != 0 ? fopen (optarg, "r") : stdin;
838
	if (!fp)
1180
	if (!fp)
839
	  fatal(optarg, errno);
1181
	  fatal (optarg, errno);
840
	for (keyalloc = 1; keyalloc <= keycc; keyalloc *= 2)
1182
	for (keyalloc = 1; keyalloc <= keycc + 1; keyalloc *= 2)
841
	  ;
1183
	  ;
842
	keys = xrealloc(keys, keyalloc);
1184
	keys = xrealloc (keys, keyalloc);
843
	oldcc = keycc;
1185
	oldcc = keycc;
844
	if (keyfound)
1186
	while (!feof (fp)
845
	  keys[keycc++] = '\n';
1187
	       && (cc = fread (keys + keycc, 1, keyalloc - 1 - keycc, fp)) > 0)
846
	while (!feof(fp)
847
	       && (cc = fread(keys + keycc, 1, keyalloc - keycc, fp)) > 0)
848
	  {
1188
	  {
849
	    keycc += cc;
1189
	    keycc += cc;
850
	    if (keycc == keyalloc)
1190
	    if (keycc == keyalloc - 1)
851
	      keys = xrealloc(keys, keyalloc *= 2);
1191
	      keys = xrealloc (keys, keyalloc *= 2);
852
	  }
1192
	  }
853
	if (fp != stdin)
1193
	if (fp != stdin)
854
	  fclose(fp);
1194
	  fclose(fp);
855
	/* Nuke the final newline to avoid matching a null string. */
1195
	/* Append final newline if file ended in non-newline. */
856
	if (keycc - oldcc > 0 && keys[keycc - 1] == '\n')
1196
	if (oldcc != keycc && keys[keycc - 1] != '\n')
857
	  --keycc;
1197
	  keys[keycc++] = '\n';
858
	keyfound = 1;
859
	break;
1198
	break;
860
      case 'h':
1199
      case 'h':
861
	no_filenames = 1;
1200
	no_filenames = 1;
Lines 869-885 Link Here
869
	   Inspired by the same option in Hume's gre. */
1208
	   Inspired by the same option in Hume's gre. */
870
	out_quiet = 1;
1209
	out_quiet = 1;
871
	list_files = -1;
1210
	list_files = -1;
1211
	done_on_match = 1;
872
	break;
1212
	break;
873
      case 'l':
1213
      case 'l':
874
	out_quiet = 1;
1214
	out_quiet = 1;
875
	list_files = 1;
1215
	list_files = 1;
1216
	done_on_match = 1;
876
	break;
1217
	break;
877
      case 'n':
1218
      case 'n':
878
	out_line = 1;
1219
	out_line = 1;
879
	break;
1220
	break;
880
      case 'q':
1221
      case 'q':
1222
	done_on_match = 1;
881
	out_quiet = 1;
1223
	out_quiet = 1;
882
	break;
1224
	break;
1225
      case 'r':
1226
	directories = RECURSE_DIRECTORIES;
1227
	break;
883
      case 's':
1228
      case 's':
884
	suppress_errors = 1;
1229
	suppress_errors = 1;
885
	break;
1230
	break;
Lines 892-1103 Link Here
892
      case 'x':
1237
      case 'x':
893
	match_lines = 1;
1238
	match_lines = 1;
894
	break;
1239
	break;
1240
      case 0:
1241
	/* long options */
1242
	break;
895
      default:
1243
      default:
896
	usage();
1244
	usage (2);
897
	break;
1245
	break;
898
      }
1246
      }
899
1247
900
  if (!keyfound)
1248
  if (out_after < 0)
1249
    out_after = default_context;
1250
  if (out_before < 0)
1251
    out_before = default_context;
1252
1253
  if (show_version)
1254
    {
1255
      printf (_("grep (GNU grep) %s\n"), VERSION);
1256
      printf ("\n");
1257
      printf (_("\
1258
Copyright (C) 1988, 1992-1998, 1999 Free Software Foundation, Inc.\n"));
1259
      printf (_("\
1260
This is free software; see the source for copying conditions. There is NO\n\
1261
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"));
1262
      printf ("\n");
1263
      exit (0);
1264
    }
1265
1266
  if (show_help)
1267
    usage (0);
1268
1269
  if (keys)
1270
    {
1271
      if (keycc == 0)
1272
	/* No keys were specified (e.g. -f /dev/null).  Match nothing.  */
1273
        out_invert ^= 1;
1274
      else
1275
	/* Strip trailing newline. */
1276
        --keycc;
1277
    }
1278
  else
901
    if (optind < argc)
1279
    if (optind < argc)
902
      {
1280
      {
903
	keys = argv[optind++];
1281
	keys = argv[optind++];
904
	keycc = strlen(keys);
1282
	keycc = strlen (keys);
905
      }
1283
      }
906
    else
1284
    else
907
      usage();
1285
      usage (2);
908
1286
909
  if (!matcher)
1287
  if (! matcher)
910
    matcher = prog;
1288
    matcher = default_matcher;
911
1289
912
  if (!setmatcher(matcher) && !setmatcher("default"))
1290
  if (!setmatcher (matcher) && !setmatcher ("default"))
913
    abort();
1291
    abort ();
914
1292
915
  (*compile)(keys, keycc);
1293
  (*compile)(keys, keycc);
916
1294
917
#if HAVE_FTS > 0
1295
  if ((argc - optind > 1 && !no_filenames) || with_filenames)
918
  if ((argc - optind > 1 || Rflag) && !no_filenames)
919
#else
920
  if (argc - optind > 1 && !no_filenames)
921
#endif
922
    out_file = 1;
1296
    out_file = 1;
923
1297
924
  status = 1;
1298
#if O_BINARY
925
1299
  /* Output is set to binary mode because we shouldn't convert
926
#if HAVE_FTS > 0
1300
     NL to CR-LF pairs, especially when grepping binary files.  */
927
  if (Rflag) {
1301
  if (!isatty (1))
928
    fts_options = FTS_PHYSICAL | FTS_NOCHDIR;
1302
    SET_BINARY (1);
929
930
    if (Hflag)
931
      fts_options |= FTS_COMFOLLOW;
932
933
    if (Lflag) {
934
      fts_options |= FTS_LOGICAL;
935
      fts_options &= ~FTS_PHYSICAL;
936
    }
937
938
    if (Pflag) {
939
      fts_options &= ~FTS_LOGICAL & ~FTS_COMFOLLOW;
940
      fts_options |= FTS_PHYSICAL;
941
    }      
942
  }
943
944
  if (Rflag && optind < argc) {
945
    int i;
946
947
    /* replace "-" with "/dev/stdin" */
948
    for (i = optind; i < argc; i++)
949
      if (strcmp(argv[i], "-") == 0)
950
	*(argv + i) = "/dev/stdin";
951
952
    if ((ftsp = fts_open(argv + optind, fts_options,  
953
			 (int(*)())NULL)) == NULL) {
954
      if (!suppress_errors)
955
	error("", errno);
956
    } else {
957
958
      while((ftsent = fts_read(ftsp)) != NULL) {
959
	filename = ftsent->fts_accpath;
960
961
	switch(ftsent->fts_info) {
962
963
	  /* regular file */
964
	case FTS_F:
965
	  break;
966
967
	  /* directory */
968
	case FTS_D:
969
	case FTS_DC:
970
	case FTS_DP:
971
	  continue; break;
972
973
	  /* errors */
974
	case FTS_DNR:
975
	  error(filename, errno);
976
	  continue; break;
977
978
	case FTS_ERR:
979
	case FTS_NS:
980
	  error(filename, ftsent->fts_errno);
981
	  continue; break;
982
983
	  /* dead symlink */
984
	case FTS_SLNONE:
985
	  continue; break;
986
987
	  /* symlink, don't skip */
988
	case FTS_SL:
989
	  break;
990
991
	default:
992
	  /* 
993
	  if (!suppress_errors)
994
	    fprintf(stderr, "%s: ignored\n", filename);
995
	  continue; break;
996
	  */
997
998
	}
999
1000
	if ((desc = open(filename, O_RDONLY)) == -1) {
1001
	  error(filename, errno);
1002
	  continue;
1003
	}
1004
1005
	count = grep(desc);
1006
	if (count_matches)
1007
	  {
1008
	    if (out_file)
1009
	      printf("%s:", filename);
1010
	    printf("%d\n", count);
1011
	  }
1012
	if (count)
1013
	  {
1014
	    status = 0;
1015
	    if (list_files == 1)
1016
	      printf("%s\n", filename);
1017
	  }
1018
	else if (list_files == -1)
1019
	  printf("%s\n", filename);
1020
1021
	if (desc != STDIN_FILENO) {
1022
#if HAVE_LIBZ > 0
1023
	  if (Zflag)
1024
	    gzclose(gzbufdesc);
1025
	  else
1026
#endif
1303
#endif
1027
	  close(desc);
1028
	}
1029
      }
1030
1304
1031
      if (fts_close(ftsp) == -1)
1032
	error("fts_close", errno);
1033
    }
1034
1035
  /* ! Rflag */
1036
  } else
1037
1038
#endif /* HAVE_FTS */
1039
1305
1040
  /* search in file names from arguments, not from stdin */
1041
  if (optind < argc)
1306
  if (optind < argc)
1042
1043
    while (optind < argc)
1044
      {
1045
	desc = strcmp(argv[optind], "-") ? 
1046
	    open(argv[optind], O_RDONLY) : STDIN_FILENO;
1047
	if (desc < 0)
1048
	  {
1049
	    if (!suppress_errors)
1050
	      error(argv[optind], errno);
1051
	  }
1052
	else
1053
	  {
1054
	    filename = desc == STDIN_FILENO ? 
1055
		"(standard input)" : argv[optind];
1056
	    count = grep(desc);
1057
	    if (count_matches)
1058
	      {
1059
		if (out_file)
1060
		  printf("%s:", filename);
1061
		printf("%d\n", count);
1062
	      }
1063
	    if (count)
1064
	      {
1065
		status = 0;
1066
		if (list_files == 1)
1067
		  printf("%s\n", filename);
1068
	      }
1069
	    else if (list_files == -1)
1070
	      printf("%s\n", filename);
1071
1072
	    if (desc != STDIN_FILENO) {
1073
#if HAVE_LIBZ > 0
1074
	      if (Zflag)
1075
	        gzclose(gzbufdesc);
1076
	    else
1077
#endif
1078
	      close(desc);
1079
1080
	    }
1081
          }
1082
	++optind;
1083
      }
1084
1085
  /* read input from stdin */
1086
  else
1087
    {
1307
    {
1088
      filename = "(standard input)";
1308
	status = 1;
1089
      count = grep(STDIN_FILENO);
1309
	do
1090
      if (count_matches)
1091
	printf("%d\n", count);
1092
      if (count)
1093
	{
1310
	{
1094
	  status = 0;
1311
	  char *file = argv[optind];
1095
	  if (list_files == 1)
1312
	  status &= grepfile (strcmp (file, "-") == 0 ? (char *) NULL : file,
1096
	    printf("(standard input)\n");
1313
			      &stats_base);
1097
	}
1314
	}
1098
      else if (list_files == -1)
1315
	while ( ++optind < argc);
1099
	printf("(standard input)\n");
1100
    }
1316
    }
1317
  else
1318
    status = grepfile ((char *) NULL, &stats_base);
1319
1320
  if (fclose (stdout) == EOF)
1321
    error (_("writing output"), errno);
1101
1322
1102
  exit(errseen ? 2 : status);
1323
  exit (errseen ? 2 : status);
1103
}
1324
}
(-)grep/grep.h (-20 / +15 lines)
Lines 1-5 Link Here
1
/* grep.h - interface to grep driver for searching subroutines.
1
/* grep.h - interface to grep driver for searching subroutines.
2
   Copyright (C) 1992 Free Software Foundation, Inc.
2
   Copyright (C) 1992, 1998 Free Software Foundation, Inc.
3
3
4
   This program is free software; you can redistribute it and/or modify
4
   This program is free software; you can redistribute it and/or modify
5
   it under the terms of the GNU General Public License as published by
5
   it under the terms of the GNU General Public License as published by
Lines 13-23 Link Here
13
13
14
   You should have received a copy of the GNU General Public License
14
   You should have received a copy of the GNU General Public License
15
   along with this program; if not, write to the Free Software
15
   along with this program; if not, write to the Free Software
16
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
16
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
17
   02111-1307, USA.  */
17
18
18
#if __STDC__
19
#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 6) || __STRICT_ANSI__
20
# define __attribute__(x)
21
#endif
19
22
20
extern void fatal(const char *, int);
23
extern void fatal PARAMS ((const char *, int)) __attribute__((noreturn));
24
extern char *xmalloc PARAMS ((size_t size));
25
extern char *xrealloc PARAMS ((char *ptr, size_t size));
21
26
22
/* Grep.c expects the matchers vector to be terminated
27
/* Grep.c expects the matchers vector to be terminated
23
   by an entry with a NULL name, and to contain at least
28
   by an entry with a NULL name, and to contain at least
Lines 26-50 Link Here
26
extern struct matcher
31
extern struct matcher
27
{
32
{
28
  char *name;
33
  char *name;
29
  void (*compile)(char *, size_t);
34
  void (*compile) PARAMS ((char *, size_t));
30
  char *(*execute)(char *, size_t, char **);
35
  char *(*execute) PARAMS ((char *, size_t, char **));
31
} matchers[];
36
} matchers[];
32
37
33
#else
34
35
extern void fatal();
36
37
extern struct matcher
38
{
39
  char *name;
40
  void (*compile)();
41
  char *(*execute)();
42
} matchers[];
43
44
#endif
45
46
/* Exported from grep.c. */
38
/* Exported from grep.c. */
47
extern char *matcher;
39
extern char const *matcher;
40
41
/* Exported from fgrepmat.c, egrepmat.c, grepmat.c.  */
42
extern char const default_matcher[];
48
43
49
/* The following flags are exported from grep for the matchers
44
/* The following flags are exported from grep for the matchers
50
   to look at. */
45
   to look at. */
(-)grep/grepmat.c (+6 lines)
Line 0 Link Here
1
#ifdef HAVE_CONFIG_H
2
# include <config.h>
3
#endif
4
#include "system.h"
5
#include "grep.h"
6
char const default_matcher[] = "grep";
(-)grep/kwset.c (-38 / +35 lines)
Lines 1-10 Link Here
1
/* kwset.c - search for any of a set of keywords.
1
/* kwset.c - search for any of a set of keywords.
2
   Copyright 1989 Free Software Foundation
2
   Copyright (C) 1989, 1998 Free Software Foundation, Inc.
3
		  Written August 1989 by Mike Haertel.
4
3
5
   This program is free software; you can redistribute it and/or modify
4
   This program is free software; you can redistribute it and/or modify
6
   it under the terms of the GNU General Public License as published by
5
   it under the terms of the GNU General Public License as published by
7
   the Free Software Foundation; either version 1, or (at your option)
6
   the Free Software Foundation; either version 2, or (at your option)
8
   any later version.
7
   any later version.
9
8
10
   This program is distributed in the hope that it will be useful,
9
   This program is distributed in the hope that it will be useful,
Lines 14-21 Link Here
14
13
15
   You should have received a copy of the GNU General Public License
14
   You should have received a copy of the GNU General Public License
16
   along with this program; if not, write to the Free Software
15
   along with this program; if not, write to the Free Software
17
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
16
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
17
   02111-1307, USA.  */
18
18
19
/* Written August 1989 by Mike Haertel.
19
   The author may be reached (Email) at the address mike@ai.mit.edu,
20
   The author may be reached (Email) at the address mike@ai.mit.edu,
20
   or (US mail) as Mike Haertel c/o Free Software Foundation. */
21
   or (US mail) as Mike Haertel c/o Free Software Foundation. */
21
22
Lines 27-69 Link Here
27
   String Matching:  An Aid to Bibliographic Search," CACM June 1975,
28
   String Matching:  An Aid to Bibliographic Search," CACM June 1975,
28
   Vol. 18, No. 6, which describes the failure function used below. */
29
   Vol. 18, No. 6, which describes the failure function used below. */
29
30
30
31
#ifdef HAVE_CONFIG_H
31
#ifdef STDC_HEADERS
32
# include <config.h>
32
#include <limits.h>
33
#include <stdlib.h>
34
#else
35
#define INT_MAX 2147483647
36
#define UCHAR_MAX 255
37
#ifdef __STDC__
38
#include <stddef.h>
39
#else
40
#include <sys/types.h>
41
#endif
42
extern char *malloc();
43
extern void free();
44
#endif
45
46
#ifdef HAVE_MEMCHR
47
#include <string.h>
48
#ifdef NEED_MEMORY_H
49
#include <memory.h>
50
#endif
51
#else
52
#ifdef __STDC__
53
extern void *memchr();
54
#else
55
extern char *memchr();
56
#endif
57
#endif
33
#endif
34
#include <sys/types.h>
35
#include "system.h"
36
#include "kwset.h"
37
#include "obstack.h"
58
38
59
#ifdef GREP
39
#ifdef GREP
60
extern char *xmalloc();
40
extern char *xmalloc();
61
#define malloc xmalloc
41
# undef malloc
42
# define malloc xmalloc
62
#endif
43
#endif
63
44
64
#include "kwset.h"
65
#include "obstack.h"
66
67
#define NCHAR (UCHAR_MAX + 1)
45
#define NCHAR (UCHAR_MAX + 1)
68
#define obstack_chunk_alloc malloc
46
#define obstack_chunk_alloc malloc
69
#define obstack_chunk_free free
47
#define obstack_chunk_free free
Lines 106-111 Link Here
106
  char *trans;			/* Character translation table. */
84
  char *trans;			/* Character translation table. */
107
};
85
};
108
86
87
/* prototypes */
88
static void enqueue PARAMS((struct tree *, struct trie **));
89
static void treefails PARAMS((register struct tree *, struct trie *, struct trie *));
90
static void treedelta PARAMS((register struct tree *,register unsigned int, unsigned char *));
91
static int  hasevery PARAMS((register struct tree *, register struct tree *));
92
static void treenext PARAMS((struct tree *, struct trie **));
93
static char * bmexec PARAMS((kwset_t, char *, size_t));
94
static char * cwexec PARAMS((kwset_t, char *, size_t, struct kwsmatch *));
95
109
/* Allocate and initialize a keyword set object, returning an opaque
96
/* Allocate and initialize a keyword set object, returning an opaque
110
   pointer to it.  Return NULL if memory is not available. */
97
   pointer to it.  Return NULL if memory is not available. */
111
kwset_t
98
kwset_t
Lines 194-206 Link Here
194
	  link = (struct tree *) obstack_alloc(&kwset->obstack,
181
	  link = (struct tree *) obstack_alloc(&kwset->obstack,
195
					       sizeof (struct tree));
182
					       sizeof (struct tree));
196
	  if (!link)
183
	  if (!link)
197
	    return "memory exhausted";
184
	    return _("memory exhausted");
198
	  link->llink = 0;
185
	  link->llink = 0;
199
	  link->rlink = 0;
186
	  link->rlink = 0;
200
	  link->trie = (struct trie *) obstack_alloc(&kwset->obstack,
187
	  link->trie = (struct trie *) obstack_alloc(&kwset->obstack,
201
						     sizeof (struct trie));
188
						     sizeof (struct trie));
202
	  if (!link->trie)
189
	  if (!link->trie)
203
	    return "memory exhausted";
190
	    return _("memory exhausted");
204
	  link->trie->accepting = 0;
191
	  link->trie->accepting = 0;
205
	  link->trie->links = 0;
192
	  link->trie->links = 0;
206
	  link->trie->parent = trie;
193
	  link->trie->parent = trie;
Lines 249-254 Link Here
249
		      r->balance = t->balance != (char) -1 ? 0 : 1;
236
		      r->balance = t->balance != (char) -1 ? 0 : 1;
250
		      t->balance = 0;
237
		      t->balance = 0;
251
		      break;
238
		      break;
239
		    default:
240
		      abort ();
252
		    }
241
		    }
253
		  break;
242
		  break;
254
		case 2:
243
		case 2:
Lines 267-274 Link Here
267
		      r->balance = t->balance != (char) -1 ? 0 : 1;
256
		      r->balance = t->balance != (char) -1 ? 0 : 1;
268
		      t->balance = 0;
257
		      t->balance = 0;
269
		      break;
258
		      break;
259
		    default:
260
		      abort ();
270
		    }
261
		    }
271
		  break;
262
		  break;
263
		default:
264
		  abort ();
272
		}
265
		}
273
266
274
	      if (dirs[depth - 1] == L)
267
	      if (dirs[depth - 1] == L)
Lines 591-597 Link Here
591
      d = d1[U((tp += d)[-1])];
584
      d = d1[U((tp += d)[-1])];
592
      if (d != 0)
585
      if (d != 0)
593
	continue;
586
	continue;
594
      if (tp[-2] == gc)
587
      if (U(tp[-2]) == gc)
595
	{
588
	{
596
	  for (i = 3; i <= len && U(tp[-i]) == U(sp[-i]); ++i)
589
	  for (i = 3; i <= len && U(tp[-i]) == U(sp[-i]); ++i)
597
	    ;
590
	    ;
Lines 620-625 Link Here
620
  register char *end, *qlim;
613
  register char *end, *qlim;
621
  register struct tree *tree;
614
  register struct tree *tree;
622
  register char *trans;
615
  register char *trans;
616
617
#ifdef lint
618
  accept = NULL;
619
#endif
623
620
624
  /* Initialize register copies and look for easy ways out. */
621
  /* Initialize register copies and look for easy ways out. */
625
  kwset = (struct kwset *) kws;
622
  kwset = (struct kwset *) kws;
(-)grep/kwset.h (-23 / +11 lines)
Lines 1-10 Link Here
1
/* kwset.h - header declaring the keyword set library.
1
/* kwset.h - header declaring the keyword set library.
2
   Copyright 1989 Free Software Foundation
2
   Copyright (C) 1989, 1998 Free Software Foundation, Inc.
3
		  Written August 1989 by Mike Haertel.
4
3
5
   This program is free software; you can redistribute it and/or modify
4
   This program is free software; you can redistribute it and/or modify
6
   it under the terms of the GNU General Public License as published by
5
   it under the terms of the GNU General Public License as published by
7
   the Free Software Foundation; either version 1, or (at your option)
6
   the Free Software Foundation; either version 2, or (at your option)
8
   any later version.
7
   any later version.
9
8
10
   This program is distributed in the hope that it will be useful,
9
   This program is distributed in the hope that it will be useful,
Lines 14-21 Link Here
14
13
15
   You should have received a copy of the GNU General Public License
14
   You should have received a copy of the GNU General Public License
16
   along with this program; if not, write to the Free Software
15
   along with this program; if not, write to the Free Software
17
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
16
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
17
   02111-1307, USA.  */
18
18
19
/* Written August 1989 by Mike Haertel.
19
   The author may be reached (Email) at the address mike@ai.mit.edu,
20
   The author may be reached (Email) at the address mike@ai.mit.edu,
20
   or (US mail) as Mike Haertel c/o Free Software Foundation. */
21
   or (US mail) as Mike Haertel c/o Free Software Foundation. */
21
22
Lines 26-49 Link Here
26
  size_t size[1];		/* Length of each submatch. */
27
  size_t size[1];		/* Length of each submatch. */
27
};
28
};
28
29
29
#if __STDC__
30
typedef ptr_t kwset_t;
30
31
typedef void *kwset_t;
32
31
33
/* Return an opaque pointer to a newly allocated keyword set, or NULL
32
/* Return an opaque pointer to a newly allocated keyword set, or NULL
34
   if enough memory cannot be obtained.  The argument if non-NULL
33
   if enough memory cannot be obtained.  The argument if non-NULL
35
   specifies a table of character translations to be applied to all
34
   specifies a table of character translations to be applied to all
36
   pattern and search text. */
35
   pattern and search text. */
37
extern kwset_t kwsalloc(char *);
36
extern kwset_t kwsalloc PARAMS((char *));
38
37
39
/* Incrementally extend the keyword set to include the given string.
38
/* Incrementally extend the keyword set to include the given string.
40
   Return NULL for success, or an error message.  Remember an index
39
   Return NULL for success, or an error message.  Remember an index
41
   number for each keyword included in the set. */
40
   number for each keyword included in the set. */
42
extern char *kwsincr(kwset_t, char *, size_t);
41
extern char *kwsincr PARAMS((kwset_t, char *, size_t));
43
42
44
/* When the keyword set has been completely built, prepare it for
43
/* When the keyword set has been completely built, prepare it for
45
   use.  Return NULL for success, or an error message. */
44
   use.  Return NULL for success, or an error message. */
46
extern char *kwsprep(kwset_t);
45
extern char *kwsprep PARAMS((kwset_t));
47
46
48
/* Search through the given buffer for a member of the keyword set.
47
/* Search through the given buffer for a member of the keyword set.
49
   Return a pointer to the leftmost longest match found, or NULL if
48
   Return a pointer to the leftmost longest match found, or NULL if
Lines 51-69 Link Here
51
   the matching substring in the integer it points to.  Similarly,
50
   the matching substring in the integer it points to.  Similarly,
52
   if foundindex is non-NULL, store the index of the particular
51
   if foundindex is non-NULL, store the index of the particular
53
   keyword found therein. */
52
   keyword found therein. */
54
extern char *kwsexec(kwset_t, char *, size_t, struct kwsmatch *);
53
extern char *kwsexec PARAMS((kwset_t, char *, size_t, struct kwsmatch *));
55
54
56
/* Deallocate the given keyword set and all its associated storage. */
55
/* Deallocate the given keyword set and all its associated storage. */
57
extern void kwsfree(kwset_t);
56
extern void kwsfree PARAMS((kwset_t));
58
59
#else
60
61
typedef char *kwset_t;
62
63
extern kwset_t kwsalloc();
64
extern char *kwsincr();
65
extern char *kwsprep();
66
extern char *kwsexec();
67
extern void kwsfree();
68
57
69
#endif
(-)grep/memchr.c (+198 lines)
Line 0 Link Here
1
/* Copyright (C) 1991, 1993, 1998 Free Software Foundation, Inc.
2
   Based on strlen implementation by Torbjorn Granlund (tege@sics.se),
3
   with help from Dan Sahlin (dan@sics.se) and
4
   commentary by Jim Blandy (jimb@ai.mit.edu);
5
   adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu),
6
   and implemented by Roland McGrath (roland@ai.mit.edu).
7
8
NOTE: The canonical source of this file is maintained with the GNU C Library.
9
Bugs can be reported to bug-glibc@prep.ai.mit.edu.
10
11
This program is free software; you can redistribute it and/or modify it
12
under the terms of the GNU General Public License as published by the
13
Free Software Foundation; either version 2, or (at your option) any
14
later version.
15
16
This program is distributed in the hope that it will be useful,
17
but WITHOUT ANY WARRANTY; without even the implied warranty of
18
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19
GNU General Public License for more details.
20
21
You should have received a copy of the GNU General Public License
22
along with this program; if not, write to the Free Software
23
Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
24
25
#ifdef HAVE_CONFIG_H
26
#include <config.h>
27
#endif
28
29
#undef __ptr_t
30
#if defined (__cplusplus) || (defined (__STDC__) && __STDC__)
31
# define __ptr_t void *
32
#else /* Not C++ or ANSI C.  */
33
# define __ptr_t char *
34
#endif /* C++ or ANSI C.  */
35
36
#if defined (_LIBC)
37
# include <string.h>
38
#endif
39
40
#if defined (HAVE_LIMITS_H) || defined (_LIBC)
41
# include <limits.h>
42
#endif
43
44
#define LONG_MAX_32_BITS 2147483647
45
46
#ifndef LONG_MAX
47
#define LONG_MAX LONG_MAX_32_BITS
48
#endif
49
50
#include <sys/types.h>
51
52
53
/* Search no more than N bytes of S for C.  */
54
55
__ptr_t
56
memchr (s, c, n)
57
     const __ptr_t s;
58
     int c;
59
     size_t n;
60
{
61
  const unsigned char *char_ptr;
62
  const unsigned long int *longword_ptr;
63
  unsigned long int longword, magic_bits, charmask;
64
65
  c = (unsigned char) c;
66
67
  /* Handle the first few characters by reading one character at a time.
68
     Do this until CHAR_PTR is aligned on a longword boundary.  */
69
  for (char_ptr = (const unsigned char *) s;
70
       n > 0 && ((unsigned long int) char_ptr
71
		 & (sizeof (longword) - 1)) != 0;
72
       --n, ++char_ptr)
73
    if (*char_ptr == c)
74
      return (__ptr_t) char_ptr;
75
76
  /* All these elucidatory comments refer to 4-byte longwords,
77
     but the theory applies equally well to 8-byte longwords.  */
78
79
  longword_ptr = (unsigned long int *) char_ptr;
80
81
  /* Bits 31, 24, 16, and 8 of this number are zero.  Call these bits
82
     the "holes."  Note that there is a hole just to the left of
83
     each byte, with an extra at the end:
84
85
     bits:  01111110 11111110 11111110 11111111
86
     bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD
87
88
     The 1-bits make sure that carries propagate to the next 0-bit.
89
     The 0-bits provide holes for carries to fall into.  */
90
91
  if (sizeof (longword) != 4 && sizeof (longword) != 8)
92
    abort ();
93
94
#if LONG_MAX <= LONG_MAX_32_BITS
95
  magic_bits = 0x7efefeff;
96
#else
97
  magic_bits = ((unsigned long int) 0x7efefefe << 32) | 0xfefefeff;
98
#endif
99
100
  /* Set up a longword, each of whose bytes is C.  */
101
  charmask = c | (c << 8);
102
  charmask |= charmask << 16;
103
#if LONG_MAX > LONG_MAX_32_BITS
104
  charmask |= charmask << 32;
105
#endif
106
107
  /* Instead of the traditional loop which tests each character,
108
     we will test a longword at a time.  The tricky part is testing
109
     if *any of the four* bytes in the longword in question are zero.  */
110
  while (n >= sizeof (longword))
111
    {
112
      /* We tentatively exit the loop if adding MAGIC_BITS to
113
	 LONGWORD fails to change any of the hole bits of LONGWORD.
114
115
	 1) Is this safe?  Will it catch all the zero bytes?
116
	 Suppose there is a byte with all zeros.  Any carry bits
117
	 propagating from its left will fall into the hole at its
118
	 least significant bit and stop.  Since there will be no
119
	 carry from its most significant bit, the LSB of the
120
	 byte to the left will be unchanged, and the zero will be
121
	 detected.
122
123
	 2) Is this worthwhile?  Will it ignore everything except
124
	 zero bytes?  Suppose every byte of LONGWORD has a bit set
125
	 somewhere.  There will be a carry into bit 8.  If bit 8
126
	 is set, this will carry into bit 16.  If bit 8 is clear,
127
	 one of bits 9-15 must be set, so there will be a carry
128
	 into bit 16.  Similarly, there will be a carry into bit
129
	 24.  If one of bits 24-30 is set, there will be a carry
130
	 into bit 31, so all of the hole bits will be changed.
131
132
	 The one misfire occurs when bits 24-30 are clear and bit
133
	 31 is set; in this case, the hole at bit 31 is not
134
	 changed.  If we had access to the processor carry flag,
135
	 we could close this loophole by putting the fourth hole
136
	 at bit 32!
137
138
	 So it ignores everything except 128's, when they're aligned
139
	 properly.
140
141
	 3) But wait!  Aren't we looking for C, not zero?
142
	 Good point.  So what we do is XOR LONGWORD with a longword,
143
	 each of whose bytes is C.  This turns each byte that is C
144
	 into a zero.  */
145
146
      longword = *longword_ptr++ ^ charmask;
147
148
      /* Add MAGIC_BITS to LONGWORD.  */
149
      if ((((longword + magic_bits)
150
151
	    /* Set those bits that were unchanged by the addition.  */
152
	    ^ ~longword)
153
154
	   /* Look at only the hole bits.  If any of the hole bits
155
	      are unchanged, most likely one of the bytes was a
156
	      zero.  */
157
	   & ~magic_bits) != 0)
158
	{
159
	  /* Which of the bytes was C?  If none of them were, it was
160
	     a misfire; continue the search.  */
161
162
	  const unsigned char *cp = (const unsigned char *) (longword_ptr - 1);
163
164
	  if (cp[0] == c)
165
	    return (__ptr_t) cp;
166
	  if (cp[1] == c)
167
	    return (__ptr_t) &cp[1];
168
	  if (cp[2] == c)
169
	    return (__ptr_t) &cp[2];
170
	  if (cp[3] == c)
171
	    return (__ptr_t) &cp[3];
172
#if LONG_MAX > 2147483647
173
	  if (cp[4] == c)
174
	    return (__ptr_t) &cp[4];
175
	  if (cp[5] == c)
176
	    return (__ptr_t) &cp[5];
177
	  if (cp[6] == c)
178
	    return (__ptr_t) &cp[6];
179
	  if (cp[7] == c)
180
	    return (__ptr_t) &cp[7];
181
#endif
182
	}
183
184
      n -= sizeof (longword);
185
    }
186
187
  char_ptr = (const unsigned char *) longword_ptr;
188
189
  while (n-- > 0)
190
    {
191
      if (*char_ptr == c)
192
	return (__ptr_t) char_ptr;
193
      else
194
	++char_ptr;
195
    }
196
197
  return 0;
198
}
(-)grep/obstack.c (-42 / +181 lines)
Lines 1-5 Link Here
1
/* obstack.c - subroutines used implicitly by object stack macros
1
/* obstack.h - object stack macros
2
   Copyright (C) 1988, 1993 Free Software Foundation, Inc.
2
   Copyright (C) 1988,89,90,91,92,93,94,96,97, 98 Free Software Foundation, Inc.
3
4
   the C library, however.  The master source lives in /gd/gnu/lib.
5
6
NOTE: The canonical source of this file is maintained with the
7
GNU C Library.  Bugs can be reported to bug-glibc@prep.ai.mit.edu.
3
8
4
This program is free software; you can redistribute it and/or modify it
9
This program is free software; you can redistribute it and/or modify it
5
under the terms of the GNU General Public License as published by the
10
under the terms of the GNU General Public License as published by the
Lines 12-37 Link Here
12
GNU General Public License for more details.
17
GNU General Public License for more details.
13
18
14
You should have received a copy of the GNU General Public License
19
You should have received a copy of the GNU General Public License
15
along with this program; if not, write to the Free Software
20
along with this program; if not, write to the Free Software Foundation,
16
Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
21
Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
22
23
#ifdef HAVE_CONFIG_H
24
#include <config.h>
25
#endif
17
26
18
#include "obstack.h"
27
#include "obstack.h"
19
28
20
/* This is just to get __GNU_LIBRARY__ defined.  */
29
/* NOTE BEFORE MODIFYING THIS FILE: This version number must be
21
#include <stdio.h>
30
   incremented whenever callers compiled using an old obstack.h can no
31
   longer properly call the functions in this obstack.c.  */
32
#define OBSTACK_INTERFACE_VERSION 1
22
33
23
/* Comment out all this code if we are using the GNU C Library, and are not
34
/* Comment out all this code if we are using the GNU C Library, and are not
24
   actually compiling the library itself.  This code is part of the GNU C
35
   actually compiling the library itself, and the installed library
25
   Library, but also included in many other GNU distributions.  Compiling
36
   supports the same library interface we do.  This code is part of the GNU
37
   C Library, but also included in many other GNU distributions.  Compiling
26
   and linking in this code is a waste when using the GNU C library
38
   and linking in this code is a waste when using the GNU C library
27
   (especially if it is a shared library).  Rather than having every GNU
39
   (especially if it is a shared library).  Rather than having every GNU
28
   program understand `configure --with-gnu-libc' and omit the object files,
40
   program understand `configure --with-gnu-libc' and omit the object
29
   it is simpler to just do this in the source for each such file.  */
41
   files, it is simpler to just do this in the source for each such file.  */
42
43
#include <stdio.h>		/* Random thing to get __GNU_LIBRARY__.  */
44
#if !defined (_LIBC) && defined (__GNU_LIBRARY__) && __GNU_LIBRARY__ > 1
45
#include <gnu-versions.h>
46
#if _GNU_OBSTACK_INTERFACE_VERSION == OBSTACK_INTERFACE_VERSION
47
#define ELIDE_CODE
48
#endif
49
#endif
50
30
51
31
#if defined (_LIBC) || !defined (__GNU_LIBRARY__)
52
#ifndef ELIDE_CODE
32
53
33
54
34
#ifdef __STDC__
55
#if defined (__STDC__) && __STDC__
35
#define POINTER void *
56
#define POINTER void *
36
#else
57
#else
37
#define POINTER char *
58
#define POINTER char *
Lines 40-46 Link Here
40
/* Determine default alignment.  */
61
/* Determine default alignment.  */
41
struct fooalign {char x; double d;};
62
struct fooalign {char x; double d;};
42
#define DEFAULT_ALIGNMENT  \
63
#define DEFAULT_ALIGNMENT  \
43
  ((PTR_INT_TYPE) ((char *)&((struct fooalign *) 0)->d - (char *)0))
64
  ((PTR_INT_TYPE) ((char *) &((struct fooalign *) 0)->d - (char *) 0))
44
/* If malloc were really smart, it would round addresses to DEFAULT_ALIGNMENT.
65
/* If malloc were really smart, it would round addresses to DEFAULT_ALIGNMENT.
45
   But in fact it might be less smart and round addresses to as much as
66
   But in fact it might be less smart and round addresses to as much as
46
   DEFAULT_ROUNDING.  So we prepare for it to do that.  */
67
   DEFAULT_ROUNDING.  So we prepare for it to do that.  */
Lines 55-60 Link Here
55
#define COPYING_UNIT int
76
#define COPYING_UNIT int
56
#endif
77
#endif
57
78
79
80
/* The functions allocating more room by calling `obstack_chunk_alloc'
81
   jump to the handler pointed to by `obstack_alloc_failed_handler'.
82
   This variable by default points to the internal function
83
   `print_and_abort'.  */
84
#if defined (__STDC__) && __STDC__
85
static void print_and_abort (void);
86
void (*obstack_alloc_failed_handler) (void) = print_and_abort;
87
#else
88
static void print_and_abort ();
89
void (*obstack_alloc_failed_handler) () = print_and_abort;
90
#endif
91
92
/* Exit value used when `print_and_abort' is used.  */
93
#if defined __GNU_LIBRARY__ || defined HAVE_STDLIB_H
94
#include <stdlib.h>
95
#endif
96
#ifndef EXIT_FAILURE
97
#define EXIT_FAILURE 1
98
#endif
99
int obstack_exit_failure = EXIT_FAILURE;
100
58
/* The non-GNU-C macros copy the obstack into this global variable
101
/* The non-GNU-C macros copy the obstack into this global variable
59
   to avoid multiple evaluation.  */
102
   to avoid multiple evaluation.  */
60
103
Lines 66-102 Link Here
66
   For free, do not use ?:, since some compilers, like the MIPS compilers,
109
   For free, do not use ?:, since some compilers, like the MIPS compilers,
67
   do not allow (expr) ? void : void.  */
110
   do not allow (expr) ? void : void.  */
68
111
112
#if defined (__STDC__) && __STDC__
69
#define CALL_CHUNKFUN(h, size) \
113
#define CALL_CHUNKFUN(h, size) \
70
  (((h) -> use_extra_arg) \
114
  (((h) -> use_extra_arg) \
71
   ? (*(h)->chunkfun) ((h)->extra_arg, (size)) \
115
   ? (*(h)->chunkfun) ((h)->extra_arg, (size)) \
72
   : (*(h)->chunkfun) ((size)))
116
   : (*(struct _obstack_chunk *(*) (long)) (h)->chunkfun) ((size)))
73
117
74
#define CALL_FREEFUN(h, old_chunk) \
118
#define CALL_FREEFUN(h, old_chunk) \
75
  do { \
119
  do { \
76
    if ((h) -> use_extra_arg) \
120
    if ((h) -> use_extra_arg) \
77
      (*(h)->freefun) ((h)->extra_arg, (old_chunk)); \
121
      (*(h)->freefun) ((h)->extra_arg, (old_chunk)); \
78
    else \
122
    else \
79
      (*(h)->freefun) ((old_chunk)); \
123
      (*(void (*) (void *)) (h)->freefun) ((old_chunk)); \
80
  } while (0)
124
  } while (0)
125
#else
126
#define CALL_CHUNKFUN(h, size) \
127
  (((h) -> use_extra_arg) \
128
   ? (*(h)->chunkfun) ((h)->extra_arg, (size)) \
129
   : (*(struct _obstack_chunk *(*) ()) (h)->chunkfun) ((size)))
130
131
#define CALL_FREEFUN(h, old_chunk) \
132
  do { \
133
    if ((h) -> use_extra_arg) \
134
      (*(h)->freefun) ((h)->extra_arg, (old_chunk)); \
135
    else \
136
      (*(void (*) ()) (h)->freefun) ((old_chunk)); \
137
  } while (0)
138
#endif
81
139
82
140
83
/* Initialize an obstack H for use.  Specify chunk size SIZE (0 means default).
141
/* Initialize an obstack H for use.  Specify chunk size SIZE (0 means default).
84
   Objects start on multiples of ALIGNMENT (0 means use default).
142
   Objects start on multiples of ALIGNMENT (0 means use default).
85
   CHUNKFUN is the function to use to allocate chunks,
143
   CHUNKFUN is the function to use to allocate chunks,
86
   and FREEFUN the function to free them.  */
144
   and FREEFUN the function to free them.
87
145
88
void
146
   Return nonzero if successful, zero if out of memory.
147
   To recover from an out of memory error,
148
   free up some memory, then call this again.  */
149
150
int
89
_obstack_begin (h, size, alignment, chunkfun, freefun)
151
_obstack_begin (h, size, alignment, chunkfun, freefun)
90
     struct obstack *h;
152
     struct obstack *h;
91
     int size;
153
     int size;
92
     int alignment;
154
     int alignment;
155
#if defined (__STDC__) && __STDC__
156
     POINTER (*chunkfun) (long);
157
     void (*freefun) (void *);
158
#else
93
     POINTER (*chunkfun) ();
159
     POINTER (*chunkfun) ();
94
     void (*freefun) ();
160
     void (*freefun) ();
161
#endif
95
{
162
{
96
  register struct _obstack_chunk* chunk; /* points to new chunk */
163
  register struct _obstack_chunk *chunk; /* points to new chunk */
97
164
98
  if (alignment == 0)
165
  if (alignment == 0)
99
    alignment = DEFAULT_ALIGNMENT;
166
    alignment = (int) DEFAULT_ALIGNMENT;
100
  if (size == 0)
167
  if (size == 0)
101
    /* Default size is what GNU malloc can fit in a 4096-byte block.  */
168
    /* Default size is what GNU malloc can fit in a 4096-byte block.  */
102
    {
169
    {
Lines 114-147 Link Here
114
      size = 4096 - extra;
181
      size = 4096 - extra;
115
    }
182
    }
116
183
184
#if defined (__STDC__) && __STDC__
185
  h->chunkfun = (struct _obstack_chunk * (*)(void *, long)) chunkfun;
186
  h->freefun = (void (*) (void *, struct _obstack_chunk *)) freefun;
187
#else
117
  h->chunkfun = (struct _obstack_chunk * (*)()) chunkfun;
188
  h->chunkfun = (struct _obstack_chunk * (*)()) chunkfun;
118
  h->freefun = freefun;
189
  h->freefun = freefun;
190
#endif
119
  h->chunk_size = size;
191
  h->chunk_size = size;
120
  h->alignment_mask = alignment - 1;
192
  h->alignment_mask = alignment - 1;
121
  h->use_extra_arg = 0;
193
  h->use_extra_arg = 0;
122
194
123
  chunk = h->chunk = CALL_CHUNKFUN (h, h -> chunk_size);
195
  chunk = h->chunk = CALL_CHUNKFUN (h, h -> chunk_size);
196
  if (!chunk)
197
    (*obstack_alloc_failed_handler) ();
124
  h->next_free = h->object_base = chunk->contents;
198
  h->next_free = h->object_base = chunk->contents;
125
  h->chunk_limit = chunk->limit
199
  h->chunk_limit = chunk->limit
126
    = (char *) chunk + h->chunk_size;
200
    = (char *) chunk + h->chunk_size;
127
  chunk->prev = 0;
201
  chunk->prev = 0;
128
  /* The initial chunk now contains no empty object.  */
202
  /* The initial chunk now contains no empty object.  */
129
  h->maybe_empty_object = 0;
203
  h->maybe_empty_object = 0;
204
  h->alloc_failed = 0;
205
  return 1;
130
}
206
}
131
207
132
void
208
int
133
_obstack_begin_1 (h, size, alignment, chunkfun, freefun, arg)
209
_obstack_begin_1 (h, size, alignment, chunkfun, freefun, arg)
134
     struct obstack *h;
210
     struct obstack *h;
135
     int size;
211
     int size;
136
     int alignment;
212
     int alignment;
213
#if defined (__STDC__) && __STDC__
214
     POINTER (*chunkfun) (POINTER, long);
215
     void (*freefun) (POINTER, POINTER);
216
#else
137
     POINTER (*chunkfun) ();
217
     POINTER (*chunkfun) ();
138
     void (*freefun) ();
218
     void (*freefun) ();
219
#endif
139
     POINTER arg;
220
     POINTER arg;
140
{
221
{
141
  register struct _obstack_chunk* chunk; /* points to new chunk */
222
  register struct _obstack_chunk *chunk; /* points to new chunk */
142
223
143
  if (alignment == 0)
224
  if (alignment == 0)
144
    alignment = DEFAULT_ALIGNMENT;
225
    alignment = (int) DEFAULT_ALIGNMENT;
145
  if (size == 0)
226
  if (size == 0)
146
    /* Default size is what GNU malloc can fit in a 4096-byte block.  */
227
    /* Default size is what GNU malloc can fit in a 4096-byte block.  */
147
    {
228
    {
Lines 159-178 Link Here
159
      size = 4096 - extra;
240
      size = 4096 - extra;
160
    }
241
    }
161
242
243
#if defined(__STDC__) && __STDC__
244
  h->chunkfun = (struct _obstack_chunk * (*)(void *,long)) chunkfun;
245
  h->freefun = (void (*) (void *, struct _obstack_chunk *)) freefun;
246
#else
162
  h->chunkfun = (struct _obstack_chunk * (*)()) chunkfun;
247
  h->chunkfun = (struct _obstack_chunk * (*)()) chunkfun;
163
  h->freefun = freefun;
248
  h->freefun = freefun;
249
#endif
164
  h->chunk_size = size;
250
  h->chunk_size = size;
165
  h->alignment_mask = alignment - 1;
251
  h->alignment_mask = alignment - 1;
166
  h->extra_arg = arg;
252
  h->extra_arg = arg;
167
  h->use_extra_arg = 1;
253
  h->use_extra_arg = 1;
168
254
169
  chunk = h->chunk = CALL_CHUNKFUN (h, h -> chunk_size);
255
  chunk = h->chunk = CALL_CHUNKFUN (h, h -> chunk_size);
256
  if (!chunk)
257
    (*obstack_alloc_failed_handler) ();
170
  h->next_free = h->object_base = chunk->contents;
258
  h->next_free = h->object_base = chunk->contents;
171
  h->chunk_limit = chunk->limit
259
  h->chunk_limit = chunk->limit
172
    = (char *) chunk + h->chunk_size;
260
    = (char *) chunk + h->chunk_size;
173
  chunk->prev = 0;
261
  chunk->prev = 0;
174
  /* The initial chunk now contains no empty object.  */
262
  /* The initial chunk now contains no empty object.  */
175
  h->maybe_empty_object = 0;
263
  h->maybe_empty_object = 0;
264
  h->alloc_failed = 0;
265
  return 1;
176
}
266
}
177
267
178
/* Allocate a new current chunk for the obstack *H
268
/* Allocate a new current chunk for the obstack *H
Lines 186-197 Link Here
186
     struct obstack *h;
276
     struct obstack *h;
187
     int length;
277
     int length;
188
{
278
{
189
  register struct _obstack_chunk*	old_chunk = h->chunk;
279
  register struct _obstack_chunk *old_chunk = h->chunk;
190
  register struct _obstack_chunk*	new_chunk;
280
  register struct _obstack_chunk *new_chunk;
191
  register long	new_size;
281
  register long	new_size;
192
  register int obj_size = h->next_free - h->object_base;
282
  register long obj_size = h->next_free - h->object_base;
193
  register int i;
283
  register long i;
194
  int already;
284
  long already;
195
285
196
  /* Compute size for new chunk.  */
286
  /* Compute size for new chunk.  */
197
  new_size = (obj_size + length) + (obj_size >> 3) + 100;
287
  new_size = (obj_size + length) + (obj_size >> 3) + 100;
Lines 199-205 Link Here
199
    new_size = h->chunk_size;
289
    new_size = h->chunk_size;
200
290
201
  /* Allocate and initialize the new chunk.  */
291
  /* Allocate and initialize the new chunk.  */
202
  new_chunk = h->chunk = CALL_CHUNKFUN (h, new_size);
292
  new_chunk = CALL_CHUNKFUN (h, new_size);
293
  if (!new_chunk)
294
    (*obstack_alloc_failed_handler) ();
295
  h->chunk = new_chunk;
203
  new_chunk->prev = old_chunk;
296
  new_chunk->prev = old_chunk;
204
  new_chunk->limit = h->chunk_limit = (char *) new_chunk + new_size;
297
  new_chunk->limit = h->chunk_limit = (char *) new_chunk + new_size;
205
298
Lines 242-260 Link Here
242
   This is here for debugging.
335
   This is here for debugging.
243
   If you use it in a program, you are probably losing.  */
336
   If you use it in a program, you are probably losing.  */
244
337
338
#if defined (__STDC__) && __STDC__
339
/* Suppress -Wmissing-prototypes warning.  We don't want to declare this in
340
   obstack.h because it is just for debugging.  */
341
int _obstack_allocated_p (struct obstack *h, POINTER obj);
342
#endif
343
245
int
344
int
246
_obstack_allocated_p (h, obj)
345
_obstack_allocated_p (h, obj)
247
     struct obstack *h;
346
     struct obstack *h;
248
     POINTER obj;
347
     POINTER obj;
249
{
348
{
250
  register struct _obstack_chunk*  lp;	/* below addr of any objects in this chunk */
349
  register struct _obstack_chunk *lp;	/* below addr of any objects in this chunk */
251
  register struct _obstack_chunk*  plp;	/* point to previous chunk if any */
350
  register struct _obstack_chunk *plp;	/* point to previous chunk if any */
252
351
253
  lp = (h)->chunk;
352
  lp = (h)->chunk;
254
  /* We use >= rather than > since the object cannot be exactly at
353
  /* We use >= rather than > since the object cannot be exactly at
255
     the beginning of the chunk but might be an empty object exactly
354
     the beginning of the chunk but might be an empty object exactly
256
     at the end of an adjacent chunk. */
355
     at the end of an adjacent chunk.  */
257
  while (lp != 0 && ((POINTER)lp >= obj || (POINTER)(lp)->limit < obj))
356
  while (lp != 0 && ((POINTER) lp >= obj || (POINTER) (lp)->limit < obj))
258
    {
357
    {
259
      plp = lp->prev;
358
      plp = lp->prev;
260
      lp = plp;
359
      lp = plp;
Lines 275-288 Link Here
275
     struct obstack *h;
374
     struct obstack *h;
276
     POINTER obj;
375
     POINTER obj;
277
{
376
{
278
  register struct _obstack_chunk*  lp;	/* below addr of any objects in this chunk */
377
  register struct _obstack_chunk *lp;	/* below addr of any objects in this chunk */
279
  register struct _obstack_chunk*  plp;	/* point to previous chunk if any */
378
  register struct _obstack_chunk *plp;	/* point to previous chunk if any */
280
379
281
  lp = h->chunk;
380
  lp = h->chunk;
282
  /* We use >= because there cannot be an object at the beginning of a chunk.
381
  /* We use >= because there cannot be an object at the beginning of a chunk.
283
     But there can be an empty object at that address
382
     But there can be an empty object at that address
284
     at the end of another chunk.  */
383
     at the end of another chunk.  */
285
  while (lp != 0 && ((POINTER)lp >= obj || (POINTER)(lp)->limit < obj))
384
  while (lp != 0 && ((POINTER) lp >= obj || (POINTER) (lp)->limit < obj))
286
    {
385
    {
287
      plp = lp->prev;
386
      plp = lp->prev;
288
      CALL_FREEFUN (h, lp);
387
      CALL_FREEFUN (h, lp);
Lines 293-299 Link Here
293
    }
392
    }
294
  if (lp)
393
  if (lp)
295
    {
394
    {
296
      h->object_base = h->next_free = (char *)(obj);
395
      h->object_base = h->next_free = (char *) (obj);
297
      h->chunk_limit = lp->limit;
396
      h->chunk_limit = lp->limit;
298
      h->chunk = lp;
397
      h->chunk = lp;
299
    }
398
    }
Lines 309-322 Link Here
309
     struct obstack *h;
408
     struct obstack *h;
310
     POINTER obj;
409
     POINTER obj;
311
{
410
{
312
  register struct _obstack_chunk*  lp;	/* below addr of any objects in this chunk */
411
  register struct _obstack_chunk *lp;	/* below addr of any objects in this chunk */
313
  register struct _obstack_chunk*  plp;	/* point to previous chunk if any */
412
  register struct _obstack_chunk *plp;	/* point to previous chunk if any */
314
413
315
  lp = h->chunk;
414
  lp = h->chunk;
316
  /* We use >= because there cannot be an object at the beginning of a chunk.
415
  /* We use >= because there cannot be an object at the beginning of a chunk.
317
     But there can be an empty object at that address
416
     But there can be an empty object at that address
318
     at the end of another chunk.  */
417
     at the end of another chunk.  */
319
  while (lp != 0 && ((POINTER)lp >= obj || (POINTER)(lp)->limit < obj))
418
  while (lp != 0 && ((POINTER) lp >= obj || (POINTER) (lp)->limit < obj))
320
    {
419
    {
321
      plp = lp->prev;
420
      plp = lp->prev;
322
      CALL_FREEFUN (h, lp);
421
      CALL_FREEFUN (h, lp);
Lines 327-333 Link Here
327
    }
426
    }
328
  if (lp)
427
  if (lp)
329
    {
428
    {
330
      h->object_base = h->next_free = (char *)(obj);
429
      h->object_base = h->next_free = (char *) (obj);
331
      h->chunk_limit = lp->limit;
430
      h->chunk_limit = lp->limit;
332
      h->chunk = lp;
431
      h->chunk = lp;
333
    }
432
    }
Lines 336-341 Link Here
336
    abort ();
435
    abort ();
337
}
436
}
338
437
438
int
439
_obstack_memory_used (h)
440
     struct obstack *h;
441
{
442
  register struct _obstack_chunk* lp;
443
  register int nbytes = 0;
444
445
  for (lp = h->chunk; lp != 0; lp = lp->prev)
446
    {
447
      nbytes += lp->limit - (char *) lp;
448
    }
449
  return nbytes;
450
}
451
452
/* Define the error handler.  */
453
#ifndef _
454
# ifdef HAVE_LIBINTL_H
455
#  include <libintl.h>
456
#  ifndef _
457
#   define _(Str) gettext (Str)
458
#  endif
459
# else
460
#  define _(Str) (Str)
461
# endif
462
#endif
463
464
static void
465
print_and_abort ()
466
{
467
  fputs (_("memory exhausted\n"), stderr);
468
  exit (obstack_exit_failure);
469
}
470
339
#if 0
471
#if 0
340
/* These are now turned off because the applications do not use it
472
/* These are now turned off because the applications do not use it
341
   and it uses bcopy via obstack_grow, which causes trouble on sysV.  */
473
   and it uses bcopy via obstack_grow, which causes trouble on sysV.  */
Lines 343-349 Link Here
343
/* Now define the functional versions of the obstack macros.
475
/* Now define the functional versions of the obstack macros.
344
   Define them to simply use the corresponding macros to do the job.  */
476
   Define them to simply use the corresponding macros to do the job.  */
345
477
346
#ifdef __STDC__
478
#if defined (__STDC__) && __STDC__
347
/* These function definitions do not work with non-ANSI preprocessors;
479
/* These function definitions do not work with non-ANSI preprocessors;
348
   they won't pass through the macro names in parentheses.  */
480
   they won't pass through the macro names in parentheses.  */
349
481
Lines 374-379 Link Here
374
  return obstack_room (obstack);
506
  return obstack_room (obstack);
375
}
507
}
376
508
509
int (obstack_make_room) (obstack, length)
510
     struct obstack *obstack;
511
     int length;
512
{
513
  return obstack_make_room (obstack, length);
514
}
515
377
void (obstack_grow) (obstack, pointer, length)
516
void (obstack_grow) (obstack, pointer, length)
378
     struct obstack *obstack;
517
     struct obstack *obstack;
379
     POINTER pointer;
518
     POINTER pointer;
Lines 451-454 Link Here
451
590
452
#endif /* 0 */
591
#endif /* 0 */
453
592
454
#endif	/* _LIBC or not __GNU_LIBRARY__.  */
593
#endif	/* !ELIDE_CODE */
(-)grep/obstack.h (-66 / +173 lines)
Lines 1-5 Link Here
1
/* obstack.h - object stack macros
1
/* obstack.h - object stack macros
2
   Copyright (C) 1988, 1992 Free Software Foundation, Inc.
2
   Copyright (C) 1988,89,90,91,92,93,94,96,97, 98 Free Software Foundation, Inc.
3
4
   the C library, however.  The master source lives in /gd/gnu/lib.
5
6
NOTE: The canonical source of this file is maintained with the
7
GNU C Library.  Bugs can be reported to bug-glibc@prep.ai.mit.edu.
3
8
4
This program is free software; you can redistribute it and/or modify it
9
This program is free software; you can redistribute it and/or modify it
5
under the terms of the GNU General Public License as published by the
10
under the terms of the GNU General Public License as published by the
Lines 12-19 Link Here
12
GNU General Public License for more details.
17
GNU General Public License for more details.
13
18
14
You should have received a copy of the GNU General Public License
19
You should have received a copy of the GNU General Public License
15
along with this program; if not, write to the Free Software
20
along with this program; if not, write to the Free Software Foundation,
16
Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
21
Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
17
22
18
/* Summary:
23
/* Summary:
19
24
Lines 100-146 Link Here
100
105
101
/* Don't do the contents of this file more than once.  */
106
/* Don't do the contents of this file more than once.  */
102
107
103
#ifndef __OBSTACKS__
108
#ifndef _OBSTACK_H
104
#define __OBSTACKS__
109
#define _OBSTACK_H 1
110
111
#ifdef __cplusplus
112
extern "C" {
113
#endif
105
114
106
/* We use subtraction of (char *)0 instead of casting to int
115
/* We use subtraction of (char *) 0 instead of casting to int
107
   because on word-addressable machines a simple cast to int
116
   because on word-addressable machines a simple cast to int
108
   may ignore the byte-within-word field of the pointer.  */
117
   may ignore the byte-within-word field of the pointer.  */
109
118
110
#ifndef __PTR_TO_INT
119
#ifndef __PTR_TO_INT
111
#define __PTR_TO_INT(P) ((P) - (char *)0)
120
#define __PTR_TO_INT(P) ((P) - (char *) 0)
112
#endif
121
#endif
113
122
114
#ifndef __INT_TO_PTR
123
#ifndef __INT_TO_PTR
115
#define __INT_TO_PTR(P) ((P) + (char *)0)
124
#define __INT_TO_PTR(P) ((P) + (char *) 0)
116
#endif
125
#endif
117
126
118
/* We need the type of the resulting object.  In ANSI C it is ptrdiff_t
127
/* We need the type of the resulting object.  In ANSI C it is ptrdiff_t
119
   but in traditional C it is usually long.  If we are in ANSI C and
128
   but in traditional C it is usually long.  If we are in ANSI C and
120
   don't already have ptrdiff_t get it.  */
129
   don't already have ptrdiff_t get it.  */
121
130
122
#if defined (__STDC__) && ! defined (offsetof)
131
#if defined (__STDC__) && __STDC__ && ! defined (offsetof)
123
#if defined (__GNUC__) && defined (IN_GCC)
132
#if defined (__GNUC__) && defined (IN_GCC)
124
/* On Next machine, the system's stddef.h screws up if included
133
/* On Next machine, the system's stddef.h screws up if included
125
   after we have defined just ptrdiff_t, so include all of gstddef.h.
134
   after we have defined just ptrdiff_t, so include all of stddef.h.
126
   Otherwise, define just ptrdiff_t, which is all we need.  */
135
   Otherwise, define just ptrdiff_t, which is all we need.  */
127
#ifndef __NeXT__
136
#ifndef __NeXT__
128
#define __need_ptrdiff_t
137
#define __need_ptrdiff_t
129
#endif
138
#endif
139
#endif
130
140
131
/* While building GCC, the stddef.h that goes with GCC has this name.  */
132
#include "gstddef.h"
133
#else
134
#include <stddef.h>
141
#include <stddef.h>
135
#endif
142
#endif
136
#endif
137
143
138
#ifdef __STDC__
144
#if defined (__STDC__) && __STDC__
139
#define PTR_INT_TYPE ptrdiff_t
145
#define PTR_INT_TYPE ptrdiff_t
140
#else
146
#else
141
#define PTR_INT_TYPE long
147
#define PTR_INT_TYPE long
142
#endif
148
#endif
143
149
150
#if defined (_LIBC) || defined (HAVE_STRING_H)
151
#include <string.h>
152
#define _obstack_memcpy(To, From, N) memcpy ((To), (From), (N))
153
#else
154
#ifdef memcpy
155
#define _obstack_memcpy(To, From, N) memcpy ((To), (From), (N))
156
#else
157
#define _obstack_memcpy(To, From, N) bcopy ((From), (To), (N))
158
#endif
159
#endif
160
144
struct _obstack_chunk		/* Lives at front of each chunk. */
161
struct _obstack_chunk		/* Lives at front of each chunk. */
145
{
162
{
146
  char  *limit;			/* 1 past end of this chunk */
163
  char  *limit;			/* 1 past end of this chunk */
Lines 151-189 Link Here
151
struct obstack		/* control current object in current chunk */
168
struct obstack		/* control current object in current chunk */
152
{
169
{
153
  long	chunk_size;		/* preferred size to allocate chunks in */
170
  long	chunk_size;		/* preferred size to allocate chunks in */
154
  struct _obstack_chunk* chunk;	/* address of current struct obstack_chunk */
171
  struct _obstack_chunk *chunk;	/* address of current struct obstack_chunk */
155
  char	*object_base;		/* address of object we are building */
172
  char	*object_base;		/* address of object we are building */
156
  char	*next_free;		/* where to add next char to current object */
173
  char	*next_free;		/* where to add next char to current object */
157
  char	*chunk_limit;		/* address of char after current chunk */
174
  char	*chunk_limit;		/* address of char after current chunk */
158
  PTR_INT_TYPE temp;		/* Temporary for some macros.  */
175
  PTR_INT_TYPE temp;		/* Temporary for some macros.  */
159
  int   alignment_mask;		/* Mask of alignment for each object. */
176
  int   alignment_mask;		/* Mask of alignment for each object. */
177
#if defined (__STDC__) && __STDC__
178
  /* These prototypes vary based on `use_extra_arg', and we use
179
     casts to the prototypeless function type in all assignments,
180
     but having prototypes here quiets -Wstrict-prototypes.  */
181
  struct _obstack_chunk *(*chunkfun) (void *, long);
182
  void (*freefun) (void *, struct _obstack_chunk *);
183
  void *extra_arg;		/* first arg for chunk alloc/dealloc funcs */
184
#else
160
  struct _obstack_chunk *(*chunkfun) (); /* User's fcn to allocate a chunk.  */
185
  struct _obstack_chunk *(*chunkfun) (); /* User's fcn to allocate a chunk.  */
161
  void (*freefun) ();		/* User's function to free a chunk.  */
186
  void (*freefun) ();		/* User's function to free a chunk.  */
162
  char *extra_arg;		/* first arg for chunk alloc/dealloc funcs */
187
  char *extra_arg;		/* first arg for chunk alloc/dealloc funcs */
188
#endif
163
  unsigned use_extra_arg:1;	/* chunk alloc/dealloc funcs take extra arg */
189
  unsigned use_extra_arg:1;	/* chunk alloc/dealloc funcs take extra arg */
164
  unsigned maybe_empty_object:1;/* There is a possibility that the current
190
  unsigned maybe_empty_object:1;/* There is a possibility that the current
165
				   chunk contains a zero-length object.  This
191
				   chunk contains a zero-length object.  This
166
				   prevents freeing the chunk if we allocate
192
				   prevents freeing the chunk if we allocate
167
				   a bigger chunk to replace it. */
193
				   a bigger chunk to replace it. */
194
  unsigned alloc_failed:1;	/* No longer used, as we now call the failed
195
				   handler on error, but retained for binary
196
				   compatibility.  */
168
};
197
};
169
198
170
/* Declare the external functions we use; they are in obstack.c.  */
199
/* Declare the external functions we use; they are in obstack.c.  */
171
200
172
#ifdef __STDC__
201
#if defined (__STDC__) && __STDC__
173
extern void _obstack_newchunk (struct obstack *, int);
202
extern void _obstack_newchunk (struct obstack *, int);
174
extern void _obstack_free (struct obstack *, void *);
203
extern void _obstack_free (struct obstack *, void *);
175
extern void _obstack_begin (struct obstack *, int, int,
204
extern int _obstack_begin (struct obstack *, int, int,
176
			    void *(*) (), void (*) ());
205
			    void *(*) (long), void (*) (void *));
177
extern void _obstack_begin_1 (struct obstack *, int, int,
206
extern int _obstack_begin_1 (struct obstack *, int, int,
178
			      void *(*) (), void (*) (), void *);
207
			     void *(*) (void *, long),
208
			     void (*) (void *, void *), void *);
209
extern int _obstack_memory_used (struct obstack *);
179
#else
210
#else
180
extern void _obstack_newchunk ();
211
extern void _obstack_newchunk ();
181
extern void _obstack_free ();
212
extern void _obstack_free ();
182
extern void _obstack_begin ();
213
extern int _obstack_begin ();
183
extern void _obstack_begin_1 ();
214
extern int _obstack_begin_1 ();
215
extern int _obstack_memory_used ();
184
#endif
216
#endif
185
217
186
#ifdef __STDC__
218
#if defined (__STDC__) && __STDC__
187
219
188
/* Do the function-declarations after the structs
220
/* Do the function-declarations after the structs
189
   but before defining the macros.  */
221
   but before defining the macros.  */
Lines 211-216 Link Here
211
int obstack_object_size (struct obstack *obstack);
243
int obstack_object_size (struct obstack *obstack);
212
244
213
int obstack_room (struct obstack *obstack);
245
int obstack_room (struct obstack *obstack);
246
void obstack_make_room (struct obstack *obstack, int size);
214
void obstack_1grow_fast (struct obstack *obstack, int data_char);
247
void obstack_1grow_fast (struct obstack *obstack, int data_char);
215
void obstack_ptr_grow_fast (struct obstack *obstack, void *data);
248
void obstack_ptr_grow_fast (struct obstack *obstack, void *data);
216
void obstack_int_grow_fast (struct obstack *obstack, int data);
249
void obstack_int_grow_fast (struct obstack *obstack, int data);
Lines 220-230 Link Here
220
void * obstack_next_free (struct obstack *obstack);
253
void * obstack_next_free (struct obstack *obstack);
221
int obstack_alignment_mask (struct obstack *obstack);
254
int obstack_alignment_mask (struct obstack *obstack);
222
int obstack_chunk_size (struct obstack *obstack);
255
int obstack_chunk_size (struct obstack *obstack);
256
int obstack_memory_used (struct obstack *obstack);
223
257
224
#endif /* __STDC__ */
258
#endif /* __STDC__ */
225
259
226
/* Non-ANSI C cannot really support alternative functions for these macros,
260
/* Non-ANSI C cannot really support alternative functions for these macros,
227
   so we do not declare them.  */
261
   so we do not declare them.  */
262
263
/* Error handler called when `obstack_chunk_alloc' failed to allocate
264
   more memory.  This can be set to a user defined function.  The
265
   default action is to print a message and exit with `obstack_exit_failure'.  */
266
#if defined (__STDC__) && __STDC__
267
extern void (*obstack_alloc_failed_handler) (void);
268
#else
269
extern void (*obstack_alloc_failed_handler) ();
270
#endif
271
272
/* Exit value used when `print_and_abort' is used.  */
273
extern int obstack_exit_failure;
228
274
229
/* Pointer to beginning of object being allocated or to be allocated next.
275
/* Pointer to beginning of object being allocated or to be allocated next.
230
   Note that this might not be the final address of the object
276
   Note that this might not be the final address of the object
Lines 244-249 Link Here
244
290
245
#define obstack_alignment_mask(h) ((h)->alignment_mask)
291
#define obstack_alignment_mask(h) ((h)->alignment_mask)
246
292
293
/* To prevent prototype warnings provide complete argument list in
294
   standard C version.  */
295
#if defined (__STDC__) && __STDC__
296
297
#define obstack_init(h) \
298
  _obstack_begin ((h), 0, 0, \
299
		  (void *(*) (long)) obstack_chunk_alloc, (void (*) (void *)) obstack_chunk_free)
300
301
#define obstack_begin(h, size) \
302
  _obstack_begin ((h), (size), 0, \
303
		  (void *(*) (long)) obstack_chunk_alloc, (void (*) (void *)) obstack_chunk_free)
304
305
#define obstack_specify_allocation(h, size, alignment, chunkfun, freefun) \
306
  _obstack_begin ((h), (size), (alignment), \
307
		    (void *(*) (long)) (chunkfun), (void (*) (void *)) (freefun))
308
309
#define obstack_specify_allocation_with_arg(h, size, alignment, chunkfun, freefun, arg) \
310
  _obstack_begin_1 ((h), (size), (alignment), \
311
		    (void *(*) (void *, long)) (chunkfun), \
312
		    (void (*) (void *, void *)) (freefun), (arg))
313
314
#define obstack_chunkfun(h, newchunkfun) \
315
  ((h) -> chunkfun = (struct _obstack_chunk *(*)(void *, long)) (newchunkfun))
316
317
#define obstack_freefun(h, newfreefun) \
318
  ((h) -> freefun = (void (*)(void *, struct _obstack_chunk *)) (newfreefun))
319
320
#else
321
247
#define obstack_init(h) \
322
#define obstack_init(h) \
248
  _obstack_begin ((h), 0, 0, \
323
  _obstack_begin ((h), 0, 0, \
249
		  (void *(*) ()) obstack_chunk_alloc, (void (*) ()) obstack_chunk_free)
324
		  (void *(*) ()) obstack_chunk_alloc, (void (*) ()) obstack_chunk_free)
Lines 260-271 Link Here
260
  _obstack_begin_1 ((h), (size), (alignment), \
335
  _obstack_begin_1 ((h), (size), (alignment), \
261
		    (void *(*) ()) (chunkfun), (void (*) ()) (freefun), (arg))
336
		    (void *(*) ()) (chunkfun), (void (*) ()) (freefun), (arg))
262
337
338
#define obstack_chunkfun(h, newchunkfun) \
339
  ((h) -> chunkfun = (struct _obstack_chunk *(*)()) (newchunkfun))
340
341
#define obstack_freefun(h, newfreefun) \
342
  ((h) -> freefun = (void (*)()) (newfreefun))
343
344
#endif
345
263
#define obstack_1grow_fast(h,achar) (*((h)->next_free)++ = achar)
346
#define obstack_1grow_fast(h,achar) (*((h)->next_free)++ = achar)
264
347
265
#define obstack_blank_fast(h,n) ((h)->next_free += (n))
348
#define obstack_blank_fast(h,n) ((h)->next_free += (n))
349
350
#define obstack_memory_used(h) _obstack_memory_used (h)
266
351
267
#if defined (__GNUC__) && defined (__STDC__)
352
#if defined (__GNUC__) && defined (__STDC__) && __STDC__
268
#if __GNUC__ < 2 || defined(NeXT)
353
/* NextStep 2.0 cc is really gcc 1.93 but it defines __GNUC__ = 2 and
354
   does not implement __extension__.  But that compiler doesn't define
355
   __GNUC_MINOR__.  */
356
#if __GNUC__ < 2 || (__NeXT__ && !__GNUC_MINOR__)
269
#define __extension__
357
#define __extension__
270
#endif
358
#endif
271
359
Lines 284-301 Link Here
284
  ({ struct obstack *__o = (OBSTACK);					\
372
  ({ struct obstack *__o = (OBSTACK);					\
285
     (unsigned) (__o->chunk_limit - __o->next_free); })
373
     (unsigned) (__o->chunk_limit - __o->next_free); })
286
374
287
/* Note that the call to _obstack_newchunk is enclosed in (..., 0)
375
#define obstack_make_room(OBSTACK,length)				\
288
   so that we can avoid having void expressions
376
__extension__								\
289
   in the arms of the conditional expression.
377
({ struct obstack *__o = (OBSTACK);					\
290
   Casting the third operand to void was tried before,
378
   int __len = (length);						\
291
   but some compilers won't accept it.  */
379
   if (__o->chunk_limit - __o->next_free < __len)			\
380
     _obstack_newchunk (__o, __len);					\
381
   (void) 0; })
382
292
#define obstack_grow(OBSTACK,where,length)				\
383
#define obstack_grow(OBSTACK,where,length)				\
293
__extension__								\
384
__extension__								\
294
({ struct obstack *__o = (OBSTACK);					\
385
({ struct obstack *__o = (OBSTACK);					\
295
   int __len = (length);						\
386
   int __len = (length);						\
296
   ((__o->next_free + __len > __o->chunk_limit)				\
387
   if (__o->next_free + __len > __o->chunk_limit)			\
297
    ? (_obstack_newchunk (__o, __len), 0) : 0);				\
388
     _obstack_newchunk (__o, __len);					\
298
   bcopy (where, __o->next_free, __len);				\
389
   _obstack_memcpy (__o->next_free, (char *) (where), __len);		\
299
   __o->next_free += __len;						\
390
   __o->next_free += __len;						\
300
   (void) 0; })
391
   (void) 0; })
301
392
Lines 303-320 Link Here
303
__extension__								\
394
__extension__								\
304
({ struct obstack *__o = (OBSTACK);					\
395
({ struct obstack *__o = (OBSTACK);					\
305
   int __len = (length);						\
396
   int __len = (length);						\
306
   ((__o->next_free + __len + 1 > __o->chunk_limit)			\
397
   if (__o->next_free + __len + 1 > __o->chunk_limit)			\
307
    ? (_obstack_newchunk (__o, __len + 1), 0) : 0),			\
398
     _obstack_newchunk (__o, __len + 1);				\
308
   bcopy (where, __o->next_free, __len),				\
399
   _obstack_memcpy (__o->next_free, (char *) (where), __len);		\
309
   __o->next_free += __len,						\
400
   __o->next_free += __len;						\
310
   *(__o->next_free)++ = 0;						\
401
   *(__o->next_free)++ = 0;						\
311
   (void) 0; })
402
   (void) 0; })
312
403
313
#define obstack_1grow(OBSTACK,datum)					\
404
#define obstack_1grow(OBSTACK,datum)					\
314
__extension__								\
405
__extension__								\
315
({ struct obstack *__o = (OBSTACK);					\
406
({ struct obstack *__o = (OBSTACK);					\
316
   ((__o->next_free + 1 > __o->chunk_limit)				\
407
   if (__o->next_free + 1 > __o->chunk_limit)				\
317
    ? (_obstack_newchunk (__o, 1), 0) : 0),				\
408
     _obstack_newchunk (__o, 1);					\
318
   *(__o->next_free)++ = (datum);					\
409
   *(__o->next_free)++ = (datum);					\
319
   (void) 0; })
410
   (void) 0; })
320
411
Lines 325-352 Link Here
325
#define obstack_ptr_grow(OBSTACK,datum)					\
416
#define obstack_ptr_grow(OBSTACK,datum)					\
326
__extension__								\
417
__extension__								\
327
({ struct obstack *__o = (OBSTACK);					\
418
({ struct obstack *__o = (OBSTACK);					\
328
   ((__o->next_free + sizeof (void *) > __o->chunk_limit)		\
419
   if (__o->next_free + sizeof (void *) > __o->chunk_limit)		\
329
    ? (_obstack_newchunk (__o, sizeof (void *)), 0) : 0),		\
420
     _obstack_newchunk (__o, sizeof (void *));				\
330
   *((void **)__o->next_free)++ = ((void *)datum);			\
421
   *((void **)__o->next_free)++ = ((void *)datum);			\
331
   (void) 0; })
422
   (void) 0; })
332
423
333
#define obstack_int_grow(OBSTACK,datum)					\
424
#define obstack_int_grow(OBSTACK,datum)					\
334
__extension__								\
425
__extension__								\
335
({ struct obstack *__o = (OBSTACK);					\
426
({ struct obstack *__o = (OBSTACK);					\
336
   ((__o->next_free + sizeof (int) > __o->chunk_limit)			\
427
   if (__o->next_free + sizeof (int) > __o->chunk_limit)		\
337
    ? (_obstack_newchunk (__o, sizeof (int)), 0) : 0),			\
428
     _obstack_newchunk (__o, sizeof (int));				\
338
   *((int *)__o->next_free)++ = ((int)datum);				\
429
   *((int *)__o->next_free)++ = ((int)datum);				\
339
   (void) 0; })
430
   (void) 0; })
340
431
341
#define obstack_ptr_grow_fast(h,aptr) (*((void **)(h)->next_free)++ = (void *)aptr)
432
#define obstack_ptr_grow_fast(h,aptr) (*((void **) (h)->next_free)++ = (void *)aptr)
342
#define obstack_int_grow_fast(h,aint) (*((int *)(h)->next_free)++ = (int)aint)
433
#define obstack_int_grow_fast(h,aint) (*((int *) (h)->next_free)++ = (int) aint)
343
434
344
#define obstack_blank(OBSTACK,length)					\
435
#define obstack_blank(OBSTACK,length)					\
345
__extension__								\
436
__extension__								\
346
({ struct obstack *__o = (OBSTACK);					\
437
({ struct obstack *__o = (OBSTACK);					\
347
   int __len = (length);						\
438
   int __len = (length);						\
348
   ((__o->chunk_limit - __o->next_free < __len)				\
439
   if (__o->chunk_limit - __o->next_free < __len)			\
349
    ? (_obstack_newchunk (__o, __len), 0) : 0);				\
440
     _obstack_newchunk (__o, __len);					\
350
   __o->next_free += __len;						\
441
   __o->next_free += __len;						\
351
   (void) 0; })
442
   (void) 0; })
352
443
Lines 373-387 Link Here
373
#define obstack_finish(OBSTACK)  					\
464
#define obstack_finish(OBSTACK)  					\
374
__extension__								\
465
__extension__								\
375
({ struct obstack *__o1 = (OBSTACK);					\
466
({ struct obstack *__o1 = (OBSTACK);					\
376
   void *value = (void *) __o1->object_base;				\
467
   void *value;								\
468
   value = (void *) __o1->object_base;					\
377
   if (__o1->next_free == value)					\
469
   if (__o1->next_free == value)					\
378
     __o1->maybe_empty_object = 1;					\
470
     __o1->maybe_empty_object = 1;					\
379
   __o1->next_free							\
471
   __o1->next_free							\
380
     = __INT_TO_PTR ((__PTR_TO_INT (__o1->next_free)+__o1->alignment_mask)\
472
     = __INT_TO_PTR ((__PTR_TO_INT (__o1->next_free)+__o1->alignment_mask)\
381
		     & ~ (__o1->alignment_mask));			\
473
		     & ~ (__o1->alignment_mask));			\
382
   ((__o1->next_free - (char *)__o1->chunk				\
474
   if (__o1->next_free - (char *)__o1->chunk				\
383
     > __o1->chunk_limit - (char *)__o1->chunk)				\
475
       > __o1->chunk_limit - (char *)__o1->chunk)			\
384
    ? (__o1->next_free = __o1->chunk_limit) : 0);			\
476
     __o1->next_free = __o1->chunk_limit;				\
385
   __o1->object_base = __o1->next_free;					\
477
   __o1->object_base = __o1->next_free;					\
386
   value; })
478
   value; })
387
479
Lines 401-444 Link Here
401
#define obstack_room(h)		\
493
#define obstack_room(h)		\
402
 (unsigned) ((h)->chunk_limit - (h)->next_free)
494
 (unsigned) ((h)->chunk_limit - (h)->next_free)
403
495
496
/* Note that the call to _obstack_newchunk is enclosed in (..., 0)
497
   so that we can avoid having void expressions
498
   in the arms of the conditional expression.
499
   Casting the third operand to void was tried before,
500
   but some compilers won't accept it.  */
501
502
#define obstack_make_room(h,length)					\
503
( (h)->temp = (length),							\
504
  (((h)->next_free + (h)->temp > (h)->chunk_limit)			\
505
   ? (_obstack_newchunk ((h), (h)->temp), 0) : 0))
506
404
#define obstack_grow(h,where,length)					\
507
#define obstack_grow(h,where,length)					\
405
( (h)->temp = (length),							\
508
( (h)->temp = (length),							\
406
  (((h)->next_free + (h)->temp > (h)->chunk_limit)			\
509
  (((h)->next_free + (h)->temp > (h)->chunk_limit)			\
407
   ? (_obstack_newchunk ((h), (h)->temp), 0) : 0),			\
510
   ? (_obstack_newchunk ((h), (h)->temp), 0) : 0),			\
408
  bcopy (where, (h)->next_free, (h)->temp),				\
511
  _obstack_memcpy ((h)->next_free, (char *) (where), (h)->temp),	\
409
  (h)->next_free += (h)->temp)
512
  (h)->next_free += (h)->temp)
410
513
411
#define obstack_grow0(h,where,length)					\
514
#define obstack_grow0(h,where,length)					\
412
( (h)->temp = (length),							\
515
( (h)->temp = (length),							\
413
  (((h)->next_free + (h)->temp + 1 > (h)->chunk_limit)			\
516
  (((h)->next_free + (h)->temp + 1 > (h)->chunk_limit)			\
414
   ? (_obstack_newchunk ((h), (h)->temp + 1), 0) : 0),			\
517
   ? (_obstack_newchunk ((h), (h)->temp + 1), 0) : 0),			\
415
  bcopy (where, (h)->next_free, (h)->temp),				\
518
  _obstack_memcpy ((h)->next_free, (char *) (where), (h)->temp),	\
416
  (h)->next_free += (h)->temp,						\
519
  (h)->next_free += (h)->temp,						\
417
  *((h)->next_free)++ = 0)
520
  *((h)->next_free)++ = 0)
418
521
419
#define obstack_1grow(h,datum)						\
522
#define obstack_1grow(h,datum)						\
420
( (((h)->next_free + 1 > (h)->chunk_limit)				\
523
( (((h)->next_free + 1 > (h)->chunk_limit)				\
421
   ? (_obstack_newchunk ((h), 1), 0) : 0),				\
524
   ? (_obstack_newchunk ((h), 1), 0) : 0),				\
422
  *((h)->next_free)++ = (datum))
525
  (*((h)->next_free)++ = (datum)))
423
526
424
#define obstack_ptr_grow(h,datum)					\
527
#define obstack_ptr_grow(h,datum)					\
425
( (((h)->next_free + sizeof (char *) > (h)->chunk_limit)		\
528
( (((h)->next_free + sizeof (char *) > (h)->chunk_limit)		\
426
   ? (_obstack_newchunk ((h), sizeof (char *)), 0) : 0),		\
529
   ? (_obstack_newchunk ((h), sizeof (char *)), 0) : 0),		\
427
  *((char **)(((h)->next_free+=sizeof(char *))-sizeof(char *))) = ((char *)datum))
530
  (*((char **) (((h)->next_free+=sizeof(char *))-sizeof(char *))) = ((char *) datum)))
428
531
429
#define obstack_int_grow(h,datum)					\
532
#define obstack_int_grow(h,datum)					\
430
( (((h)->next_free + sizeof (int) > (h)->chunk_limit)			\
533
( (((h)->next_free + sizeof (int) > (h)->chunk_limit)			\
431
   ? (_obstack_newchunk ((h), sizeof (int)), 0) : 0),			\
534
   ? (_obstack_newchunk ((h), sizeof (int)), 0) : 0),			\
432
  *((int *)(((h)->next_free+=sizeof(int))-sizeof(int))) = ((int)datum))
535
  (*((int *) (((h)->next_free+=sizeof(int))-sizeof(int))) = ((int) datum)))
433
536
434
#define obstack_ptr_grow_fast(h,aptr) (*((char **)(h)->next_free)++ = (char *)aptr)
537
#define obstack_ptr_grow_fast(h,aptr) (*((char **) (h)->next_free)++ = (char *) aptr)
435
#define obstack_int_grow_fast(h,aint) (*((int *)(h)->next_free)++ = (int)aint)
538
#define obstack_int_grow_fast(h,aint) (*((int *) (h)->next_free)++ = (int) aint)
436
539
437
#define obstack_blank(h,length)						\
540
#define obstack_blank(h,length)						\
438
( (h)->temp = (length),							\
541
( (h)->temp = (length),							\
439
  (((h)->chunk_limit - (h)->next_free < (h)->temp)			\
542
  (((h)->chunk_limit - (h)->next_free < (h)->temp)			\
440
   ? (_obstack_newchunk ((h), (h)->temp), 0) : 0),			\
543
   ? (_obstack_newchunk ((h), (h)->temp), 0) : 0),			\
441
  (h)->next_free += (h)->temp)
544
  ((h)->next_free += (h)->temp))
442
545
443
#define obstack_alloc(h,length)						\
546
#define obstack_alloc(h,length)						\
444
 (obstack_blank ((h), (length)), obstack_finish ((h)))
547
 (obstack_blank ((h), (length)), obstack_finish ((h)))
Lines 457-478 Link Here
457
  (h)->next_free							\
560
  (h)->next_free							\
458
    = __INT_TO_PTR ((__PTR_TO_INT ((h)->next_free)+(h)->alignment_mask)	\
561
    = __INT_TO_PTR ((__PTR_TO_INT ((h)->next_free)+(h)->alignment_mask)	\
459
		    & ~ ((h)->alignment_mask)),				\
562
		    & ~ ((h)->alignment_mask)),				\
460
  (((h)->next_free - (char *)(h)->chunk					\
563
  (((h)->next_free - (char *) (h)->chunk				\
461
    > (h)->chunk_limit - (char *)(h)->chunk)				\
564
    > (h)->chunk_limit - (char *) (h)->chunk)				\
462
   ? ((h)->next_free = (h)->chunk_limit) : 0),				\
565
   ? ((h)->next_free = (h)->chunk_limit) : 0),				\
463
  (h)->object_base = (h)->next_free,					\
566
  (h)->object_base = (h)->next_free,					\
464
  __INT_TO_PTR ((h)->temp))
567
  __INT_TO_PTR ((h)->temp))
465
568
466
#ifdef __STDC__
569
#if defined (__STDC__) && __STDC__
467
#define obstack_free(h,obj)						\
570
#define obstack_free(h,obj)						\
468
( (h)->temp = (char *)(obj) - (char *) (h)->chunk,			\
571
( (h)->temp = (char *) (obj) - (char *) (h)->chunk,			\
469
  (((h)->temp > 0 && (h)->temp < (h)->chunk_limit - (char *) (h)->chunk)\
572
  (((h)->temp > 0 && (h)->temp < (h)->chunk_limit - (char *) (h)->chunk)\
470
   ? (int) ((h)->next_free = (h)->object_base				\
573
   ? (int) ((h)->next_free = (h)->object_base				\
471
	    = (h)->temp + (char *) (h)->chunk)				\
574
	    = (h)->temp + (char *) (h)->chunk)				\
472
   : (((obstack_free) ((h), (h)->temp + (char *) (h)->chunk), 0), 0)))
575
   : (((obstack_free) ((h), (h)->temp + (char *) (h)->chunk), 0), 0)))
473
#else
576
#else
474
#define obstack_free(h,obj)						\
577
#define obstack_free(h,obj)						\
475
( (h)->temp = (char *)(obj) - (char *) (h)->chunk,			\
578
( (h)->temp = (char *) (obj) - (char *) (h)->chunk,			\
476
  (((h)->temp > 0 && (h)->temp < (h)->chunk_limit - (char *) (h)->chunk)\
579
  (((h)->temp > 0 && (h)->temp < (h)->chunk_limit - (char *) (h)->chunk)\
477
   ? (int) ((h)->next_free = (h)->object_base				\
580
   ? (int) ((h)->next_free = (h)->object_base				\
478
	    = (h)->temp + (char *) (h)->chunk)				\
581
	    = (h)->temp + (char *) (h)->chunk)				\
Lines 481-484 Link Here
481
584
482
#endif /* not __GNUC__ or not __STDC__ */
585
#endif /* not __GNUC__ or not __STDC__ */
483
586
484
#endif /* not __OBSTACKS__ */
587
#ifdef __cplusplus
588
}	/* C++ */
589
#endif
590
591
#endif /* obstack.h */
(-)grep/regex.c (+5829 lines)
Line 0 Link Here
1
/* Extended regular expression matching and search library,
2
   version 0.12.
3
   (Implements POSIX draft P1003.2/D11.2, except for some of the
4
   internationalization features.)
5
   Copyright (C) 1993, 94, 95, 96, 97, 98 Free Software Foundation, Inc.
6
7
   The GNU C Library is free software; you can redistribute it and/or
8
   modify it under the terms of the GNU Library General Public License as
9
   published by the Free Software Foundation; either version 2 of the
10
   License, or (at your option) any later version.
11
12
   The GNU C Library is distributed in the hope that it will be useful,
13
   but WITHOUT ANY WARRANTY; without even the implied warranty of
14
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
   Library General Public License for more details.
16
17
   You should have received a copy of the GNU Library General Public
18
   License along with the GNU C Library; see the file COPYING.LIB.  If not,
19
   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20
   Boston, MA 02111-1307, USA.  */
21
22
/* AIX requires this to be the first thing in the file. */
23
#if defined _AIX && !defined REGEX_MALLOC
24
  #pragma alloca
25
#endif
26
27
#undef	_GNU_SOURCE
28
#define _GNU_SOURCE
29
30
#ifdef HAVE_CONFIG_H
31
# include <config.h>
32
#endif
33
34
#ifndef PARAMS
35
# if defined __GNUC__ || (defined __STDC__ && __STDC__)
36
#  define PARAMS(args) args
37
# else
38
#  define PARAMS(args) ()
39
# endif  /* GCC.  */
40
#endif  /* Not PARAMS.  */
41
42
#if defined STDC_HEADERS && !defined emacs
43
# include <stddef.h>
44
#else
45
/* We need this for `regex.h', and perhaps for the Emacs include files.  */
46
# include <sys/types.h>
47
#endif
48
49
#define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
50
51
/* For platforms which support the ISO C Amendment 1 functionality we
52
   support user defined character classes.  */
53
#if defined _LIBC || WIDE_CHAR_SUPPORT
54
/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>.  */
55
# include <wchar.h>
56
# include <wctype.h>
57
#endif
58
59
#ifdef _LIBC
60
/* We have to keep the namespace clean.  */
61
# define regfree(preg) __regfree (preg)
62
# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
63
# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
64
# define regerror(errcode, preg, errbuf, errbuf_size) \
65
	__regerror(errcode, preg, errbuf, errbuf_size)
66
# define re_set_registers(bu, re, nu, st, en) \
67
	__re_set_registers (bu, re, nu, st, en)
68
# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
69
	__re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
70
# define re_match(bufp, string, size, pos, regs) \
71
	__re_match (bufp, string, size, pos, regs)
72
# define re_search(bufp, string, size, startpos, range, regs) \
73
	__re_search (bufp, string, size, startpos, range, regs)
74
# define re_compile_pattern(pattern, length, bufp) \
75
	__re_compile_pattern (pattern, length, bufp)
76
# define re_set_syntax(syntax) __re_set_syntax (syntax)
77
# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
78
	__re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
79
# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
80
81
#define btowc __btowc
82
#endif
83
84
/* This is for other GNU distributions with internationalized messages.  */
85
#if HAVE_LIBINTL_H || defined _LIBC
86
# include <libintl.h>
87
#else
88
# define gettext(msgid) (msgid)
89
#endif
90
91
#ifndef gettext_noop
92
/* This define is so xgettext can find the internationalizable
93
   strings.  */
94
# define gettext_noop(String) String
95
#endif
96
97
/* The `emacs' switch turns on certain matching commands
98
   that make sense only in Emacs. */
99
#ifdef emacs
100
101
# include "lisp.h"
102
# include "buffer.h"
103
# include "syntax.h"
104
105
#else  /* not emacs */
106
107
/* If we are not linking with Emacs proper,
108
   we can't use the relocating allocator
109
   even if config.h says that we can.  */
110
# undef REL_ALLOC
111
112
# if defined STDC_HEADERS || defined _LIBC
113
#  include <stdlib.h>
114
# else
115
char *malloc ();
116
char *realloc ();
117
# endif
118
119
/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
120
   If nothing else has been done, use the method below.  */
121
# ifdef INHIBIT_STRING_HEADER
122
#  if !(defined HAVE_BZERO && defined HAVE_BCOPY)
123
#   if !defined bzero && !defined bcopy
124
#    undef INHIBIT_STRING_HEADER
125
#   endif
126
#  endif
127
# endif
128
129
/* This is the normal way of making sure we have a bcopy and a bzero.
130
   This is used in most programs--a few other programs avoid this
131
   by defining INHIBIT_STRING_HEADER.  */
132
# ifndef INHIBIT_STRING_HEADER
133
#  if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
134
#   include <string.h>
135
#   ifndef bzero
136
#    ifndef _LIBC
137
#     define bzero(s, n)	(memset (s, '\0', n), (s))
138
#    else
139
#     define bzero(s, n)	__bzero (s, n)
140
#    endif
141
#   endif
142
#  else
143
#   include <strings.h>
144
#   ifndef memcmp
145
#    define memcmp(s1, s2, n)	bcmp (s1, s2, n)
146
#   endif
147
#   ifndef memcpy
148
#    define memcpy(d, s, n)	(bcopy (s, d, n), (d))
149
#   endif
150
#  endif
151
# endif
152
153
/* Define the syntax stuff for \<, \>, etc.  */
154
155
/* This must be nonzero for the wordchar and notwordchar pattern
156
   commands in re_match_2.  */
157
# ifndef Sword
158
#  define Sword 1
159
# endif
160
161
# ifdef SWITCH_ENUM_BUG
162
#  define SWITCH_ENUM_CAST(x) ((int)(x))
163
# else
164
#  define SWITCH_ENUM_CAST(x) (x)
165
# endif
166
167
/* How many characters in the character set.  */
168
# define CHAR_SET_SIZE 256
169
170
# ifdef SYNTAX_TABLE
171
172
extern char *re_syntax_table;
173
174
# else /* not SYNTAX_TABLE */
175
176
static char re_syntax_table[CHAR_SET_SIZE];
177
178
static void
179
init_syntax_once ()
180
{
181
   register int c;
182
   static int done = 0;
183
184
   if (done)
185
     return;
186
187
   bzero (re_syntax_table, sizeof re_syntax_table);
188
189
   for (c = 'a'; c <= 'z'; c++)
190
     re_syntax_table[c] = Sword;
191
192
   for (c = 'A'; c <= 'Z'; c++)
193
     re_syntax_table[c] = Sword;
194
195
   for (c = '0'; c <= '9'; c++)
196
     re_syntax_table[c] = Sword;
197
198
   re_syntax_table['_'] = Sword;
199
200
   done = 1;
201
}
202
203
# endif /* not SYNTAX_TABLE */
204
205
# define SYNTAX(c) re_syntax_table[c]
206
207
#endif /* not emacs */
208
209
/* Get the interface, including the syntax bits.  */
210
#include "regex.h"
211
212
/* isalpha etc. are used for the character classes.  */
213
#include <ctype.h>
214
215
/* Jim Meyering writes:
216
217
   "... Some ctype macros are valid only for character codes that
218
   isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
219
   using /bin/cc or gcc but without giving an ansi option).  So, all
220
   ctype uses should be through macros like ISPRINT...  If
221
   STDC_HEADERS is defined, then autoconf has verified that the ctype
222
   macros don't need to be guarded with references to isascii. ...
223
   Defining isascii to 1 should let any compiler worth its salt
224
   eliminate the && through constant folding."
225
   Solaris defines some of these symbols so we must undefine them first.  */
226
227
#undef ISASCII
228
#if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
229
# define ISASCII(c) 1
230
#else
231
# define ISASCII(c) isascii(c)
232
#endif
233
234
#ifdef isblank
235
# define ISBLANK(c) (ISASCII (c) && isblank (c))
236
#else
237
# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
238
#endif
239
#ifdef isgraph
240
# define ISGRAPH(c) (ISASCII (c) && isgraph (c))
241
#else
242
# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
243
#endif
244
245
#undef ISPRINT
246
#define ISPRINT(c) (ISASCII (c) && isprint (c))
247
#define ISDIGIT(c) (ISASCII (c) && isdigit (c))
248
#define ISALNUM(c) (ISASCII (c) && isalnum (c))
249
#define ISALPHA(c) (ISASCII (c) && isalpha (c))
250
#define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
251
#define ISLOWER(c) (ISASCII (c) && islower (c))
252
#define ISPUNCT(c) (ISASCII (c) && ispunct (c))
253
#define ISSPACE(c) (ISASCII (c) && isspace (c))
254
#define ISUPPER(c) (ISASCII (c) && isupper (c))
255
#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
256
257
/* Fallback definition for systems whose headers do not provide NULL.
   The expansion is fully parenthesized so that it behaves as a single
   operand wherever it is used (e.g. after `sizeof').  */
#ifndef NULL
# define NULL ((void *) 0)
#endif
260
261
/* We remove any previous definition of `SIGN_EXTEND_CHAR',
262
   since ours (we hope) works properly with all combinations of
263
   machines, compilers, `char' and `unsigned char' argument types.
264
   (Per Bothner suggested the basic approach.)  */
265
#undef SIGN_EXTEND_CHAR
266
#if __STDC__
267
# define SIGN_EXTEND_CHAR(c) ((signed char) (c))
268
#else  /* not __STDC__ */
269
/* As in Harbison and Steele.  */
270
# define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
271
#endif
272
273
/* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
274
   use `alloca' instead of `malloc'.  This is because using malloc in
275
   re_search* or re_match* could cause memory leaks when C-g is used in
276
   Emacs; also, malloc is slower and causes storage fragmentation.  On
277
   the other hand, malloc is more portable, and easier to debug.
278
279
   Because we sometimes use alloca, some routines have to be macros,
280
   not functions -- `alloca'-allocated space disappears at the end of the
281
   function it is called in.  */
282
283
#ifdef REGEX_MALLOC
284
285
# define REGEX_ALLOCATE malloc
286
# define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
287
# define REGEX_FREE free
288
289
#else /* not REGEX_MALLOC  */
290
291
/* Emacs already defines alloca, sometimes.  */
292
# ifndef alloca
293
294
/* Make alloca work the best possible way.  */
295
#  ifdef __GNUC__
296
#   define alloca __builtin_alloca
297
#  else /* not __GNUC__ */
298
#   if HAVE_ALLOCA_H
299
#    include <alloca.h>
300
#   endif /* HAVE_ALLOCA_H */
301
#  endif /* not __GNUC__ */
302
303
# endif /* not alloca */
304
305
# define REGEX_ALLOCATE alloca
306
307
/* Assumes a `char *destination' variable.  */
308
# define REGEX_REALLOCATE(source, osize, nsize)				\
309
  (destination = (char *) alloca (nsize),				\
310
   memcpy (destination, source, osize))
311
312
/* No need to do anything to free, after alloca.  */
313
# define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
314
315
#endif /* not REGEX_MALLOC */
316
317
/* Define how to allocate the failure stack.  */
318
319
#if defined REL_ALLOC && defined REGEX_MALLOC
320
321
# define REGEX_ALLOCATE_STACK(size)				\
322
  r_alloc (&failure_stack_ptr, (size))
323
# define REGEX_REALLOCATE_STACK(source, osize, nsize)		\
324
  r_re_alloc (&failure_stack_ptr, (nsize))
325
# define REGEX_FREE_STACK(ptr)					\
326
  r_alloc_free (&failure_stack_ptr)
327
328
#else /* not using relocating allocator */
329
330
# ifdef REGEX_MALLOC
331
332
#  define REGEX_ALLOCATE_STACK malloc
333
#  define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
334
#  define REGEX_FREE_STACK free
335
336
# else /* not REGEX_MALLOC */
337
338
#  define REGEX_ALLOCATE_STACK alloca
339
340
#  define REGEX_REALLOCATE_STACK(source, osize, nsize)			\
341
   REGEX_REALLOCATE (source, osize, nsize)
342
/* No need to explicitly free anything.  */
343
#  define REGEX_FREE_STACK(arg)
344
345
# endif /* not REGEX_MALLOC */
346
#endif /* not using relocating allocator */
347
348
349
/* True if `size1' is nonzero and PTR is pointing anywhere inside
350
   `string1' or just past its end.  This works if PTR is NULL, which is
351
   a good thing.  */
352
#define FIRST_STRING_P(ptr) 					\
353
  (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
354
355
/* (Re)Allocate N items of type T using malloc, or fail.  */
356
#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
357
#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
358
#define RETALLOC_IF(addr, n, t) \
359
  if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
360
#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
361
362
#define BYTEWIDTH 8 /* In bits.  */
363
364
#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
365
366
#undef MAX
367
#undef MIN
368
#define MAX(a, b) ((a) > (b) ? (a) : (b))
369
#define MIN(a, b) ((a) < (b) ? (a) : (b))
370
371
typedef char boolean;
372
#define false 0
373
#define true 1
374
375
static int re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
376
					const char *string1, int size1,
377
					const char *string2, int size2,
378
					int pos,
379
					struct re_registers *regs,
380
					int stop));
381
382
/* These are the command codes that appear in compiled regular
383
   expressions.  Some opcodes are followed by argument bytes.  A
384
   command code can specify any interpretation whatsoever for its
385
   arguments.  Zero bytes may appear in the compiled regular expression.  */
386
387
/* Opcodes of the compiled pattern.  The enumerators are numbered
   consecutively starting from zero, so their order must not change.  */
typedef enum
{
  no_op = 0,

  /* Succeed right away--no more backtracking.  */
  succeed,

  /* Followed by one byte giving n, then by n literal bytes.  */
  exactn,

  /* Matches any (more or less) character.  */
  anychar,

  /* Matches any one char belonging to the specified set.  The first
     following byte is the number of bitmap bytes; the bitmap itself
     follows and says which chars are in.  Bits in each byte are
     ordered low-bit-first and a set bit means the character is in the
     set.  A character too large to have a bit in the map is
     automatically not in the set.  */
  charset,

  /* Same parameters as charset, but matches any character that is
     not one of those specified.  */
  charset_not,

  /* Start remembering the text that is matched, for storing in a
     register.  Followed by one byte with the register number, in the
     range 0 to one less than the pattern buffer's re_nsub field, and
     then by one byte with the number of groups inner to this one.
     (The latter is part of start_memory only because we need it in
     the on_failure_jump of re_match_2.)  */
  start_memory,

  /* Stop remembering the text that is matched and store it in a
     memory register.  Followed by one byte with the register number,
     in the range 0 to one less than `re_nsub' in the pattern buffer,
     and one byte with the number of inner groups, just like
     `start_memory'.  (The number of inner groups is needed here
     because there is no easy way of finding the corresponding
     start_memory when we're at a stop_memory.)  */
  stop_memory,

  /* Match a duplicate of something remembered.  Followed by one byte
     containing the register number.  */
  duplicate,

  /* Fail unless at beginning of line.  */
  begline,

  /* Fail unless at end of line.  */
  endline,

  /* Succeeds if at beginning of buffer (if emacs) or at beginning of
     string to be matched (if not).  */
  begbuf,

  /* Analogously, for end of buffer/string.  */
  endbuf,

  /* Followed by two byte relative address to which to jump.  */
  jump,

  /* Same as jump, but marks the end of an alternative.  */
  jump_past_alt,

  /* Followed by two-byte relative address of place to resume at in
     case of failure.  */
  on_failure_jump,

  /* Like on_failure_jump, but pushes a placeholder instead of the
     current string position when executed.  */
  on_failure_keep_string_jump,

  /* Throw away latest failure point and then jump to following
     two-byte relative address.  */
  pop_failure_jump,

  /* Jumps back to the beginning of a repeat.  Changed to
     pop_failure_jump if we know we won't have to backtrack to match,
     i.e. if what follows this jump clearly won't match what the
     repeat does, so that there is no use backtracking out of
     repetitions already matched; otherwise changed to jump.
     Followed by two-byte address.  */
  maybe_pop_jump,

  /* Jump to following two-byte address, and push a dummy failure
     point.  This failure point will be thrown away if an attempt is
     made to use it for a failure.  A `+' construct makes this before
     the first repeat.  Also used as an intermediary kind of jump when
     compiling an alternative.  */
  dummy_failure_jump,

  /* Push a dummy failure point and continue.  Used at the end of
     alternatives.  */
  push_dummy_failure,

  /* Followed by two-byte relative address and two-byte number n.
     After matching N times, jump to the address upon failure.  */
  succeed_n,

  /* Followed by two-byte relative address, and two-byte number n.
     Jump to the address N times, then fail.  */
  jump_n,

  /* Set the following two-byte relative address to the subsequent
     two-byte number.  The address *includes* the two bytes of
     number.  */
  set_number_at,

  /* Matches any word-constituent character.  */
  wordchar,

  /* Matches any char that is not a word-constituent.  */
  notwordchar,

  /* Succeeds if at word beginning.  */
  wordbeg,

  /* Succeeds if at word end.  */
  wordend,

  /* Succeeds if at a word boundary.  */
  wordbound,

  /* Succeeds if not at a word boundary.  No trailing comma here: the
     Emacs-only opcodes below supply the separator themselves.  */
  notwordbound

#ifdef emacs
  /* Succeeds if before point.  */
  ,before_dot,

  /* Succeeds if at point.  */
  at_dot,

  /* Succeeds if after point.  */
  after_dot,

  /* Matches any character whose syntax is specified.  Followed by a
     byte which contains a syntax code, e.g., Sword.  */
  syntaxspec,

  /* Matches any character whose syntax is not that specified.  */
  notsyntaxspec
#endif /* emacs */
} re_opcode_t;
520
521
/* Common operations on the compiled pattern.  */
522
523
/* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
524
525
#define STORE_NUMBER(destination, number)				\
526
  do {									\
527
    (destination)[0] = (number) & 0377;					\
528
    (destination)[1] = (number) >> 8;					\
529
  } while (0)
530
531
/* Same as STORE_NUMBER, except increment DESTINATION to
532
   the byte after where the number is stored.  Therefore, DESTINATION
533
   must be an lvalue.  */
534
535
#define STORE_NUMBER_AND_INCR(destination, number)			\
536
  do {									\
537
    STORE_NUMBER (destination, number);					\
538
    (destination) += 2;							\
539
  } while (0)
540
541
/* Put into DESTINATION a number stored in two contiguous bytes starting
542
   at SOURCE.  */
543
544
/* The low byte comes first; the second byte is sign-extended and
   scaled by 256.  We multiply rather than shift because left-shifting
   a negative value is undefined in C.  */
#define EXTRACT_NUMBER(destination, source)				\
  do {									\
    (destination) = *(source) & 0377;					\
    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) * 256;		\
  } while (0)
549
550
#ifdef DEBUG
551
/* Function form of EXTRACT_NUMBER, used when debugging: store in *DEST
   the two-byte number found at SOURCE.  SOURCE[0] is the low byte and
   SOURCE[1] is the sign-extended high byte.  */
static void extract_number _RE_ARGS ((int *dest, unsigned char *source));
static void
extract_number (dest, source)
    int *dest;
    unsigned char *source;
{
  int high = SIGN_EXTEND_CHAR (*(source + 1));

  /* Multiply instead of shifting: HIGH may be negative, and a left
     shift of a negative value is undefined.  */
  *dest = (*source & 0377) + high * 256;
}
561
562
# ifndef EXTRACT_MACROS /* To debug the macros.  */
563
#  undef EXTRACT_NUMBER
564
#  define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
565
# endif /* not EXTRACT_MACROS */
566
567
#endif /* DEBUG */
568
569
/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
570
   SOURCE must be an lvalue.  */
571
572
#define EXTRACT_NUMBER_AND_INCR(destination, source)			\
573
  do {									\
574
    EXTRACT_NUMBER (destination, source);				\
575
    (source) += 2; 							\
576
  } while (0)
577
578
#ifdef DEBUG
579
/* Function form of EXTRACT_NUMBER_AND_INCR, used when debugging: read
   the two-byte number at *SOURCE into *DESTINATION, then advance
   *SOURCE past those two bytes.  */
static void extract_number_and_incr _RE_ARGS ((int *destination,
					       unsigned char **source));
static void
extract_number_and_incr (destination, source)
    int *destination;
    unsigned char **source;
{
  unsigned char *from = *source;

  extract_number (destination, from);
  *source = from + 2;
}
589
590
# ifndef EXTRACT_MACROS
591
#  undef EXTRACT_NUMBER_AND_INCR
592
#  define EXTRACT_NUMBER_AND_INCR(dest, src) \
593
  extract_number_and_incr (&dest, &src)
594
# endif /* not EXTRACT_MACROS */
595
596
#endif /* DEBUG */
597
598
/* If DEBUG is defined, Regex prints many voluminous messages about what
599
   it is doing (if the variable `debug' is nonzero).  If linked with the
600
   main program in `iregex.c', you can enter patterns and strings
601
   interactively.  And if linked with the main program in `main.c' and
602
   the other test files, you can run the already-written tests.  */
603
604
#ifdef DEBUG
605
606
/* We use standard I/O for debugging.  */
607
# include <stdio.h>
608
609
/* It is useful to test things that ``must'' be true when debugging.  */
610
# include <assert.h>
611
612
static int debug = 0;
613
614
# define DEBUG_STATEMENT(e) e
615
# define DEBUG_PRINT1(x) if (debug) printf (x)
616
# define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
617
# define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
618
# define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
619
# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 				\
620
  if (debug) print_partial_compiled_pattern (s, e)
621
# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)			\
622
  if (debug) print_double_string (w, s1, sz1, s2, sz2)
623
624
625
/* Print the fastmap in human-readable form.  */
626
627
void
628
print_fastmap (fastmap)
629
    char *fastmap;
630
{
631
  unsigned was_a_range = 0;
632
  unsigned i = 0;
633
634
  while (i < (1 << BYTEWIDTH))
635
    {
636
      if (fastmap[i++])
637
	{
638
	  was_a_range = 0;
639
          putchar (i - 1);
640
          while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
641
            {
642
              was_a_range = 1;
643
              i++;
644
            }
645
	  if (was_a_range)
646
            {
647
              printf ("-");
648
              putchar (i - 1);
649
            }
650
        }
651
    }
652
  putchar ('\n');
653
}
654
655
656
/* Print a compiled pattern string in human-readable form, starting at
657
   the START pointer into it and ending just before the pointer END.  */
658
659
void
660
print_partial_compiled_pattern (start, end)
661
    unsigned char *start;
662
    unsigned char *end;
663
{
664
  int mcnt, mcnt2;
665
  unsigned char *p1;
666
  unsigned char *p = start;
667
  unsigned char *pend = end;
668
669
  if (start == NULL)
670
    {
671
      printf ("(null)\n");
672
      return;
673
    }
674
675
  /* Loop over pattern commands.  */
676
  while (p < pend)
677
    {
678
      printf ("%d:\t", p - start);
679
680
      switch ((re_opcode_t) *p++)
681
	{
682
        case no_op:
683
          printf ("/no_op");
684
          break;
685
686
	case exactn:
687
	  mcnt = *p++;
688
          printf ("/exactn/%d", mcnt);
689
          do
690
	    {
691
              putchar ('/');
692
	      putchar (*p++);
693
            }
694
          while (--mcnt);
695
          break;
696
697
	case start_memory:
698
          mcnt = *p++;
699
          printf ("/start_memory/%d/%d", mcnt, *p++);
700
          break;
701
702
	case stop_memory:
703
          mcnt = *p++;
704
	  printf ("/stop_memory/%d/%d", mcnt, *p++);
705
          break;
706
707
	case duplicate:
708
	  printf ("/duplicate/%d", *p++);
709
	  break;
710
711
	case anychar:
712
	  printf ("/anychar");
713
	  break;
714
715
	case charset:
716
        case charset_not:
717
          {
718
            register int c, last = -100;
719
	    register int in_range = 0;
720
721
	    printf ("/charset [%s",
722
	            (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
723
724
            assert (p + *p < pend);
725
726
            for (c = 0; c < 256; c++)
727
	      if (c / 8 < *p
728
		  && (p[1 + (c/8)] & (1 << (c % 8))))
729
		{
730
		  /* Are we starting a range?  */
731
		  if (last + 1 == c && ! in_range)
732
		    {
733
		      putchar ('-');
734
		      in_range = 1;
735
		    }
736
		  /* Have we broken a range?  */
737
		  else if (last + 1 != c && in_range)
738
              {
739
		      putchar (last);
740
		      in_range = 0;
741
		    }
742
743
		  if (! in_range)
744
		    putchar (c);
745
746
		  last = c;
747
              }
748
749
	    if (in_range)
750
	      putchar (last);
751
752
	    putchar (']');
753
754
	    p += 1 + *p;
755
	  }
756
	  break;
757
758
	case begline:
759
	  printf ("/begline");
760
          break;
761
762
	case endline:
763
          printf ("/endline");
764
          break;
765
766
	case on_failure_jump:
767
          extract_number_and_incr (&mcnt, &p);
768
  	  printf ("/on_failure_jump to %d", p + mcnt - start);
769
          break;
770
771
	case on_failure_keep_string_jump:
772
          extract_number_and_incr (&mcnt, &p);
773
  	  printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
774
          break;
775
776
	case dummy_failure_jump:
777
          extract_number_and_incr (&mcnt, &p);
778
  	  printf ("/dummy_failure_jump to %d", p + mcnt - start);
779
          break;
780
781
	case push_dummy_failure:
782
          printf ("/push_dummy_failure");
783
          break;
784
785
        case maybe_pop_jump:
786
          extract_number_and_incr (&mcnt, &p);
787
  	  printf ("/maybe_pop_jump to %d", p + mcnt - start);
788
	  break;
789
790
        case pop_failure_jump:
791
	  extract_number_and_incr (&mcnt, &p);
792
  	  printf ("/pop_failure_jump to %d", p + mcnt - start);
793
	  break;
794
795
        case jump_past_alt:
796
	  extract_number_and_incr (&mcnt, &p);
797
  	  printf ("/jump_past_alt to %d", p + mcnt - start);
798
	  break;
799
800
        case jump:
801
	  extract_number_and_incr (&mcnt, &p);
802
  	  printf ("/jump to %d", p + mcnt - start);
803
	  break;
804
805
        case succeed_n:
806
          extract_number_and_incr (&mcnt, &p);
807
	  p1 = p + mcnt;
808
          extract_number_and_incr (&mcnt2, &p);
809
	  printf ("/succeed_n to %d, %d times", p1 - start, mcnt2);
810
          break;
811
812
        case jump_n:
813
          extract_number_and_incr (&mcnt, &p);
814
	  p1 = p + mcnt;
815
          extract_number_and_incr (&mcnt2, &p);
816
	  printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
817
          break;
818
819
        case set_number_at:
820
          extract_number_and_incr (&mcnt, &p);
821
	  p1 = p + mcnt;
822
          extract_number_and_incr (&mcnt2, &p);
823
	  printf ("/set_number_at location %d to %d", p1 - start, mcnt2);
824
          break;
825
826
        case wordbound:
827
	  printf ("/wordbound");
828
	  break;
829
830
	case notwordbound:
831
	  printf ("/notwordbound");
832
          break;
833
834
	case wordbeg:
835
	  printf ("/wordbeg");
836
	  break;
837
838
	case wordend:
839
	  printf ("/wordend");
840
841
# ifdef emacs
842
	case before_dot:
843
	  printf ("/before_dot");
844
          break;
845
846
	case at_dot:
847
	  printf ("/at_dot");
848
          break;
849
850
	case after_dot:
851
	  printf ("/after_dot");
852
          break;
853
854
	case syntaxspec:
855
          printf ("/syntaxspec");
856
	  mcnt = *p++;
857
	  printf ("/%d", mcnt);
858
          break;
859
860
	case notsyntaxspec:
861
          printf ("/notsyntaxspec");
862
	  mcnt = *p++;
863
	  printf ("/%d", mcnt);
864
	  break;
865
# endif /* emacs */
866
867
	case wordchar:
868
	  printf ("/wordchar");
869
          break;
870
871
	case notwordchar:
872
	  printf ("/notwordchar");
873
          break;
874
875
	case begbuf:
876
	  printf ("/begbuf");
877
          break;
878
879
	case endbuf:
880
	  printf ("/endbuf");
881
          break;
882
883
        default:
884
          printf ("?%d", *(p-1));
885
	}
886
887
      putchar ('\n');
888
    }
889
890
  printf ("%d:\tend of pattern.\n", p - start);
891
}
892
893
894
void
895
print_compiled_pattern (bufp)
896
    struct re_pattern_buffer *bufp;
897
{
898
  unsigned char *buffer = bufp->buffer;
899
900
  print_partial_compiled_pattern (buffer, buffer + bufp->used);
901
  printf ("%ld bytes used/%ld bytes allocated.\n",
902
	  bufp->used, bufp->allocated);
903
904
  if (bufp->fastmap_accurate && bufp->fastmap)
905
    {
906
      printf ("fastmap: ");
907
      print_fastmap (bufp->fastmap);
908
    }
909
910
  printf ("re_nsub: %d\t", bufp->re_nsub);
911
  printf ("regs_alloc: %d\t", bufp->regs_allocated);
912
  printf ("can_be_null: %d\t", bufp->can_be_null);
913
  printf ("newline_anchor: %d\n", bufp->newline_anchor);
914
  printf ("no_sub: %d\t", bufp->no_sub);
915
  printf ("not_bol: %d\t", bufp->not_bol);
916
  printf ("not_eol: %d\t", bufp->not_eol);
917
  printf ("syntax: %lx\n", bufp->syntax);
918
  /* Perhaps we should print the translate table?  */
919
}
920
921
922
/* Debugging aid: print to stdout the text from WHERE to the end of the
   two-part string STRING1 (SIZE1 chars) / STRING2 (SIZE2 chars).  If
   WHERE lies in STRING1 (per FIRST_STRING_P), the tail of STRING1 is
   printed followed by all of STRING2; otherwise WHERE is taken as a
   position in STRING2.  A null WHERE prints "(null)".  */
void
print_double_string (where, string1, size1, string2, size2)
    const char *where;
    const char *string1;
    const char *string2;
    int size1;
    int size2;
{
  int idx;

  if (where == NULL)
    {
      printf ("(null)");
      return;
    }

  if (FIRST_STRING_P (where))
    {
      idx = where - string1;
      while (idx < size1)
	putchar (string1[idx++]);

      /* Continue from the very start of the second string.  */
      where = string2;
    }

  idx = where - string2;
  while (idx < size2)
    putchar (string2[idx++]);
}
948
949
/* Debugging aid: write the single character C to stderr.  */
void
printchar (c)
     int c;
{
  fputc (c, stderr);
}
955
956
#else /* not DEBUG */
957
958
# undef assert
959
# define assert(e)
960
961
# define DEBUG_STATEMENT(e)
962
# define DEBUG_PRINT1(x)
963
# define DEBUG_PRINT2(x1, x2)
964
# define DEBUG_PRINT3(x1, x2, x3)
965
# define DEBUG_PRINT4(x1, x2, x3, x4)
966
# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
967
# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
968
969
#endif /* not DEBUG */
970
971
/* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
972
   also be assigned to arbitrarily: each pattern buffer stores its own
973
   syntax, so it can be changed between regex compilations.  */
974
/* This has no initializer because initialized variables in Emacs
975
   become read-only after dumping.  */
976
reg_syntax_t re_syntax_options;
977
978
979
/* Specify the precise syntax of regexps for compilation.  This provides
980
   for compatibility for various utilities which historically have
981
   different, incompatible syntaxes.
982
983
   The argument SYNTAX is a bit mask comprised of the various bits
984
   defined in regex.h.  We return the old syntax.  */
985
986
reg_syntax_t
987
re_set_syntax (syntax)
988
    reg_syntax_t syntax;
989
{
990
  reg_syntax_t ret = re_syntax_options;
991
992
  re_syntax_options = syntax;
993
#ifdef DEBUG
994
  if (syntax & RE_DEBUG)
995
    debug = 1;
996
  else if (debug) /* was on but now is not */
997
    debug = 0;
998
#endif /* DEBUG */
999
  return ret;
1000
}
1001
#ifdef _LIBC
1002
weak_alias (__re_set_syntax, re_set_syntax)
1003
#endif
1004
1005
/* This table gives an error message for each of the error codes listed
1006
   in regex.h.  Obviously the order here has to be same as there.
1007
   POSIX doesn't require that we do anything for REG_NOERROR,
1008
   but why not be nice?  */
1009
1010
static const char *re_error_msgid[] =
1011
  {
1012
    gettext_noop ("Success"),	/* REG_NOERROR */
1013
    gettext_noop ("No match"),	/* REG_NOMATCH */
1014
    gettext_noop ("Invalid regular expression"), /* REG_BADPAT */
1015
    gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
1016
    gettext_noop ("Invalid character class name"), /* REG_ECTYPE */
1017
    gettext_noop ("Trailing backslash"), /* REG_EESCAPE */
1018
    gettext_noop ("Invalid back reference"), /* REG_ESUBREG */
1019
    gettext_noop ("Unmatched [ or [^"),	/* REG_EBRACK */
1020
    gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */
1021
    gettext_noop ("Unmatched \\{"), /* REG_EBRACE */
1022
    gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */
1023
    gettext_noop ("Invalid range end"),	/* REG_ERANGE */
1024
    gettext_noop ("Memory exhausted"), /* REG_ESPACE */
1025
    gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */
1026
    gettext_noop ("Premature end of regular expression"), /* REG_EEND */
1027
    gettext_noop ("Regular expression too big"), /* REG_ESIZE */
1028
    gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
1029
  };
1030
1031
/* Avoiding alloca during matching, to placate r_alloc.  */
1032
1033
/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
1034
   searching and matching functions should not call alloca.  On some
1035
   systems, alloca is implemented in terms of malloc, and if we're
1036
   using the relocating allocator routines, then malloc could cause a
1037
   relocation, which might (if the strings being searched are in the
1038
   ralloc heap) shift the data out from underneath the regexp
1039
   routines.
1040
1041
   Here's another reason to avoid allocation: Emacs
1042
   processes input from X in a signal handler; processing X input may
1043
   call malloc; if input arrives while a matching routine is calling
1044
   malloc, then we're scrod.  But Emacs can't just block input while
1045
   calling matching routines; then we don't notice interrupts when
1046
   they come in.  So, Emacs blocks input around all regexp calls
1047
   except the matching calls, which it leaves unprotected, in the
1048
   faith that they will not malloc.  */
1049
1050
/* Normally, this is fine.  */
1051
#define MATCH_MAY_ALLOCATE
1052
1053
/* When using GNU C, we are not REALLY using the C alloca, no matter
1054
   what config.h may say.  So don't take precautions for it.  */
1055
#ifdef __GNUC__
1056
# undef C_ALLOCA
1057
#endif
1058
1059
/* The match routines may not allocate if (1) they would do it with malloc
1060
   and (2) it's not safe for them to use malloc.
1061
   Note that if REL_ALLOC is defined, matching would not use malloc for the
1062
   failure stack, but we would still use it for the register vectors;
1063
   so REL_ALLOC should not affect this.  */
1064
#if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
1065
# undef MATCH_MAY_ALLOCATE
1066
#endif
1067
1068
1069
/* Failure stack declarations and macros; both re_compile_fastmap and
1070
   re_match_2 use a failure stack.  These have to be macros because of
1071
   REGEX_ALLOCATE_STACK.  */
1072
1073
1074
/* Number of failure points for which to initially allocate space
1075
   when matching.  If this number is exceeded, we allocate more
1076
   space, so it is not a hard limit.  */
1077
#ifndef INIT_FAILURE_ALLOC
1078
# define INIT_FAILURE_ALLOC 5
1079
#endif
1080
1081
/* Roughly the maximum number of failure points on the stack.  Would be
1082
   exactly that if we always used MAX_FAILURE_ITEMS items each time we failed.
1083
   This is a variable only so users of regex can assign to it; we never
1084
   change it ourselves.  */
1085
1086
#ifdef INT_IS_16BIT
1087
1088
# if defined MATCH_MAY_ALLOCATE
1089
/* 4400 was enough to cause a crash on Alpha OSF/1,
1090
   whose default stack limit is 2mb.  */
1091
long int re_max_failures = 4000;
1092
# else
1093
long int re_max_failures = 2000;
1094
# endif
1095
1096
union fail_stack_elt
1097
{
1098
  unsigned char *pointer;
1099
  long int integer;
1100
};
1101
1102
typedef union fail_stack_elt fail_stack_elt_t;
1103
1104
typedef struct
1105
{
1106
  fail_stack_elt_t *stack;
1107
  unsigned long int size;
1108
  unsigned long int avail;		/* Offset of next open position.  */
1109
} fail_stack_type;
1110
1111
#else /* not INT_IS_16BIT */
1112
1113
# if defined MATCH_MAY_ALLOCATE
1114
/* 4400 was enough to cause a crash on Alpha OSF/1,
1115
   whose default stack limit is 2mb.  */
1116
int re_max_failures = 20000;
1117
# else
1118
int re_max_failures = 2000;
1119
# endif
1120
1121
union fail_stack_elt
1122
{
1123
  unsigned char *pointer;
1124
  int integer;
1125
};
1126
1127
typedef union fail_stack_elt fail_stack_elt_t;
1128
1129
typedef struct
1130
{
1131
  fail_stack_elt_t *stack;
1132
  unsigned size;
1133
  unsigned avail;			/* Offset of next open position.  */
1134
} fail_stack_type;
1135
1136
#endif /* INT_IS_16BIT */
1137
1138
#define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
1139
#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
1140
#define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
1141
1142
1143
/* Define macros to initialize and free the failure stack.
1144
   Do `return -2' if the alloc fails.  */
1145
1146
#ifdef MATCH_MAY_ALLOCATE
1147
# define INIT_FAIL_STACK()						\
1148
  do {									\
1149
    fail_stack.stack = (fail_stack_elt_t *)				\
1150
      REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
1151
									\
1152
    if (fail_stack.stack == NULL)					\
1153
      return -2;							\
1154
									\
1155
    fail_stack.size = INIT_FAILURE_ALLOC;				\
1156
    fail_stack.avail = 0;						\
1157
  } while (0)
1158
1159
# define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
1160
#else
1161
# define INIT_FAIL_STACK()						\
1162
  do {									\
1163
    fail_stack.avail = 0;						\
1164
  } while (0)
1165
1166
# define RESET_FAIL_STACK()
1167
#endif
1168
1169
1170
/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
1171
1172
   Return 1 if succeeds, and 0 if either ran out of memory
1173
   allocating space for it or it was already too large.
1174
1175
   REGEX_REALLOCATE_STACK requires `destination' be declared.   */
1176
1177
#define DOUBLE_FAIL_STACK(fail_stack)					\
1178
  ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS)	\
1179
   ? 0									\
1180
   : ((fail_stack).stack = (fail_stack_elt_t *)				\
1181
        REGEX_REALLOCATE_STACK ((fail_stack).stack, 			\
1182
          (fail_stack).size * sizeof (fail_stack_elt_t),		\
1183
          ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),	\
1184
									\
1185
      (fail_stack).stack == NULL					\
1186
      ? 0								\
1187
      : ((fail_stack).size <<= 1, 					\
1188
         1)))
1189
1190
1191
/* Push pointer POINTER on FAIL_STACK, first doubling the stack (via
   DOUBLE_FAIL_STACK) if it is full.
   Return 1 if the pointer was pushed and 0 if the stack was full and
   could not be doubled.

   The fullness test is made on the FAIL_STACK argument itself, not on
   a variable that happens to be called `fail_stack', and POINTER is
   parenthesized so that any expression may be passed.  */
#define PUSH_PATTERN_OP(POINTER, FAIL_STACK)				\
  (((FAIL_STACK).avail == (FAIL_STACK).size				\
    && !DOUBLE_FAIL_STACK (FAIL_STACK))					\
   ? 0									\
   : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = (POINTER),	\
      1))
1200
1201
/* Push a pointer value onto the failure stack.
1202
   Assumes the variable `fail_stack'.  Probably should only
1203
   be called from within `PUSH_FAILURE_POINT'.  */
1204
#define PUSH_FAILURE_POINTER(item)					\
1205
  fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
1206
1207
/* This pushes an integer-valued item onto the failure stack.
1208
   Assumes the variable `fail_stack'.  Probably should only
1209
   be called from within `PUSH_FAILURE_POINT'.  */
1210
#define PUSH_FAILURE_INT(item)					\
1211
  fail_stack.stack[fail_stack.avail++].integer = (item)
1212
1213
/* Push a fail_stack_elt_t value onto the failure stack.
1214
   Assumes the variable `fail_stack'.  Probably should only
1215
   be called from within `PUSH_FAILURE_POINT'.  */
1216
#define PUSH_FAILURE_ELT(item)					\
1217
  fail_stack.stack[fail_stack.avail++] =  (item)
1218
1219
/* These three POP... operations complement the three PUSH... operations.
1220
   All assume that `fail_stack' is nonempty.  */
1221
#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
1222
#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
1223
#define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
1224
1225
/* Used to omit pushing failure point id's when we're not debugging.  */
1226
#ifdef DEBUG
1227
# define DEBUG_PUSH PUSH_FAILURE_INT
1228
# define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
1229
#else
1230
# define DEBUG_PUSH(item)
1231
# define DEBUG_POP(item_addr)
1232
#endif
1233
1234
1235
/* Push the information about the state we will need
1236
   if we ever fail back to it.
1237
1238
   Requires variables fail_stack, regstart, regend, reg_info, and
1239
   num_regs_pushed be declared.  DOUBLE_FAIL_STACK requires `destination'
1240
   be declared.
1241
1242
   Does `return FAILURE_CODE' if runs out of memory.  */
1243
1244
#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
1245
  do {									\
1246
    char *destination;							\
1247
    /* Must be int, so when we don't save any registers, the arithmetic	\
1248
       of 0 + -1 isn't done as unsigned.  */				\
1249
    /* Can't be int, since there is not a shred of a guarantee that int	\
1250
       is wide enough to hold a value of something to which pointer can	\
1251
       be assigned */							\
1252
    active_reg_t this_reg;						\
1253
    									\
1254
    DEBUG_STATEMENT (failure_id++);					\
1255
    DEBUG_STATEMENT (nfailure_points_pushed++);				\
1256
    DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
1257
    DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
1258
    DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
1259
									\
1260
    DEBUG_PRINT2 ("  slots needed: %ld\n", NUM_FAILURE_ITEMS);		\
1261
    DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);	\
1262
									\
1263
    /* Ensure we have enough space allocated for what we will push.  */	\
1264
    while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
1265
      {									\
1266
        if (!DOUBLE_FAIL_STACK (fail_stack))				\
1267
          return failure_code;						\
1268
									\
1269
        DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
1270
		       (fail_stack).size);				\
1271
        DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
1272
      }									\
1273
									\
1274
    /* Push the info, starting with the registers.  */			\
1275
    DEBUG_PRINT1 ("\n");						\
1276
									\
1277
    if (1)								\
1278
      for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
1279
	   this_reg++)							\
1280
	{								\
1281
	  DEBUG_PRINT2 ("  Pushing reg: %lu\n", this_reg);		\
1282
	  DEBUG_STATEMENT (num_regs_pushed++);				\
1283
									\
1284
	  DEBUG_PRINT2 ("    start: %p\n", regstart[this_reg]);		\
1285
	  PUSH_FAILURE_POINTER (regstart[this_reg]);			\
1286
									\
1287
	  DEBUG_PRINT2 ("    end: %p\n", regend[this_reg]);		\
1288
	  PUSH_FAILURE_POINTER (regend[this_reg]);			\
1289
									\
1290
	  DEBUG_PRINT2 ("    info: %p\n      ",				\
1291
			reg_info[this_reg].word.pointer);		\
1292
	  DEBUG_PRINT2 (" match_null=%d",				\
1293
			REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
1294
	  DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
1295
	  DEBUG_PRINT2 (" matched_something=%d",			\
1296
			MATCHED_SOMETHING (reg_info[this_reg]));	\
1297
	  DEBUG_PRINT2 (" ever_matched=%d",				\
1298
			EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
1299
	  DEBUG_PRINT1 ("\n");						\
1300
	  PUSH_FAILURE_ELT (reg_info[this_reg].word);			\
1301
	}								\
1302
									\
1303
    DEBUG_PRINT2 ("  Pushing  low active reg: %ld\n", lowest_active_reg);\
1304
    PUSH_FAILURE_INT (lowest_active_reg);				\
1305
									\
1306
    DEBUG_PRINT2 ("  Pushing high active reg: %ld\n", highest_active_reg);\
1307
    PUSH_FAILURE_INT (highest_active_reg);				\
1308
									\
1309
    DEBUG_PRINT2 ("  Pushing pattern %p:\n", pattern_place);		\
1310
    DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
1311
    PUSH_FAILURE_POINTER (pattern_place);				\
1312
									\
1313
    DEBUG_PRINT2 ("  Pushing string %p: `", string_place);		\
1314
    DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
1315
				 size2);				\
1316
    DEBUG_PRINT1 ("'\n");						\
1317
    PUSH_FAILURE_POINTER (string_place);				\
1318
									\
1319
    DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);		\
1320
    DEBUG_PUSH (failure_id);						\
1321
  } while (0)
1322
1323
/* This is the number of items that are pushed and popped on the stack
1324
   for each register.  */
1325
#define NUM_REG_ITEMS  3
1326
1327
/* Individual items aside from the registers.  */
1328
#ifdef DEBUG
1329
# define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
1330
#else
1331
# define NUM_NONREG_ITEMS 4
1332
#endif
1333
1334
/* We push at most this many items on the stack.  */
1335
/* We used to use (num_regs - 1), which is the number of registers
1336
   this regexp will save; but that was changed to 5
1337
   to avoid stack overflow for a regexp with lots of parens.  */
1338
#define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
1339
1340
/* We actually push this many items.  */
1341
#define NUM_FAILURE_ITEMS				\
1342
  (((0							\
1343
     ? 0 : highest_active_reg - lowest_active_reg + 1)	\
1344
    * NUM_REG_ITEMS)					\
1345
   + NUM_NONREG_ITEMS)
1346
1347
/* How many items can still be added to the stack without overflowing it.  */
1348
#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
1349
1350
1351
/* Pops what PUSH_FAILURE_POINT pushes.
1352
1353
   We restore into the parameters, all of which should be lvalues:
1354
     STR -- the saved data position.
1355
     PAT -- the saved pattern position.
1356
     LOW_REG, HIGH_REG -- the lowest and highest active registers.
1357
     REGSTART, REGEND -- arrays of string positions.
1358
     REG_INFO -- array of information about each subexpression.
1359
1360
   Also assumes the variables `fail_stack' and (if debugging), `bufp',
1361
   `pend', `string1', `size1', `string2', and `size2'.  */
1362
1363
#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
1364
{									\
1365
  DEBUG_STATEMENT (unsigned failure_id;)				\
1366
  active_reg_t this_reg;						\
1367
  const unsigned char *string_temp;					\
1368
									\
1369
  assert (!FAIL_STACK_EMPTY ());					\
1370
									\
1371
  /* Remove failure points and point to how many regs pushed.  */	\
1372
  DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
1373
  DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
1374
  DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);	\
1375
									\
1376
  assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
1377
									\
1378
  DEBUG_POP (&failure_id);						\
1379
  DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\
1380
									\
1381
  /* If the saved string location is NULL, it came from an		\
1382
     on_failure_keep_string_jump opcode, and we want to throw away the	\
1383
     saved NULL, thus retaining our current position in the string.  */	\
1384
  string_temp = POP_FAILURE_POINTER ();					\
1385
  if (string_temp != NULL)						\
1386
    str = (const char *) string_temp;					\
1387
									\
1388
  DEBUG_PRINT2 ("  Popping string %p: `", str);				\
1389
  DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
1390
  DEBUG_PRINT1 ("'\n");							\
1391
									\
1392
  pat = (unsigned char *) POP_FAILURE_POINTER ();			\
1393
  DEBUG_PRINT2 ("  Popping pattern %p:\n", pat);			\
1394
  DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
1395
									\
1396
  /* Restore register info.  */						\
1397
  high_reg = (active_reg_t) POP_FAILURE_INT ();				\
1398
  DEBUG_PRINT2 ("  Popping high active reg: %ld\n", high_reg);		\
1399
									\
1400
  low_reg = (active_reg_t) POP_FAILURE_INT ();				\
1401
  DEBUG_PRINT2 ("  Popping  low active reg: %ld\n", low_reg);		\
1402
									\
1403
  if (1)								\
1404
    for (this_reg = high_reg; this_reg >= low_reg; this_reg--)		\
1405
      {									\
1406
	DEBUG_PRINT2 ("    Popping reg: %ld\n", this_reg);		\
1407
									\
1408
	reg_info[this_reg].word = POP_FAILURE_ELT ();			\
1409
	DEBUG_PRINT2 ("      info: %p\n",				\
1410
		      reg_info[this_reg].word.pointer);			\
1411
									\
1412
	regend[this_reg] = (const char *) POP_FAILURE_POINTER ();	\
1413
	DEBUG_PRINT2 ("      end: %p\n", regend[this_reg]);		\
1414
									\
1415
	regstart[this_reg] = (const char *) POP_FAILURE_POINTER ();	\
1416
	DEBUG_PRINT2 ("      start: %p\n", regstart[this_reg]);		\
1417
      }									\
1418
  else									\
1419
    {									\
1420
      for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
1421
	{								\
1422
	  reg_info[this_reg].word.integer = 0;				\
1423
	  regend[this_reg] = 0;						\
1424
	  regstart[this_reg] = 0;					\
1425
	}								\
1426
      highest_active_reg = high_reg;					\
1427
    }									\
1428
									\
1429
  set_regs_matched_done = 0;						\
1430
  DEBUG_STATEMENT (nfailure_points_popped++);				\
1431
} /* POP_FAILURE_POINT */
1432
1433
1434
1435
/* Structure for per-register (a.k.a. per-group) information.
1436
   Other register information, such as the
1437
   starting and ending positions (which are addresses), and the list of
1438
   inner groups (which is a bits list) are maintained in separate
1439
   variables.
1440
1441
   We are making a (strictly speaking) nonportable assumption here: that
1442
   the compiler will pack our bit fields into something that fits into
1443
   the type of `word', i.e., is something that fits into one item on the
1444
   failure stack.  */
1445
1446
1447
/* Declarations and macros for re_match_2.  */
1448
1449
typedef union
1450
{
1451
  fail_stack_elt_t word;
1452
  struct
1453
  {
1454
      /* This field is one if this group can match the empty string,
1455
         zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
1456
#define MATCH_NULL_UNSET_VALUE 3
1457
    unsigned match_null_string_p : 2;
1458
    unsigned is_active : 1;
1459
    unsigned matched_something : 1;
1460
    unsigned ever_matched_something : 1;
1461
  } bits;
1462
} register_info_type;
1463
1464
#define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
1465
#define IS_ACTIVE(R)  ((R).bits.is_active)
1466
#define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
1467
#define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
1468
1469
1470
/* Call this when we have matched a real character; it sets `matched' flags
1471
   for the subexpressions which we are currently inside.  Also records
1472
   that those subexprs have matched.  */
1473
#define SET_REGS_MATCHED()						\
1474
  do									\
1475
    {									\
1476
      if (!set_regs_matched_done)					\
1477
	{								\
1478
	  active_reg_t r;						\
1479
	  set_regs_matched_done = 1;					\
1480
	  for (r = lowest_active_reg; r <= highest_active_reg; r++)	\
1481
	    {								\
1482
	      MATCHED_SOMETHING (reg_info[r])				\
1483
		= EVER_MATCHED_SOMETHING (reg_info[r])			\
1484
		= 1;							\
1485
	    }								\
1486
	}								\
1487
    }									\
1488
  while (0)
1489
1490
/* Registers are set to a sentinel when they haven't yet matched.  */
1491
static char reg_unset_dummy;
1492
#define REG_UNSET_VALUE (&reg_unset_dummy)
1493
#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1494
1495
/* Subroutine declarations and macros for regex_compile.  */
1496
1497
static reg_errcode_t regex_compile _RE_ARGS ((const char *pattern, size_t size,
1498
					      reg_syntax_t syntax,
1499
					      struct re_pattern_buffer *bufp));
1500
static void store_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg));
1501
static void store_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1502
				 int arg1, int arg2));
1503
static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1504
				  int arg, unsigned char *end));
1505
static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1506
				  int arg1, int arg2, unsigned char *end));
1507
static boolean at_begline_loc_p _RE_ARGS ((const char *pattern, const char *p,
1508
					   reg_syntax_t syntax));
1509
static boolean at_endline_loc_p _RE_ARGS ((const char *p, const char *pend,
1510
					   reg_syntax_t syntax));
1511
static reg_errcode_t compile_range _RE_ARGS ((const char **p_ptr,
1512
					      const char *pend,
1513
					      char *translate,
1514
					      reg_syntax_t syntax,
1515
					      unsigned char *b));
1516
1517
/* Fetch the next character in the uncompiled pattern---translating it
1518
   if necessary.  Also cast from a signed character in the constant
1519
   string passed to us by the user to an unsigned char that we can use
1520
   as an array index (in, e.g., `translate').  */
1521
#ifndef PATFETCH
1522
# define PATFETCH(c)							\
1523
  do {if (p == pend) return REG_EEND;					\
1524
    c = (unsigned char) *p++;						\
1525
    if (translate) c = (unsigned char) translate[c];			\
1526
  } while (0)
1527
#endif
1528
1529
/* Fetch the next character in the uncompiled pattern, with no
1530
   translation.  */
1531
#define PATFETCH_RAW(c)							\
1532
  do {if (p == pend) return REG_EEND;					\
1533
    c = (unsigned char) *p++; 						\
1534
  } while (0)
1535
1536
/* Go backwards one character in the pattern.  */
1537
#define PATUNFETCH p--
1538
1539
1540
/* If `translate' is non-null, return translate[D], else just D.  We
1541
   cast the subscript to translate because some data is declared as
1542
   `char *', to avoid warnings when a string constant is passed.  But
1543
   when we use a character as a subscript we must make it unsigned.  */
1544
#ifndef TRANSLATE
1545
# define TRANSLATE(d) \
1546
  (translate ? (char) translate[(unsigned char) (d)] : (d))
1547
#endif
1548
1549
1550
/* Macros for outputting the compiled pattern into `buffer'.  */
1551
1552
/* If the buffer isn't allocated when it comes in, use this.  */
1553
#define INIT_BUF_SIZE  32
1554
1555
/* Make sure we have at least N more bytes of space in buffer.  */
1556
#define GET_BUFFER_SPACE(n)						\
1557
    while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated)	\
1558
      EXTEND_BUFFER ()
1559
1560
/* Make sure we have one more byte of buffer space and then add C to it.  */
1561
#define BUF_PUSH(c)							\
1562
  do {									\
1563
    GET_BUFFER_SPACE (1);						\
1564
    *b++ = (unsigned char) (c);						\
1565
  } while (0)
1566
1567
1568
/* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
1569
#define BUF_PUSH_2(c1, c2)						\
1570
  do {									\
1571
    GET_BUFFER_SPACE (2);						\
1572
    *b++ = (unsigned char) (c1);					\
1573
    *b++ = (unsigned char) (c2);					\
1574
  } while (0)
1575
1576
1577
/* As with BUF_PUSH_2, except for three bytes.  */
1578
#define BUF_PUSH_3(c1, c2, c3)						\
1579
  do {									\
1580
    GET_BUFFER_SPACE (3);						\
1581
    *b++ = (unsigned char) (c1);					\
1582
    *b++ = (unsigned char) (c2);					\
1583
    *b++ = (unsigned char) (c3);					\
1584
  } while (0)
1585
1586
1587
/* Store a jump with opcode OP at LOC to location TO.  We store a
1588
   relative address offset by the three bytes the jump itself occupies.  */
1589
#define STORE_JUMP(op, loc, to) \
1590
  store_op1 (op, loc, (int) ((to) - (loc) - 3))
1591
1592
/* Likewise, for a two-argument jump.  */
1593
#define STORE_JUMP2(op, loc, to, arg) \
1594
  store_op2 (op, loc, (int) ((to) - (loc) - 3), arg)
1595
1596
/* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
1597
#define INSERT_JUMP(op, loc, to) \
1598
  insert_op1 (op, loc, (int) ((to) - (loc) - 3), b)
1599
1600
/* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
1601
#define INSERT_JUMP2(op, loc, to, arg) \
1602
  insert_op2 (op, loc, (int) ((to) - (loc) - 3), arg, b)
1603
1604
1605
/* This is not an arbitrary limit: the arguments which represent offsets
1606
   into the pattern are two bytes long.  So if 2^16 bytes turns out to
1607
   be too small, many things would have to change.  */
1608
/* Any other compiler which, like MSC, has allocation limit below 2^16
1609
   bytes will have to use an approach similar to what was done below for
1610
   MSC and drop MAX_BUF_SIZE a bit.  Otherwise you may end up
1611
   reallocating to 0 bytes.  Such a thing is not going to work too well.
1612
   You have been warned!!  */
1613
#if defined _MSC_VER  && !defined WIN32
1614
/* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
1615
   The REALLOC define eliminates a flurry of conversion warnings,
1616
   but is not required. */
1617
# define MAX_BUF_SIZE  65500L
1618
# define REALLOC(p,s) realloc ((p), (size_t) (s))
1619
#else
1620
# define MAX_BUF_SIZE (1L << 16)
1621
# define REALLOC(p,s) realloc ((p), (s))
1622
#endif
1623
1624
/* Double the allocated size of bufp->buffer via REALLOC (capping the
   new size at MAX_BUF_SIZE) and reset the pointers that pointed into
   the old block to point to the corresponding places in the new one.

   Expects `bufp', `b', `begalt', `fixup_alt_jump', `laststart' and
   `pending_exact' to be in scope.  Returns from the enclosing function
   with REG_ESIZE if the buffer is already MAX_BUF_SIZE bytes, and with
   REG_ESPACE if REALLOC fails.

   The reallocation goes through temporaries and bufp is updated only
   on success, so on failure bufp->buffer and bufp->allocated still
   describe the old block instead of a lost pointer and a size that
   was never obtained.  */
#define EXTEND_BUFFER()							\
  do { 									\
    unsigned char *old_buffer = bufp->buffer;				\
    unsigned char *new_buffer;						\
    unsigned long new_allocated;					\
    if (bufp->allocated == MAX_BUF_SIZE) 				\
      return REG_ESIZE;							\
    new_allocated = bufp->allocated << 1;				\
    if (new_allocated > MAX_BUF_SIZE)					\
      new_allocated = MAX_BUF_SIZE; 					\
    new_buffer = (unsigned char *) REALLOC (old_buffer, new_allocated);	\
    if (new_buffer == NULL)						\
      return REG_ESPACE;						\
    /* Commit only once the new block is known to exist.  */		\
    bufp->buffer = new_buffer;						\
    bufp->allocated = new_allocated;					\
    /* If the buffer moved, move all the pointers into it.  */		\
    if (old_buffer != bufp->buffer)					\
      {									\
        b = (b - old_buffer) + bufp->buffer;				\
        begalt = (begalt - old_buffer) + bufp->buffer;			\
        if (fixup_alt_jump)						\
          fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
        if (laststart)							\
          laststart = (laststart - old_buffer) + bufp->buffer;		\
        if (pending_exact)						\
          pending_exact = (pending_exact - old_buffer) + bufp->buffer;	\
      }									\
  } while (0)
1652
1653
1654
/* Since we have one byte reserved for the register number argument to
1655
   {start,stop}_memory, the maximum number of groups we can report
1656
   things about is what fits in that byte.  */
1657
#define MAX_REGNUM 255
1658
1659
/* But patterns can have more than `MAX_REGNUM' registers.  We just
1660
   ignore the excess.  */
1661
typedef unsigned regnum_t;
1662
1663
1664
/* Macros for the compile stack.  */
1665
1666
/* Since offsets can go either forwards or backwards, this type needs to
1667
   be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
1668
/* int may be not enough when sizeof(int) == 2.  */
1669
typedef long pattern_offset_t;
1670
1671
typedef struct
1672
{
1673
  pattern_offset_t begalt_offset;
1674
  pattern_offset_t fixup_alt_jump;
1675
  pattern_offset_t inner_group_offset;
1676
  pattern_offset_t laststart_offset;
1677
  regnum_t regnum;
1678
} compile_stack_elt_t;
1679
1680
1681
typedef struct
1682
{
1683
  compile_stack_elt_t *stack;
1684
  unsigned size;
1685
  unsigned avail;			/* Offset of next open position.  */
1686
} compile_stack_type;
1687
1688
1689
#define INIT_COMPILE_STACK_SIZE 32
1690
1691
#define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
1692
#define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
1693
1694
/* The next available element.  */
1695
#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1696
1697
1698
/* Set the bit for character C in the bit list that `b' points to.
   C is converted to unsigned char before it selects the byte
   (C / BYTEWIDTH) and the bit within that byte (C % BYTEWIDTH).
   Both uses of the argument are parenthesized so that the cast applies
   to the whole argument expression; note that C is evaluated twice.  */
#define SET_LIST_BIT(c)                               \
  (b[((unsigned char) (c)) / BYTEWIDTH]               \
   |= 1 << (((unsigned char) (c)) % BYTEWIDTH))
1702
1703
1704
/* Get the next unsigned number in the uncompiled pattern.  */
1705
#define GET_UNSIGNED_NUMBER(num) 					\
1706
  { if (p != pend)							\
1707
     {									\
1708
       PATFETCH (c); 							\
1709
       while (ISDIGIT (c)) 						\
1710
         { 								\
1711
           if (num < 0)							\
1712
              num = 0;							\
1713
           num = num * 10 + c - '0'; 					\
1714
           if (p == pend) 						\
1715
              break; 							\
1716
           PATFETCH (c);						\
1717
         } 								\
1718
       } 								\
1719
    }
1720
1721
#if defined _LIBC || WIDE_CHAR_SUPPORT
1722
/* The GNU C library provides support for user-defined character classes
1723
   and the functions from ISO C amendment 1.  */
1724
# ifdef CHARCLASS_NAME_MAX
1725
#  define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
1726
# else
1727
/* This shouldn't happen but some implementation might still have this
1728
   problem.  Use a reasonable default value.  */
1729
#  define CHAR_CLASS_MAX_LENGTH 256
1730
# endif
1731
1732
# ifdef _LIBC
1733
#  define IS_CHAR_CLASS(string) __wctype (string)
1734
# else
1735
#  define IS_CHAR_CLASS(string) wctype (string)
1736
# endif
1737
#else
1738
# define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
1739
1740
# define IS_CHAR_CLASS(string)						\
1741
   (STREQ (string, "alpha") || STREQ (string, "upper")			\
1742
    || STREQ (string, "lower") || STREQ (string, "digit")		\
1743
    || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
1744
    || STREQ (string, "space") || STREQ (string, "print")		\
1745
    || STREQ (string, "punct") || STREQ (string, "graph")		\
1746
    || STREQ (string, "cntrl") || STREQ (string, "blank"))
1747
#endif
1748
1749
#ifndef MATCH_MAY_ALLOCATE
1750
1751
/* If we cannot allocate large objects within re_match_2_internal,
1752
   we make the fail stack and register vectors global.
1753
   The fail stack, we grow to the maximum size when a regexp
1754
   is compiled.
1755
   The register vectors, we adjust in size each time we
1756
   compile a regexp, according to the number of registers it needs.  */
1757
1758
static fail_stack_type fail_stack;
1759
1760
/* Size with which the following vectors are currently allocated.
1761
   That is so we can make them bigger as needed,
1762
   but never make them smaller.  */
1763
static int regs_allocated_size;
1764
1765
static const char **     regstart, **     regend;
1766
static const char ** old_regstart, ** old_regend;
1767
static const char **best_regstart, **best_regend;
1768
static register_info_type *reg_info;
1769
static const char **reg_dummy;
1770
static register_info_type *reg_info_dummy;
1771
1772
/* Make the register vectors big enough for NUM_REGS registers,
1773
   but don't make them smaller.  */
1774
1775
static
1776
regex_grow_registers (num_regs)
1777
     int num_regs;
1778
{
1779
  if (num_regs > regs_allocated_size)
1780
    {
1781
      RETALLOC_IF (regstart,	 num_regs, const char *);
1782
      RETALLOC_IF (regend,	 num_regs, const char *);
1783
      RETALLOC_IF (old_regstart, num_regs, const char *);
1784
      RETALLOC_IF (old_regend,	 num_regs, const char *);
1785
      RETALLOC_IF (best_regstart, num_regs, const char *);
1786
      RETALLOC_IF (best_regend,	 num_regs, const char *);
1787
      RETALLOC_IF (reg_info,	 num_regs, register_info_type);
1788
      RETALLOC_IF (reg_dummy,	 num_regs, const char *);
1789
      RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
1790
1791
      regs_allocated_size = num_regs;
1792
    }
1793
}
1794
1795
#endif /* not MATCH_MAY_ALLOCATE */
1796
1797
static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type
1798
						 compile_stack,
1799
						 regnum_t regnum));
1800
1801
/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
1802
   Returns one of error codes defined in `regex.h', or zero for success.
1803
1804
   Assumes the `allocated' (and perhaps `buffer') and `translate'
1805
   fields are set in BUFP on entry.
1806
1807
   If it succeeds, results are put in BUFP (if it returns an error, the
1808
   contents of BUFP are undefined):
1809
     `buffer' is the compiled pattern;
1810
     `syntax' is set to SYNTAX;
1811
     `used' is set to the length of the compiled pattern;
1812
     `fastmap_accurate' is zero;
1813
     `re_nsub' is the number of subexpressions in PATTERN;
1814
     `not_bol' and `not_eol' are zero;
1815
1816
   The `fastmap' and `newline_anchor' fields are neither
1817
   examined nor set.  */
1818
1819
/* Return, freeing storage we allocated.  */
1820
#define FREE_STACK_RETURN(value)		\
1821
  return (free (compile_stack.stack), value)
1822
1823
static reg_errcode_t
1824
regex_compile (pattern, size, syntax, bufp)
1825
     const char *pattern;
1826
     size_t size;
1827
     reg_syntax_t syntax;
1828
     struct re_pattern_buffer *bufp;
1829
{
1830
  /* We fetch characters from PATTERN here.  Even though PATTERN is
1831
     `char *' (i.e., signed), we declare these variables as unsigned, so
1832
     they can be reliably used as array indices.  */
1833
  register unsigned char c, c1;
1834
1835
  /* A random temporary spot in PATTERN.  */
1836
  const char *p1;
1837
1838
  /* Points to the end of the buffer, where we should append.  */
1839
  register unsigned char *b;
1840
1841
  /* Keeps track of unclosed groups.  */
1842
  compile_stack_type compile_stack;
1843
1844
  /* Points to the current (ending) position in the pattern.  */
1845
  const char *p = pattern;
1846
  const char *pend = pattern + size;
1847
1848
  /* How to translate the characters in the pattern.  */
1849
  RE_TRANSLATE_TYPE translate = bufp->translate;
1850
1851
  /* Address of the count-byte of the most recently inserted `exactn'
1852
     command.  This makes it possible to tell if a new exact-match
1853
     character can be added to that command or if the character requires
1854
     a new `exactn' command.  */
1855
  unsigned char *pending_exact = 0;
1856
1857
  /* Address of start of the most recently finished expression.
1858
     This tells, e.g., postfix * where to find the start of its
1859
     operand.  Reset at the beginning of groups and alternatives.  */
1860
  unsigned char *laststart = 0;
1861
1862
  /* Address of beginning of regexp, or inside of last group.  */
1863
  unsigned char *begalt;
1864
1865
  /* Place in the uncompiled pattern (i.e., the {) to
1866
     which to go back if the interval is invalid.  */
1867
  const char *beg_interval;
1868
1869
  /* Address of the place where a forward jump should go to the end of
1870
     the containing expression.  Each alternative of an `or' -- except the
1871
     last -- ends with a forward jump of this sort.  */
1872
  unsigned char *fixup_alt_jump = 0;
1873
1874
  /* Counts open-groups as they are encountered.  Remembered for the
1875
     matching close-group on the compile stack, so the same register
1876
     number is put in the stop_memory as the start_memory.  */
1877
  regnum_t regnum = 0;
1878
1879
#ifdef DEBUG
1880
  DEBUG_PRINT1 ("\nCompiling pattern: ");
1881
  if (debug)
1882
    {
1883
      unsigned debug_count;
1884
1885
      for (debug_count = 0; debug_count < size; debug_count++)
1886
        putchar (pattern[debug_count]);
1887
      putchar ('\n');
1888
    }
1889
#endif /* DEBUG */
1890
1891
  /* Initialize the compile stack.  */
1892
  compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
1893
  if (compile_stack.stack == NULL)
1894
    return REG_ESPACE;
1895
1896
  compile_stack.size = INIT_COMPILE_STACK_SIZE;
1897
  compile_stack.avail = 0;
1898
1899
  /* Initialize the pattern buffer.  */
1900
  bufp->syntax = syntax;
1901
  bufp->fastmap_accurate = 0;
1902
  bufp->not_bol = bufp->not_eol = 0;
1903
1904
  /* Set `used' to zero, so that if we return an error, the pattern
1905
     printer (for debugging) will think there's no pattern.  We reset it
1906
     at the end.  */
1907
  bufp->used = 0;
1908
1909
  /* Always count groups, whether or not bufp->no_sub is set.  */
1910
  bufp->re_nsub = 0;
1911
1912
#if !defined emacs && !defined SYNTAX_TABLE
1913
  /* Initialize the syntax table.  */
1914
   init_syntax_once ();
1915
#endif
1916
1917
  if (bufp->allocated == 0)
1918
    {
1919
      if (bufp->buffer)
1920
	{ /* If zero allocated, but buffer is non-null, try to realloc
1921
             enough space.  This loses if buffer's address is bogus, but
1922
             that is the user's responsibility.  */
1923
          RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
1924
        }
1925
      else
1926
        { /* Caller did not allocate a buffer.  Do it for them.  */
1927
          bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
1928
        }
1929
      if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
1930
1931
      bufp->allocated = INIT_BUF_SIZE;
1932
    }
1933
1934
  begalt = b = bufp->buffer;
1935
1936
  /* Loop through the uncompiled pattern until we're at the end.  */
1937
  while (p != pend)
1938
    {
1939
      PATFETCH (c);
1940
1941
      switch (c)
1942
        {
1943
        case '^':
1944
          {
1945
            if (   /* If at start of pattern, it's an operator.  */
1946
                   p == pattern + 1
1947
                   /* If context independent, it's an operator.  */
1948
                || syntax & RE_CONTEXT_INDEP_ANCHORS
1949
                   /* Otherwise, depends on what's come before.  */
1950
                || at_begline_loc_p (pattern, p, syntax))
1951
              BUF_PUSH (begline);
1952
            else
1953
              goto normal_char;
1954
          }
1955
          break;
1956
1957
1958
        case '$':
1959
          {
1960
            if (   /* If at end of pattern, it's an operator.  */
1961
                   p == pend
1962
                   /* If context independent, it's an operator.  */
1963
                || syntax & RE_CONTEXT_INDEP_ANCHORS
1964
                   /* Otherwise, depends on what's next.  */
1965
                || at_endline_loc_p (p, pend, syntax))
1966
               BUF_PUSH (endline);
1967
             else
1968
               goto normal_char;
1969
           }
1970
           break;
1971
1972
1973
	case '+':
1974
        case '?':
1975
          if ((syntax & RE_BK_PLUS_QM)
1976
              || (syntax & RE_LIMITED_OPS))
1977
            goto normal_char;
1978
        handle_plus:
1979
        case '*':
1980
          /* If there is no previous pattern... */
1981
          if (!laststart)
1982
            {
1983
              if (syntax & RE_CONTEXT_INVALID_OPS)
1984
                FREE_STACK_RETURN (REG_BADRPT);
1985
              else if (!(syntax & RE_CONTEXT_INDEP_OPS))
1986
                goto normal_char;
1987
            }
1988
1989
          {
1990
            /* Are we optimizing this jump?  */
1991
            boolean keep_string_p = false;
1992
1993
            /* 1 means zero (many) matches is allowed.  */
1994
            char zero_times_ok = 0, many_times_ok = 0;
1995
1996
            /* If there is a sequence of repetition chars, collapse it
1997
               down to just one (the right one).  We can't combine
1998
               interval operators with these because of, e.g., `a{2}*',
1999
               which should only match an even number of `a's.  */
2000
2001
            for (;;)
2002
              {
2003
                zero_times_ok |= c != '+';
2004
                many_times_ok |= c != '?';
2005
2006
                if (p == pend)
2007
                  break;
2008
2009
                PATFETCH (c);
2010
2011
                if (c == '*'
2012
                    || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
2013
                  ;
2014
2015
                else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
2016
                  {
2017
                    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2018
2019
                    PATFETCH (c1);
2020
                    if (!(c1 == '+' || c1 == '?'))
2021
                      {
2022
                        PATUNFETCH;
2023
                        PATUNFETCH;
2024
                        break;
2025
                      }
2026
2027
                    c = c1;
2028
                  }
2029
                else
2030
                  {
2031
                    PATUNFETCH;
2032
                    break;
2033
                  }
2034
2035
                /* If we get here, we found another repeat character.  */
2036
               }
2037
2038
            /* Star, etc. applied to an empty pattern is equivalent
2039
               to an empty pattern.  */
2040
            if (!laststart)
2041
              break;
2042
2043
            /* Now we know whether or not zero matches is allowed
2044
               and also whether or not two or more matches is allowed.  */
2045
            if (many_times_ok)
2046
              { /* More than one repetition is allowed, so put in at the
2047
                   end a backward relative jump from `b' to before the next
2048
                   jump we're going to put in below (which jumps from
2049
                   laststart to after this jump).
2050
2051
                   But if we are at the `*' in the exact sequence `.*\n',
2052
                   insert an unconditional jump backwards to the .,
2053
                   instead of the beginning of the loop.  This way we only
2054
                   push a failure point once, instead of every time
2055
                   through the loop.  */
2056
                assert (p - 1 > pattern);
2057
2058
                /* Allocate the space for the jump.  */
2059
                GET_BUFFER_SPACE (3);
2060
2061
                /* We know we are not at the first character of the pattern,
2062
                   because laststart was nonzero.  And we've already
2063
                   incremented `p', by the way, to be the character after
2064
                   the `*'.  Do we have to do something analogous here
2065
                   for null bytes, because of RE_DOT_NOT_NULL?  */
2066
                if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
2067
		    && zero_times_ok
2068
                    && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
2069
                    && !(syntax & RE_DOT_NEWLINE))
2070
                  { /* We have .*\n.  */
2071
                    STORE_JUMP (jump, b, laststart);
2072
                    keep_string_p = true;
2073
                  }
2074
                else
2075
                  /* Anything else.  */
2076
                  STORE_JUMP (maybe_pop_jump, b, laststart - 3);
2077
2078
                /* We've added more stuff to the buffer.  */
2079
                b += 3;
2080
              }
2081
2082
            /* On failure, jump from laststart to b + 3, which will be the
2083
               end of the buffer after this jump is inserted.  */
2084
            GET_BUFFER_SPACE (3);
2085
            INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
2086
                                       : on_failure_jump,
2087
                         laststart, b + 3);
2088
            pending_exact = 0;
2089
            b += 3;
2090
2091
            if (!zero_times_ok)
2092
              {
2093
                /* At least one repetition is required, so insert a
2094
                   `dummy_failure_jump' before the initial
2095
                   `on_failure_jump' instruction of the loop. This
2096
                   effects a skip over that instruction the first time
2097
                   we hit that loop.  */
2098
                GET_BUFFER_SPACE (3);
2099
                INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
2100
                b += 3;
2101
              }
2102
            }
2103
	  break;
2104
2105
2106
	case '.':
2107
          laststart = b;
2108
          BUF_PUSH (anychar);
2109
          break;
2110
2111
2112
        case '[':
2113
          {
2114
            boolean had_char_class = false;
2115
2116
            if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2117
2118
            /* Ensure that we have enough space to push a charset: the
2119
               opcode, the length count, and the bitset; 34 bytes in all.  */
2120
	    GET_BUFFER_SPACE (34);
2121
2122
            laststart = b;
2123
2124
            /* We test `*p == '^' twice, instead of using an if
2125
               statement, so we only need one BUF_PUSH.  */
2126
            BUF_PUSH (*p == '^' ? charset_not : charset);
2127
            if (*p == '^')
2128
              p++;
2129
2130
            /* Remember the first position in the bracket expression.  */
2131
            p1 = p;
2132
2133
            /* Push the number of bytes in the bitmap.  */
2134
            BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
2135
2136
            /* Clear the whole map.  */
2137
            bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
2138
2139
            /* charset_not matches newline according to a syntax bit.  */
2140
            if ((re_opcode_t) b[-2] == charset_not
2141
                && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
2142
              SET_LIST_BIT ('\n');
2143
2144
            /* Read in characters and ranges, setting map bits.  */
2145
            for (;;)
2146
              {
2147
                if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2148
2149
                PATFETCH (c);
2150
2151
                /* \ might escape characters inside [...] and [^...].  */
2152
                if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
2153
                  {
2154
                    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2155
2156
                    PATFETCH (c1);
2157
                    SET_LIST_BIT (c1);
2158
                    continue;
2159
                  }
2160
2161
                /* Could be the end of the bracket expression.  If it's
2162
                   not (i.e., when the bracket expression is `[]' so
2163
                   far), the ']' character bit gets set way below.  */
2164
                if (c == ']' && p != p1 + 1)
2165
                  break;
2166
2167
                /* Look ahead to see if it's a range when the last thing
2168
                   was a character class.  */
2169
                if (had_char_class && c == '-' && *p != ']')
2170
                  FREE_STACK_RETURN (REG_ERANGE);
2171
2172
                /* Look ahead to see if it's a range when the last thing
2173
                   was a character: if this is a hyphen not at the
2174
                   beginning or the end of a list, then it's the range
2175
                   operator.  */
2176
                if (c == '-'
2177
                    && !(p - 2 >= pattern && p[-2] == '[')
2178
                    && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
2179
                    && *p != ']')
2180
                  {
2181
                    reg_errcode_t ret
2182
                      = compile_range (&p, pend, translate, syntax, b);
2183
                    if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2184
                  }
2185
2186
                else if (p[0] == '-' && p[1] != ']')
2187
                  { /* This handles ranges made up of characters only.  */
2188
                    reg_errcode_t ret;
2189
2190
		    /* Move past the `-'.  */
2191
                    PATFETCH (c1);
2192
2193
                    ret = compile_range (&p, pend, translate, syntax, b);
2194
                    if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2195
                  }
2196
2197
                /* See if we're at the beginning of a possible character
2198
                   class.  */
2199
2200
                else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
2201
                  { /* Leave room for the null.  */
2202
                    char str[CHAR_CLASS_MAX_LENGTH + 1];
2203
2204
                    PATFETCH (c);
2205
                    c1 = 0;
2206
2207
                    /* If pattern is `[[:'.  */
2208
                    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2209
2210
                    for (;;)
2211
                      {
2212
                        PATFETCH (c);
2213
                        if ((c == ':' && *p == ']') || p == pend)
2214
                          break;
2215
			if (c1 < CHAR_CLASS_MAX_LENGTH)
2216
			  str[c1++] = c;
2217
			else
2218
			  /* This is in any case an invalid class name.  */
2219
			  str[0] = '\0';
2220
                      }
2221
                    str[c1] = '\0';
2222
2223
                    /* If it isn't a word bracketed by `[:' and `:]':
2224
                       undo the ending character, the letters, and leave
2225
                       the leading `:' and `[' (but set bits for them).  */
2226
                    if (c == ':' && *p == ']')
2227
                      {
2228
#if defined _LIBC || WIDE_CHAR_SUPPORT
2229
                        boolean is_lower = STREQ (str, "lower");
2230
                        boolean is_upper = STREQ (str, "upper");
2231
			wctype_t wt;
2232
                        int ch;
2233
2234
			wt = IS_CHAR_CLASS (str);
2235
			if (wt == 0)
2236
			  FREE_STACK_RETURN (REG_ECTYPE);
2237
2238
                        /* Throw away the ] at the end of the character
2239
                           class.  */
2240
                        PATFETCH (c);
2241
2242
                        if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2243
2244
                        for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
2245
			  {
2246
# ifdef _LIBC
2247
			    if (__iswctype (__btowc (ch), wt))
2248
			      SET_LIST_BIT (ch);
2249
# else
2250
			    if (iswctype (btowc (ch), wt))
2251
			      SET_LIST_BIT (ch);
2252
# endif
2253
2254
			    if (translate && (is_upper || is_lower)
2255
				&& (ISUPPER (ch) || ISLOWER (ch)))
2256
			      SET_LIST_BIT (ch);
2257
			  }
2258
2259
                        had_char_class = true;
2260
#else
2261
                        int ch;
2262
                        boolean is_alnum = STREQ (str, "alnum");
2263
                        boolean is_alpha = STREQ (str, "alpha");
2264
                        boolean is_blank = STREQ (str, "blank");
2265
                        boolean is_cntrl = STREQ (str, "cntrl");
2266
                        boolean is_digit = STREQ (str, "digit");
2267
                        boolean is_graph = STREQ (str, "graph");
2268
                        boolean is_lower = STREQ (str, "lower");
2269
                        boolean is_print = STREQ (str, "print");
2270
                        boolean is_punct = STREQ (str, "punct");
2271
                        boolean is_space = STREQ (str, "space");
2272
                        boolean is_upper = STREQ (str, "upper");
2273
                        boolean is_xdigit = STREQ (str, "xdigit");
2274
2275
                        if (!IS_CHAR_CLASS (str))
2276
			  FREE_STACK_RETURN (REG_ECTYPE);
2277
2278
                        /* Throw away the ] at the end of the character
2279
                           class.  */
2280
                        PATFETCH (c);
2281
2282
                        if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2283
2284
                        for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
2285
                          {
2286
			    /* This was split into 3 if's to
2287
			       avoid an arbitrary limit in some compiler.  */
2288
                            if (   (is_alnum  && ISALNUM (ch))
2289
                                || (is_alpha  && ISALPHA (ch))
2290
                                || (is_blank  && ISBLANK (ch))
2291
                                || (is_cntrl  && ISCNTRL (ch)))
2292
			      SET_LIST_BIT (ch);
2293
			    if (   (is_digit  && ISDIGIT (ch))
2294
                                || (is_graph  && ISGRAPH (ch))
2295
                                || (is_lower  && ISLOWER (ch))
2296
                                || (is_print  && ISPRINT (ch)))
2297
			      SET_LIST_BIT (ch);
2298
			    if (   (is_punct  && ISPUNCT (ch))
2299
                                || (is_space  && ISSPACE (ch))
2300
                                || (is_upper  && ISUPPER (ch))
2301
                                || (is_xdigit && ISXDIGIT (ch)))
2302
			      SET_LIST_BIT (ch);
2303
			    if (   translate && (is_upper || is_lower)
2304
				&& (ISUPPER (ch) || ISLOWER (ch)))
2305
			      SET_LIST_BIT (ch);
2306
                          }
2307
                        had_char_class = true;
2308
#endif	/* libc || wctype.h */
2309
                      }
2310
                    else
2311
                      {
2312
                        c1++;
2313
                        while (c1--)
2314
                          PATUNFETCH;
2315
                        SET_LIST_BIT ('[');
2316
                        SET_LIST_BIT (':');
2317
                        had_char_class = false;
2318
                      }
2319
                  }
2320
                else
2321
                  {
2322
                    had_char_class = false;
2323
                    SET_LIST_BIT (c);
2324
                  }
2325
              }
2326
2327
            /* Discard any (non)matching list bytes that are all 0 at the
2328
               end of the map.  Decrease the map-length byte too.  */
2329
            while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
2330
              b[-1]--;
2331
            b += b[-1];
2332
          }
2333
          break;
2334
2335
2336
	case '(':
2337
          if (syntax & RE_NO_BK_PARENS)
2338
            goto handle_open;
2339
          else
2340
            goto normal_char;
2341
2342
2343
        case ')':
2344
          if (syntax & RE_NO_BK_PARENS)
2345
            goto handle_close;
2346
          else
2347
            goto normal_char;
2348
2349
2350
        case '\n':
2351
          if (syntax & RE_NEWLINE_ALT)
2352
            goto handle_alt;
2353
          else
2354
            goto normal_char;
2355
2356
2357
	case '|':
2358
          if (syntax & RE_NO_BK_VBAR)
2359
            goto handle_alt;
2360
          else
2361
            goto normal_char;
2362
2363
2364
        case '{':
2365
           if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
2366
             goto handle_interval;
2367
           else
2368
             goto normal_char;
2369
2370
2371
        case '\\':
2372
          if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2373
2374
          /* Do not translate the character after the \, so that we can
2375
             distinguish, e.g., \B from \b, even if we normally would
2376
             translate, e.g., B to b.  */
2377
          PATFETCH_RAW (c);
2378
2379
          switch (c)
2380
            {
2381
            case '(':
2382
              if (syntax & RE_NO_BK_PARENS)
2383
                goto normal_backslash;
2384
2385
            handle_open:
2386
              bufp->re_nsub++;
2387
              regnum++;
2388
2389
              if (COMPILE_STACK_FULL)
2390
                {
2391
                  RETALLOC (compile_stack.stack, compile_stack.size << 1,
2392
                            compile_stack_elt_t);
2393
                  if (compile_stack.stack == NULL) return REG_ESPACE;
2394
2395
                  compile_stack.size <<= 1;
2396
                }
2397
2398
              /* These are the values to restore when we hit end of this
2399
                 group.  They are all relative offsets, so that if the
2400
                 whole pattern moves because of realloc, they will still
2401
                 be valid.  */
2402
              COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
2403
              COMPILE_STACK_TOP.fixup_alt_jump
2404
                = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
2405
              COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
2406
              COMPILE_STACK_TOP.regnum = regnum;
2407
2408
              /* We will eventually replace the 0 with the number of
2409
                 groups inner to this one.  But do not push a
2410
                 start_memory for groups beyond the last one we can
2411
                 represent in the compiled pattern.  */
2412
              if (regnum <= MAX_REGNUM)
2413
                {
2414
                  COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
2415
                  BUF_PUSH_3 (start_memory, regnum, 0);
2416
                }
2417
2418
              compile_stack.avail++;
2419
2420
              fixup_alt_jump = 0;
2421
              laststart = 0;
2422
              begalt = b;
2423
	      /* If we've reached MAX_REGNUM groups, then this open
2424
		 won't actually generate any code, so we'll have to
2425
		 clear pending_exact explicitly.  */
2426
	      pending_exact = 0;
2427
              break;
2428
2429
2430
            case ')':
2431
              if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
2432
2433
              if (COMPILE_STACK_EMPTY)
2434
		{
2435
		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2436
		    goto normal_backslash;
2437
		  else
2438
		    FREE_STACK_RETURN (REG_ERPAREN);
2439
		}
2440
2441
            handle_close:
2442
              if (fixup_alt_jump)
2443
                { /* Push a dummy failure point at the end of the
2444
                     alternative for a possible future
2445
                     `pop_failure_jump' to pop.  See comments at
2446
                     `push_dummy_failure' in `re_match_2'.  */
2447
                  BUF_PUSH (push_dummy_failure);
2448
2449
                  /* We allocated space for this jump when we assigned
2450
                     to `fixup_alt_jump', in the `handle_alt' case below.  */
2451
                  STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
2452
                }
2453
2454
              /* See similar code for backslashed right paren above.  */
2455
              if (COMPILE_STACK_EMPTY)
2456
		{
2457
		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2458
		    goto normal_char;
2459
		  else
2460
		    FREE_STACK_RETURN (REG_ERPAREN);
2461
		}
2462
2463
              /* Since we just checked for an empty stack above, this
2464
                 ``can't happen''.  */
2465
              assert (compile_stack.avail != 0);
2466
              {
2467
                /* We don't just want to restore into `regnum', because
2468
                   later groups should continue to be numbered higher,
2469
                   as in `(ab)c(de)' -- the second group is #2.  */
2470
                regnum_t this_group_regnum;
2471
2472
                compile_stack.avail--;
2473
                begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
2474
                fixup_alt_jump
2475
                  = COMPILE_STACK_TOP.fixup_alt_jump
2476
                    ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
2477
                    : 0;
2478
                laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
2479
                this_group_regnum = COMPILE_STACK_TOP.regnum;
2480
		/* If we've reached MAX_REGNUM groups, then this close
2481
		   won't actually generate any code, so we'll have to
2482
		   clear pending_exact explicitly.  */
2483
		pending_exact = 0;
2484
2485
                /* We're at the end of the group, so now we know how many
2486
                   groups were inside this one.  */
2487
                if (this_group_regnum <= MAX_REGNUM)
2488
                  {
2489
                    unsigned char *inner_group_loc
2490
                      = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
2491
2492
                    *inner_group_loc = regnum - this_group_regnum;
2493
                    BUF_PUSH_3 (stop_memory, this_group_regnum,
2494
                                regnum - this_group_regnum);
2495
                  }
2496
              }
2497
              break;
2498
2499
2500
            case '|':					/* `\|'.  */
2501
              if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
2502
                goto normal_backslash;
2503
            handle_alt:
2504
              if (syntax & RE_LIMITED_OPS)
2505
                goto normal_char;
2506
2507
              /* Insert before the previous alternative a jump which
2508
                 jumps to this alternative if the former fails.  */
2509
              GET_BUFFER_SPACE (3);
2510
              INSERT_JUMP (on_failure_jump, begalt, b + 6);
2511
              pending_exact = 0;
2512
              b += 3;
2513
2514
              /* The alternative before this one has a jump after it
2515
                 which gets executed if it gets matched.  Adjust that
2516
                 jump so it will jump to this alternative's analogous
2517
                 jump (put in below, which in turn will jump to the next
2518
                 (if any) alternative's such jump, etc.).  The last such
2519
                 jump jumps to the correct final destination.  A picture:
2520
                          _____ _____
2521
                          |   | |   |
2522
                          |   v |   v
2523
                         a | b   | c
2524
2525
                 If we are at `b', then fixup_alt_jump right now points to a
2526
                 three-byte space after `a'.  We'll put in the jump, set
2527
                 fixup_alt_jump to right after `b', and leave behind three
2528
                 bytes which we'll fill in when we get to after `c'.  */
2529
2530
              if (fixup_alt_jump)
2531
                STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2532
2533
              /* Mark and leave space for a jump after this alternative,
2534
                 to be filled in later either by next alternative or
2535
                 when we know we're at the end of a series of alternatives.  */
2536
              fixup_alt_jump = b;
2537
              GET_BUFFER_SPACE (3);
2538
              b += 3;
2539
2540
              laststart = 0;
2541
              begalt = b;
2542
              break;
2543
2544
2545
            case '{':
2546
              /* If \{ is a literal.  */
2547
              if (!(syntax & RE_INTERVALS)
2548
                     /* If we're at `\{' and it's not the open-interval
2549
                        operator.  */
2550
                  || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
2551
                  || (p - 2 == pattern  &&  p == pend))
2552
                goto normal_backslash;
2553
2554
            handle_interval:
2555
              {
2556
                /* If got here, then the syntax allows intervals.  */
2557
2558
                /* At least (most) this many matches must be made.  */
2559
                int lower_bound = -1, upper_bound = -1;
2560
2561
                beg_interval = p - 1;
2562
2563
                if (p == pend)
2564
                  {
2565
                    if (syntax & RE_NO_BK_BRACES)
2566
                      goto unfetch_interval;
2567
                    else
2568
                      FREE_STACK_RETURN (REG_EBRACE);
2569
                  }
2570
2571
                GET_UNSIGNED_NUMBER (lower_bound);
2572
2573
                if (c == ',')
2574
                  {
2575
                    GET_UNSIGNED_NUMBER (upper_bound);
2576
                    if (upper_bound < 0) upper_bound = RE_DUP_MAX;
2577
                  }
2578
                else
2579
                  /* Interval such as `{1}' => match exactly once. */
2580
                  upper_bound = lower_bound;
2581
2582
                if (lower_bound < 0 || upper_bound > RE_DUP_MAX
2583
                    || lower_bound > upper_bound)
2584
                  {
2585
                    if (syntax & RE_NO_BK_BRACES)
2586
                      goto unfetch_interval;
2587
                    else
2588
                      FREE_STACK_RETURN (REG_BADBR);
2589
                  }
2590
2591
                if (!(syntax & RE_NO_BK_BRACES))
2592
                  {
2593
                    if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
2594
2595
                    PATFETCH (c);
2596
                  }
2597
2598
                if (c != '}')
2599
                  {
2600
                    if (syntax & RE_NO_BK_BRACES)
2601
                      goto unfetch_interval;
2602
                    else
2603
                      FREE_STACK_RETURN (REG_BADBR);
2604
                  }
2605
2606
                /* We just parsed a valid interval.  */
2607
2608
                /* If it's invalid to have no preceding re.  */
2609
                if (!laststart)
2610
                  {
2611
                    if (syntax & RE_CONTEXT_INVALID_OPS)
2612
                      FREE_STACK_RETURN (REG_BADRPT);
2613
                    else if (syntax & RE_CONTEXT_INDEP_OPS)
2614
                      laststart = b;
2615
                    else
2616
                      goto unfetch_interval;
2617
                  }
2618
2619
                /* If the upper bound is zero, don't want to succeed at
2620
                   all; jump from `laststart' to `b + 3', which will be
2621
                   the end of the buffer after we insert the jump.  */
2622
                 if (upper_bound == 0)
2623
                   {
2624
                     GET_BUFFER_SPACE (3);
2625
                     INSERT_JUMP (jump, laststart, b + 3);
2626
                     b += 3;
2627
                   }
2628
2629
                 /* Otherwise, we have a nontrivial interval.  When
2630
                    we're all done, the pattern will look like:
2631
                      set_number_at <jump count> <upper bound>
2632
                      set_number_at <succeed_n count> <lower bound>
2633
                      succeed_n <after jump addr> <succeed_n count>
2634
                      <body of loop>
2635
                      jump_n <succeed_n addr> <jump count>
2636
                    (The upper bound and `jump_n' are omitted if
2637
                    `upper_bound' is 1, though.)  */
2638
                 else
2639
                   { /* If the upper bound is > 1, we need to insert
2640
                        more at the end of the loop.  */
2641
                     unsigned nbytes = 10 + (upper_bound > 1) * 10;
2642
2643
                     GET_BUFFER_SPACE (nbytes);
2644
2645
                     /* Initialize lower bound of the `succeed_n', even
2646
                        though it will be set during matching by its
2647
                        attendant `set_number_at' (inserted next),
2648
                        because `re_compile_fastmap' needs to know.
2649
                        Jump to the `jump_n' we might insert below.  */
2650
                     INSERT_JUMP2 (succeed_n, laststart,
2651
                                   b + 5 + (upper_bound > 1) * 5,
2652
                                   lower_bound);
2653
                     b += 5;
2654
2655
                     /* Code to initialize the lower bound.  Insert
2656
                        before the `succeed_n'.  The `5' is the last two
2657
                        bytes of this `set_number_at', plus 3 bytes of
2658
                        the following `succeed_n'.  */
2659
                     insert_op2 (set_number_at, laststart, 5, lower_bound, b);
2660
                     b += 5;
2661
2662
                     if (upper_bound > 1)
2663
                       { /* More than one repetition is allowed, so
2664
                            append a backward jump to the `succeed_n'
2665
                            that starts this interval.
2666
2667
                            When we've reached this during matching,
2668
                            we'll have matched the interval once, so
2669
                            jump back only `upper_bound - 1' times.  */
2670
                         STORE_JUMP2 (jump_n, b, laststart + 5,
2671
                                      upper_bound - 1);
2672
                         b += 5;
2673
2674
                         /* The location we want to set is the second
2675
                            parameter of the `jump_n'; that is `b-2' as
2676
                            an absolute address.  `laststart' will be
2677
                            the `set_number_at' we're about to insert;
2678
                            `laststart+3' the number to set, the source
2679
                            for the relative address.  But we are
2680
                            inserting into the middle of the pattern --
2681
                            so everything is getting moved up by 5.
2682
                            Conclusion: (b - 2) - (laststart + 3) + 5,
2683
                            i.e., b - laststart.
2684
2685
                            We insert this at the beginning of the loop
2686
                            so that if we fail during matching, we'll
2687
                            reinitialize the bounds.  */
2688
                         insert_op2 (set_number_at, laststart, b - laststart,
2689
                                     upper_bound - 1, b);
2690
                         b += 5;
2691
                       }
2692
                   }
2693
                pending_exact = 0;
2694
                beg_interval = NULL;
2695
              }
2696
              break;
2697
2698
            unfetch_interval:
2699
              /* If an invalid interval, match the characters as literals.  */
2700
               assert (beg_interval);
2701
               p = beg_interval;
2702
               beg_interval = NULL;
2703
2704
               /* normal_char and normal_backslash need `c'.  */
2705
               PATFETCH (c);
2706
2707
               if (!(syntax & RE_NO_BK_BRACES))
2708
                 {
2709
                   if (p > pattern  &&  p[-1] == '\\')
2710
                     goto normal_backslash;
2711
                 }
2712
               goto normal_char;
2713
2714
#ifdef emacs
2715
            /* There is no way to specify the before_dot and after_dot
2716
               operators.  rms says this is ok.  --karl  */
2717
            case '=':
2718
              BUF_PUSH (at_dot);
2719
              break;
2720
2721
            case 's':
2722
              laststart = b;
2723
              PATFETCH (c);
2724
              BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
2725
              break;
2726
2727
            case 'S':
2728
              laststart = b;
2729
              PATFETCH (c);
2730
              BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
2731
              break;
2732
#endif /* emacs */
2733
2734
2735
            case 'w':
2736
	      if (syntax & RE_NO_GNU_OPS)
2737
		goto normal_char;
2738
              laststart = b;
2739
              BUF_PUSH (wordchar);
2740
              break;
2741
2742
2743
            case 'W':
2744
	      if (syntax & RE_NO_GNU_OPS)
2745
		goto normal_char;
2746
              laststart = b;
2747
              BUF_PUSH (notwordchar);
2748
              break;
2749
2750
2751
            case '<':
2752
	      if (syntax & RE_NO_GNU_OPS)
2753
		goto normal_char;
2754
              BUF_PUSH (wordbeg);
2755
              break;
2756
2757
            case '>':
2758
	      if (syntax & RE_NO_GNU_OPS)
2759
		goto normal_char;
2760
              BUF_PUSH (wordend);
2761
              break;
2762
2763
            case 'b':
2764
	      if (syntax & RE_NO_GNU_OPS)
2765
		goto normal_char;
2766
              BUF_PUSH (wordbound);
2767
              break;
2768
2769
            case 'B':
2770
	      if (syntax & RE_NO_GNU_OPS)
2771
		goto normal_char;
2772
              BUF_PUSH (notwordbound);
2773
              break;
2774
2775
            case '`':
2776
	      if (syntax & RE_NO_GNU_OPS)
2777
		goto normal_char;
2778
              BUF_PUSH (begbuf);
2779
              break;
2780
2781
            case '\'':
2782
	      if (syntax & RE_NO_GNU_OPS)
2783
		goto normal_char;
2784
              BUF_PUSH (endbuf);
2785
              break;
2786
2787
            case '1': case '2': case '3': case '4': case '5':
2788
            case '6': case '7': case '8': case '9':
2789
              if (syntax & RE_NO_BK_REFS)
2790
                goto normal_char;
2791
2792
              c1 = c - '0';
2793
2794
              if (c1 > regnum)
2795
                FREE_STACK_RETURN (REG_ESUBREG);
2796
2797
              /* Can't back reference to a subexpression if inside of it.  */
2798
              if (group_in_compile_stack (compile_stack, (regnum_t) c1))
2799
                goto normal_char;
2800
2801
              laststart = b;
2802
              BUF_PUSH_2 (duplicate, c1);
2803
              break;
2804
2805
2806
            case '+':
2807
            case '?':
2808
              if (syntax & RE_BK_PLUS_QM)
2809
                goto handle_plus;
2810
              else
2811
                goto normal_backslash;
2812
2813
            default:
2814
            normal_backslash:
2815
              /* You might think it would be useful for \ to mean
2816
                 not to translate; but if we don't translate it
2817
                 it will never match anything.  */
2818
              c = TRANSLATE (c);
2819
              goto normal_char;
2820
            }
2821
          break;
2822
2823
2824
	default:
2825
        /* Expects the character in `c'.  */
2826
	normal_char:
2827
	      /* If no exactn currently being built.  */
2828
          if (!pending_exact
2829
2830
              /* If last exactn not at current position.  */
2831
              || pending_exact + *pending_exact + 1 != b
2832
2833
              /* We have only one byte following the exactn for the count.  */
2834
	      || *pending_exact == (1 << BYTEWIDTH) - 1
2835
2836
              /* If followed by a repetition operator.  */
2837
              || *p == '*' || *p == '^'
2838
	      || ((syntax & RE_BK_PLUS_QM)
2839
		  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
2840
		  : (*p == '+' || *p == '?'))
2841
	      || ((syntax & RE_INTERVALS)
2842
                  && ((syntax & RE_NO_BK_BRACES)
2843
		      ? *p == '{'
2844
                      : (p[0] == '\\' && p[1] == '{'))))
2845
	    {
2846
	      /* Start building a new exactn.  */
2847
2848
              laststart = b;
2849
2850
	      BUF_PUSH_2 (exactn, 0);
2851
	      pending_exact = b - 1;
2852
            }
2853
2854
	  BUF_PUSH (c);
2855
          (*pending_exact)++;
2856
	  break;
2857
        } /* switch (c) */
2858
    } /* while p != pend */
2859
2860
2861
  /* Through the pattern now.  */
2862
2863
  if (fixup_alt_jump)
2864
    STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2865
2866
  if (!COMPILE_STACK_EMPTY)
2867
    FREE_STACK_RETURN (REG_EPAREN);
2868
2869
  /* If we don't want backtracking, force success
2870
     the first time we reach the end of the compiled pattern.  */
2871
  if (syntax & RE_NO_POSIX_BACKTRACKING)
2872
    BUF_PUSH (succeed);
2873
2874
  free (compile_stack.stack);
2875
2876
  /* We have succeeded; set the length of the buffer.  */
2877
  bufp->used = b - bufp->buffer;
2878
2879
#ifdef DEBUG
2880
  if (debug)
2881
    {
2882
      DEBUG_PRINT1 ("\nCompiled pattern: \n");
2883
      print_compiled_pattern (bufp);
2884
    }
2885
#endif /* DEBUG */
2886
2887
#ifndef MATCH_MAY_ALLOCATE
2888
  /* Initialize the failure stack to the largest possible stack.  This
2889
     isn't necessary unless we're trying to avoid calling alloca in
2890
     the search and match routines.  */
2891
  {
2892
    int num_regs = bufp->re_nsub + 1;
2893
2894
    /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
2895
       is strictly greater than re_max_failures, the largest possible stack
2896
       is 2 * re_max_failures failure points.  */
2897
    if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
2898
      {
2899
	fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
2900
2901
# ifdef emacs
2902
	if (! fail_stack.stack)
2903
	  fail_stack.stack
2904
	    = (fail_stack_elt_t *) xmalloc (fail_stack.size
2905
					    * sizeof (fail_stack_elt_t));
2906
	else
2907
	  fail_stack.stack
2908
	    = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
2909
					     (fail_stack.size
2910
					      * sizeof (fail_stack_elt_t)));
2911
# else /* not emacs */
2912
	if (! fail_stack.stack)
2913
	  fail_stack.stack
2914
	    = (fail_stack_elt_t *) malloc (fail_stack.size
2915
					   * sizeof (fail_stack_elt_t));
2916
	else
2917
	  fail_stack.stack
2918
	    = (fail_stack_elt_t *) realloc (fail_stack.stack,
2919
					    (fail_stack.size
2920
					     * sizeof (fail_stack_elt_t)));
2921
# endif /* not emacs */
2922
      }
2923
2924
    regex_grow_registers (num_regs);
2925
  }
2926
#endif /* not MATCH_MAY_ALLOCATE */
2927
2928
  return REG_NOERROR;
2929
} /* regex_compile */
2930
2931
/* Subroutines for `regex_compile'.  */
2932
2933
/* Store OP at LOC followed by two-byte integer parameter ARG.  */
2934
2935
static void
2936
store_op1 (op, loc, arg)
2937
    re_opcode_t op;
2938
    unsigned char *loc;
2939
    int arg;
2940
{
2941
  *loc = (unsigned char) op;
2942
  STORE_NUMBER (loc + 1, arg);
2943
}
2944
2945
2946
/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
2947
2948
static void
2949
store_op2 (op, loc, arg1, arg2)
2950
    re_opcode_t op;
2951
    unsigned char *loc;
2952
    int arg1, arg2;
2953
{
2954
  *loc = (unsigned char) op;
2955
  STORE_NUMBER (loc + 1, arg1);
2956
  STORE_NUMBER (loc + 3, arg2);
2957
}
2958
2959
2960
/* Copy the bytes from LOC to END to open up three bytes of space at LOC
2961
   for OP followed by two-byte integer parameter ARG.  */
2962
2963
static void
2964
insert_op1 (op, loc, arg, end)
2965
    re_opcode_t op;
2966
    unsigned char *loc;
2967
    int arg;
2968
    unsigned char *end;
2969
{
2970
  register unsigned char *pfrom = end;
2971
  register unsigned char *pto = end + 3;
2972
2973
  while (pfrom != loc)
2974
    *--pto = *--pfrom;
2975
2976
  store_op1 (op, loc, arg);
2977
}
2978
2979
2980
/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
2981
2982
static void
2983
insert_op2 (op, loc, arg1, arg2, end)
2984
    re_opcode_t op;
2985
    unsigned char *loc;
2986
    int arg1, arg2;
2987
    unsigned char *end;
2988
{
2989
  register unsigned char *pfrom = end;
2990
  register unsigned char *pto = end + 5;
2991
2992
  while (pfrom != loc)
2993
    *--pto = *--pfrom;
2994
2995
  store_op2 (op, loc, arg1, arg2);
2996
}
2997
2998
2999
/* P points to just after a ^ in PATTERN.  Return true if that ^ comes
3000
   after an alternative or a begin-subexpression.  We assume there is at
3001
   least one character before the ^.  */
3002
3003
static boolean
3004
at_begline_loc_p (pattern, p, syntax)
3005
    const char *pattern, *p;
3006
    reg_syntax_t syntax;
3007
{
3008
  const char *prev = p - 2;
3009
  boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
3010
3011
  return
3012
       /* After a subexpression?  */
3013
       (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
3014
       /* After an alternative?  */
3015
    || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
3016
}
3017
3018
3019
/* The dual of at_begline_loc_p.  This one is for $.  We assume there is
3020
   at least one character after the $, i.e., `P < PEND'.  */
3021
3022
static boolean
3023
at_endline_loc_p (p, pend, syntax)
3024
    const char *p, *pend;
3025
    reg_syntax_t syntax;
3026
{
3027
  const char *next = p;
3028
  boolean next_backslash = *next == '\\';
3029
  const char *next_next = p + 1 < pend ? p + 1 : 0;
3030
3031
  return
3032
       /* Before a subexpression?  */
3033
       (syntax & RE_NO_BK_PARENS ? *next == ')'
3034
        : next_backslash && next_next && *next_next == ')')
3035
       /* Before an alternative?  */
3036
    || (syntax & RE_NO_BK_VBAR ? *next == '|'
3037
        : next_backslash && next_next && *next_next == '|');
3038
}
3039
3040
3041
/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
3042
   false if it's not.  */
3043
3044
static boolean
3045
group_in_compile_stack (compile_stack, regnum)
3046
    compile_stack_type compile_stack;
3047
    regnum_t regnum;
3048
{
3049
  int this_element;
3050
3051
  for (this_element = compile_stack.avail - 1;
3052
       this_element >= 0;
3053
       this_element--)
3054
    if (compile_stack.stack[this_element].regnum == regnum)
3055
      return true;
3056
3057
  return false;
3058
}
3059
3060
3061
/* Read the ending character of a range (in a bracket expression) from the
3062
   uncompiled pattern *P_PTR (which ends at PEND).  We assume the
3063
   starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
3064
   Then we set the translation of all bits between the starting and
3065
   ending characters (inclusive) in the compiled pattern B.
3066
3067
   Return an error code.
3068
3069
   We use these short variable names so we can use the same macros as
3070
   `regex_compile' itself.  */
3071
3072
static reg_errcode_t
3073
compile_range (p_ptr, pend, translate, syntax, b)
3074
    const char **p_ptr, *pend;
3075
    RE_TRANSLATE_TYPE translate;
3076
    reg_syntax_t syntax;
3077
    unsigned char *b;
3078
{
3079
  unsigned this_char;
3080
3081
  const char *p = *p_ptr;
3082
  unsigned int range_start, range_end;
3083
3084
  if (p == pend)
3085
    return REG_ERANGE;
3086
3087
  /* Even though the pattern is a signed `char *', we need to fetch
3088
     with unsigned char *'s; if the high bit of the pattern character
3089
     is set, the range endpoints will be negative if we fetch using a
3090
     signed char *.
3091
3092
     We also want to fetch the endpoints without translating them; the
3093
     appropriate translation is done in the bit-setting loop below.  */
3094
  /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *.  */
3095
  range_start = ((const unsigned char *) p)[-2];
3096
  range_end   = ((const unsigned char *) p)[0];
3097
3098
  /* Have to increment the pointer into the pattern string, so the
3099
     caller isn't still at the ending character.  */
3100
  (*p_ptr)++;
3101
3102
  /* If the start is after the end, the range is empty.  */
3103
  if (range_start > range_end)
3104
    return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
3105
3106
  /* Here we see why `this_char' has to be larger than an `unsigned
3107
     char' -- the range is inclusive, so if `range_end' == 0xff
3108
     (assuming 8-bit characters), we would otherwise go into an infinite
3109
     loop, since all characters <= 0xff.  */
3110
  for (this_char = range_start; this_char <= range_end; this_char++)
3111
    {
3112
      SET_LIST_BIT (TRANSLATE (this_char));
3113
    }
3114
3115
  return REG_NOERROR;
3116
}
3117
3118
/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
   BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
   characters can start a string that matches the pattern.  This fastmap
   is used by re_search to skip quickly over impossible starting points.

   The caller must supply the address of a (1 << BYTEWIDTH)-byte data
   area as BUFP->fastmap.

   We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
   the pattern buffer.

   The compiled pattern is walked one opcode at a time.  Opcodes that
   consume a character mark the characters they accept and end the
   current path; opcodes that match the empty string are stepped over;
   `on_failure_jump' pushes its alternative on a local failure stack so
   that it is explored once the current path has ended.

   Returns 0 if we succeed, -2 if an internal error (pushing an
   alternative on the failure stack failed).   */

int
re_compile_fastmap (bufp)
     struct re_pattern_buffer *bufp;
{
  /* J and K are scratch values: loop counters, jump offsets and
     opcode arguments.  */
  int j, k;
#ifdef MATCH_MAY_ALLOCATE
  fail_stack_type fail_stack;
#endif
#ifndef REGEX_MALLOC
  /* NOTE(review): not referenced directly in this function; presumably
     needed by the stack-allocation macros -- confirm.  */
  char *destination;
#endif

  register char *fastmap = bufp->fastmap;
  unsigned char *pattern = bufp->buffer;
  unsigned char *p = pattern;
  register unsigned char *pend = pattern + bufp->used;

#ifdef REL_ALLOC
  /* This holds the pointer to the failure stack, when
     it is allocated relocatably.  */
  fail_stack_elt_t *failure_stack_ptr;
#endif

  /* Assume that each path through the pattern can be null until
     proven otherwise.  We set this false at the bottom of switch
     statement, to which we get only if a particular path doesn't
     match the empty string.  */
  boolean path_can_be_null = true;

  /* We aren't doing a `succeed_n' to begin with.  */
  boolean succeed_n_p = false;

  assert (fastmap != NULL && p != NULL);

  INIT_FAIL_STACK ();
  bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
  bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
  bufp->can_be_null = 0;

  while (1)
    {
      if (p == pend || *p == succeed)
	{
	  /* We have reached the (effective) end of pattern.  */
	  if (!FAIL_STACK_EMPTY ())
	    {
	      bufp->can_be_null |= path_can_be_null;

	      /* Reset for next path.  */
	      path_can_be_null = true;

	      /* Resume at the most recently pushed alternative.  */
	      p = fail_stack.stack[--fail_stack.avail].pointer;

	      continue;
	    }
	  else
	    break;
	}

      /* We should never be about to go beyond the end of the pattern.  */
      assert (p < pend);

      /* From here on P addresses the byte after the opcode.  */
      switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
	{

        /* I guess the idea here is to simply not bother with a fastmap
           if a backreference is used, since it's too hard to figure out
           the fastmap for the corresponding group.  Setting
           `can_be_null' stops `re_search_2' from using the fastmap, so
           that is all we do.  */
	case duplicate:
	  bufp->can_be_null = 1;
          goto done;


      /* Following are the cases which match a character.  These end
         with `break'.  */

	case exactn:
	  /* p[0] is the count byte that `regex_compile' stores after
	     `exactn'; p[1] is the first literal character.  */
          fastmap[p[1]] = 1;
	  break;


        case charset:
	  /* *P is the number of bitmap bytes that follow; every bit
	     that is set marks an accepted character.  */
          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
              fastmap[j] = 1;
	  break;


	case charset_not:
	  /* Chars beyond end of map must be allowed.  */
	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
            fastmap[j] = 1;

	  /* Within the map, the accepted characters are the ones whose
	     bit is clear.  */
	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
              fastmap[j] = 1;
          break;


	case wordchar:
	  for (j = 0; j < (1 << BYTEWIDTH); j++)
	    if (SYNTAX (j) == Sword)
	      fastmap[j] = 1;
	  break;


	case notwordchar:
	  for (j = 0; j < (1 << BYTEWIDTH); j++)
	    if (SYNTAX (j) != Sword)
	      fastmap[j] = 1;
	  break;


        case anychar:
	  {
	    /* Remember the newline entry so it can be put back below.  */
	    int fastmap_newline = fastmap['\n'];

	    /* `.' matches anything ...  */
	    for (j = 0; j < (1 << BYTEWIDTH); j++)
	      fastmap[j] = 1;

	    /* ... except perhaps newline.  */
	    if (!(bufp->syntax & RE_DOT_NEWLINE))
	      fastmap['\n'] = fastmap_newline;

	    /* Return if we have already set `can_be_null'; if we have,
	       then the fastmap is irrelevant.  Something's wrong here.  */
	    else if (bufp->can_be_null)
	      goto done;

	    /* Otherwise, have to check alternative paths.  */
	    break;
	  }

#ifdef emacs
        case syntaxspec:
	  /* The byte after the opcode is the syntax code to match.  */
	  k = *p++;
	  for (j = 0; j < (1 << BYTEWIDTH); j++)
	    if (SYNTAX (j) == (enum syntaxcode) k)
	      fastmap[j] = 1;
	  break;


	case notsyntaxspec:
	  k = *p++;
	  for (j = 0; j < (1 << BYTEWIDTH); j++)
	    if (SYNTAX (j) != (enum syntaxcode) k)
	      fastmap[j] = 1;
	  break;


      /* All cases after this match the empty string.  These end with
         `continue'.  */


	case before_dot:
	case at_dot:
	case after_dot:
          continue;
#endif /* emacs */


	/* Opcodes without arguments that consume no character.  */
        case no_op:
        case begline:
        case endline:
	case begbuf:
	case endbuf:
	case wordbound:
	case notwordbound:
	case wordbeg:
	case wordend:
        case push_dummy_failure:
          continue;


	case jump_n:
        case pop_failure_jump:
	case maybe_pop_jump:
	case jump:
        case jump_past_alt:
	case dummy_failure_jump:
	  /* Read the jump offset and follow it.  */
          EXTRACT_NUMBER_AND_INCR (j, p);
	  p += j;
	  if (j > 0)
	    continue;

          /* Jump backward implies we just went through the body of a
             loop and matched nothing.  Opcode jumped to should be
             `on_failure_jump' or `succeed_n'.  Just treat it like an
             ordinary jump.  For a * loop, it has pushed its failure
             point already; if so, discard that as redundant.  */
          if ((re_opcode_t) *p != on_failure_jump
	      && (re_opcode_t) *p != succeed_n)
	    continue;

	  /* Step over that opcode and follow its own jump offset.  */
          p++;
          EXTRACT_NUMBER_AND_INCR (j, p);
          p += j;

          /* If what's on the stack is where we are now, pop it.  */
          if (!FAIL_STACK_EMPTY ()
	      && fail_stack.stack[fail_stack.avail - 1].pointer == p)
            fail_stack.avail--;

          continue;


        case on_failure_jump:
        case on_failure_keep_string_jump:
	handle_on_failure_jump:
          EXTRACT_NUMBER_AND_INCR (j, p);

          /* For some patterns, e.g., `(a?)?', `p+j' here points to the
             end of the pattern.  We don't want to push such a point,
             since when we restore it above, entering the switch will
             increment `p' past the end of the pattern.  We don't need
             to push such a point since we obviously won't find any more
             fastmap entries beyond `pend'.  Such a pattern can match
             the null string, though.  */
          if (p + j < pend)
            {
              if (!PUSH_PATTERN_OP (p + j, fail_stack))
		{
		  /* The alternative could not be pushed: give up and
		     report an internal error.  */
		  RESET_FAIL_STACK ();
		  return -2;
		}
            }
          else
            bufp->can_be_null = 1;

	  /* Reached via the `goto' in `succeed_n' below: that opcode
	     carries one more two-byte argument to step over.  */
          if (succeed_n_p)
            {
              EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.  */
              succeed_n_p = false;
	    }

          continue;


	case succeed_n:
          /* Get to the number of times to succeed.  */
          p += 2;

          /* Increment p past the n for when k != 0.  */
          EXTRACT_NUMBER_AND_INCR (k, p);
          if (k == 0)
	    {
	      /* Rewind to just after the opcode and treat it like an
		 `on_failure_jump'.  */
              p -= 4;
  	      succeed_n_p = true;  /* Spaghetti code alert.  */
              goto handle_on_failure_jump;
            }
          continue;


	case set_number_at:
	  /* Skip the two two-byte arguments.  */
          p += 4;
          continue;


	case start_memory:
        case stop_memory:
	  /* Skip the two argument bytes.  */
	  p += 2;
	  continue;


	default:
          abort (); /* We have listed all the cases.  */
        } /* switch *p++ */

      /* Getting here means we have found the possible starting
         characters for one path of the pattern -- and that the empty
         string does not match.  We need not follow this path further.
         Instead, look at the next alternative (remembered on the
         stack), or quit if no more.  The test at the top of the loop
         does these things.  */
      path_can_be_null = false;
      p = pend;
    } /* while p */

  /* Set `can_be_null' for the last path (also the first path, if the
     pattern is empty).  */
  bufp->can_be_null |= path_can_be_null;

  /* The `goto done' exits above arrive here and so skip the update
     just made.  */
 done:
  RESET_FAIL_STACK ();
  return 0;
} /* re_compile_fastmap */
#ifdef _LIBC
weak_alias (__re_compile_fastmap, re_compile_fastmap)
#endif
3423
3424
/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
3425
   ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
3426
   this memory for recording register information.  STARTS and ENDS
3427
   must be allocated using the malloc library routine, and must each
3428
   be at least NUM_REGS * sizeof (regoff_t) bytes long.
3429
3430
   If NUM_REGS == 0, then subsequent matches should allocate their own
3431
   register data.
3432
3433
   Unless this function is called, the first search or match using
3434
   PATTERN_BUFFER will allocate its own register data, without
3435
   freeing the old data.  */
3436
3437
void
3438
re_set_registers (bufp, regs, num_regs, starts, ends)
3439
    struct re_pattern_buffer *bufp;
3440
    struct re_registers *regs;
3441
    unsigned num_regs;
3442
    regoff_t *starts, *ends;
3443
{
3444
  if (num_regs)
3445
    {
3446
      bufp->regs_allocated = REGS_REALLOCATE;
3447
      regs->num_regs = num_regs;
3448
      regs->start = starts;
3449
      regs->end = ends;
3450
    }
3451
  else
3452
    {
3453
      bufp->regs_allocated = REGS_UNALLOCATED;
3454
      regs->num_regs = 0;
3455
      regs->start = regs->end = (regoff_t *) 0;
3456
    }
3457
}
3458
#ifdef _LIBC
3459
weak_alias (__re_set_registers, re_set_registers)
3460
#endif
3461
3462
/* Searching routines.  */
3463
3464
/* Like re_search_2, below, but only one string is specified, and
3465
   doesn't let you say where to stop matching. */
3466
3467
int
3468
re_search (bufp, string, size, startpos, range, regs)
3469
     struct re_pattern_buffer *bufp;
3470
     const char *string;
3471
     int size, startpos, range;
3472
     struct re_registers *regs;
3473
{
3474
  return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
3475
		      regs, size);
3476
}
3477
#ifdef _LIBC
3478
weak_alias (__re_search, re_search)
3479
#endif
3480
3481
3482
/* Using the compiled pattern in BUFP->buffer, first tries to match the
3483
   virtual concatenation of STRING1 and STRING2, starting first at index
3484
   STARTPOS, then at STARTPOS + 1, and so on.
3485
3486
   STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
3487
3488
   RANGE is how far to scan while trying to match.  RANGE = 0 means try
3489
   only at STARTPOS; in general, the last start tried is STARTPOS +
3490
   RANGE.
3491
3492
   In REGS, return the indices of the virtual concatenation of STRING1
3493
   and STRING2 that matched the entire BUFP->buffer and its contained
3494
   subexpressions.
3495
3496
   Do not consider matching one past the index STOP in the virtual
3497
   concatenation of STRING1 and STRING2.
3498
3499
   We return either the position in the strings at which the match was
3500
   found, -1 if no match, or -2 if error (such as failure
3501
   stack overflow).  */
3502
3503
int
3504
re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
3505
     struct re_pattern_buffer *bufp;
3506
     const char *string1, *string2;
3507
     int size1, size2;
3508
     int startpos;
3509
     int range;
3510
     struct re_registers *regs;
3511
     int stop;
3512
{
3513
  int val;
3514
  register char *fastmap = bufp->fastmap;
3515
  register RE_TRANSLATE_TYPE translate = bufp->translate;
3516
  int total_size = size1 + size2;
3517
  int endpos = startpos + range;
3518
3519
  /* Check for out-of-range STARTPOS.  */
3520
  if (startpos < 0 || startpos > total_size)
3521
    return -1;
3522
3523
  /* Fix up RANGE if it might eventually take us outside
3524
     the virtual concatenation of STRING1 and STRING2.
3525
     Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
3526
  if (endpos < 0)
3527
    range = 0 - startpos;
3528
  else if (endpos > total_size)
3529
    range = total_size - startpos;
3530
3531
  /* If the search isn't to be a backwards one, don't waste time in a
3532
     search for a pattern that must be anchored.  */
3533
  if (bufp->used > 0 && range > 0
3534
      && ((re_opcode_t) bufp->buffer[0] == begbuf
3535
	  /* `begline' is like `begbuf' if it cannot match at newlines.  */
3536
	  || ((re_opcode_t) bufp->buffer[0] == begline
3537
	      && !bufp->newline_anchor)))
3538
    {
3539
      if (startpos > 0)
3540
	return -1;
3541
      else
3542
	range = 1;
3543
    }
3544
3545
#ifdef emacs
3546
  /* In a forward search for something that starts with \=.
3547
     don't keep searching past point.  */
3548
  if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
3549
    {
3550
      range = PT - startpos;
3551
      if (range <= 0)
3552
	return -1;
3553
    }
3554
#endif /* emacs */
3555
3556
  /* Update the fastmap now if not correct already.  */
3557
  if (fastmap && !bufp->fastmap_accurate)
3558
    if (re_compile_fastmap (bufp) == -2)
3559
      return -2;
3560
3561
  /* Loop through the string, looking for a place to start matching.  */
3562
  for (;;)
3563
    {
3564
      /* If a fastmap is supplied, skip quickly over characters that
3565
         cannot be the start of a match.  If the pattern can match the
3566
         null string, however, we don't need to skip characters; we want
3567
         the first null string.  */
3568
      if (fastmap && startpos < total_size && !bufp->can_be_null)
3569
	{
3570
	  if (range > 0)	/* Searching forwards.  */
3571
	    {
3572
	      register const char *d;
3573
	      register int lim = 0;
3574
	      int irange = range;
3575
3576
              if (startpos < size1 && startpos + range >= size1)
3577
                lim = range - (size1 - startpos);
3578
3579
	      d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
3580
3581
              /* Written out as an if-else to avoid testing `translate'
3582
                 inside the loop.  */
3583
	      if (translate)
3584
                while (range > lim
3585
                       && !fastmap[(unsigned char)
3586
				   translate[(unsigned char) *d++]])
3587
                  range--;
3588
	      else
3589
                while (range > lim && !fastmap[(unsigned char) *d++])
3590
                  range--;
3591
3592
	      startpos += irange - range;
3593
	    }
3594
	  else				/* Searching backwards.  */
3595
	    {
3596
	      register char c = (size1 == 0 || startpos >= size1
3597
                                 ? string2[startpos - size1]
3598
                                 : string1[startpos]);
3599
3600
	      if (!fastmap[(unsigned char) TRANSLATE (c)])
3601
		goto advance;
3602
	    }
3603
	}
3604
3605
      /* If can't match the null string, and that's all we have left, fail.  */
3606
      if (range >= 0 && startpos == total_size && fastmap
3607
          && !bufp->can_be_null)
3608
	return -1;
3609
3610
      val = re_match_2_internal (bufp, string1, size1, string2, size2,
3611
				 startpos, regs, stop);
3612
#ifndef REGEX_MALLOC
3613
# ifdef C_ALLOCA
3614
      alloca (0);
3615
# endif
3616
#endif
3617
3618
      if (val >= 0)
3619
	return startpos;
3620
3621
      if (val == -2)
3622
	return -2;
3623
3624
    advance:
3625
      if (!range)
3626
        break;
3627
      else if (range > 0)
3628
        {
3629
          range--;
3630
          startpos++;
3631
        }
3632
      else
3633
        {
3634
          range++;
3635
          startpos--;
3636
        }
3637
    }
3638
  return -1;
3639
} /* re_search_2 */
3640
#ifdef _LIBC
3641
weak_alias (__re_search_2, re_search_2)
3642
#endif
3643
3644
/* This converts PTR, a pointer into one of the search strings `string1'
   and `string2' into an offset from the beginning of that string.  For
   a pointer into `string2' the result also includes `size1', so in both
   cases it is an index into the virtual concatenation of the strings.  */
#define POINTER_TO_OFFSET(ptr)			\
  (FIRST_STRING_P (ptr)				\
   ? ((regoff_t) ((ptr) - string1))		\
   : ((regoff_t) ((ptr) - string2 + size1)))

/* Macros for dealing with the split strings in re_match_2.  They refer
   by name to variables of the function they are expanded in (`d',
   `dend', `end_match_1', `end_match_2', `string1', `string2', `size1',
   `size2', `end1', `end2').  */

#define MATCHING_IN_FIRST_STRING  (dend == end_match_1)

/* Call before fetching a character with *d.  This switches over to
   string2 if necessary.  It jumps to the label `fail' of the enclosing
   function when the end of string2 has been reached.  */
#define PREFETCH()							\
  while (d == dend)						    	\
    {									\
      /* End of string2 => fail.  */					\
      if (dend == end_match_2) 						\
        goto fail;							\
      /* End of string1 => advance to string2.  */ 			\
      d = string2;						        \
      dend = end_match_2;						\
    }


/* Test if at very beginning or at very end of the virtual concatenation
   of `string1' and `string2'.  If only one string, it's `string2'.
   Note that AT_STRINGS_BEG is also true whenever `size2' is zero.  */
#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
#define AT_STRINGS_END(d) ((d) == end2)


/* Test if D points to a character which is word-constituent.  We have
   two special cases to check for: if past the end of string1, look at
   the first character in string2; and if before the beginning of
   string2, look at the last character in string1.  */
#define WORDCHAR_P(d)							\
  (SYNTAX ((d) == end1 ? *string2					\
           : (d) == string2 - 1 ? *(end1 - 1) : *(d))			\
   == Sword)

/* Disabled due to a compiler bug -- see comment at case wordbound */
#if 0
/* Test if the character before D and the one at D differ with respect
   to being word-constituent.  */
#define AT_WORD_BOUNDARY(d)						\
  (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
   || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
#endif
3692
3693
/* Free everything we malloc.  */
#ifdef MATCH_MAY_ALLOCATE
/* Release VAR through REGEX_FREE if it is non-null, then reset it to
   NULL.  The body is wrapped in `do ... while (0)' so that the macro
   expands to exactly one statement; without the wrapper the
   assignment would escape an unbraced `if' or loop around the call.  */
# define FREE_VAR(var)							\
  do {									\
    if (var) REGEX_FREE (var);						\
    var = NULL;								\
  } while (0)
/* Release the failure stack and every register array of the matcher.
   The names below are variables of the function in which the macro is
   expanded.  */
# define FREE_VARIABLES()						\
  do {									\
    REGEX_FREE_STACK (fail_stack.stack);				\
    FREE_VAR (regstart);						\
    FREE_VAR (regend);							\
    FREE_VAR (old_regstart);						\
    FREE_VAR (old_regend);						\
    FREE_VAR (best_regstart);						\
    FREE_VAR (best_regend);						\
    FREE_VAR (reg_info);						\
    FREE_VAR (reg_dummy);						\
    FREE_VAR (reg_info_dummy);						\
  } while (0)
#else
# define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning. */
#endif /* not MATCH_MAY_ALLOCATE */
3712
3713
/* These values must meet several constraints.  They must not be valid
3714
   register values; since we have a limit of 255 registers (because
3715
   we use only one byte in the pattern for the register number), we can
3716
   use numbers larger than 255.  They must differ by 1, because of
3717
   NUM_FAILURE_ITEMS above.  And the value for the lowest register must
3718
   be larger than the value for the highest register, so we do not try
3719
   to actually save any registers when none are active.

   NO_HIGHEST_ACTIVE_REG is 1 << BYTEWIDTH and NO_LOWEST_ACTIVE_REG is
   exactly one more.  BYTEWIDTH is defined outside this section;
   presumably it is 8, giving 256 and 257 -- TODO confirm.  */
3720
#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
3721
#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
3722
3723
/* Matching routines.  */
3724
3725
#ifndef emacs   /* Emacs never uses this.  */
3726
/* re_match is like re_match_2 except it takes only a single string.

   BUFP is the compiled pattern, STRING and SIZE the subject text and
   its length, POS the offset at which matching starts, and REGS (may
   be null) receives the group offsets.  The call is forwarded to
   re_match_2_internal with a null, zero-length first string, STRING
   as the second string, and SIZE as the stop offset; the value
   returned is whatever re_match_2_internal returns.  */
3727
3728
int
3729
re_match (bufp, string, size, pos, regs)
3730
     struct re_pattern_buffer *bufp;
3731
     const char *string;
3732
     int size, pos;
3733
     struct re_registers *regs;
3734
{
3735
  int result = re_match_2_internal (bufp, NULL, 0, string, size,
3736
				    pos, regs, size);
  /* Only when REGEX_MALLOC is not defined and the C emulation of
     alloca (C_ALLOCA) is in use: call alloca (0) after the matcher
     has returned.  NOTE(review): presumably this lets the emulation
     reclaim storage the matcher allocated -- confirm against the
     alloca implementation in use.  */
3737
# ifndef REGEX_MALLOC
3738
#  ifdef C_ALLOCA
3739
  alloca (0);
3740
#  endif
3741
# endif
3742
  return result;
3743
}
3744
/* Inside the C library the public name is a weak alias of
   __re_match.  */
# ifdef _LIBC
3745
weak_alias (__re_match, re_match)
3746
# endif
3747
#endif /* not emacs */
3748
3749
/* Prototypes for file-local helpers, declared through _RE_ARGS.
   group_match_null_string_p is called from the start_memory case of
   re_match_2_internal to learn whether a group can match the empty
   string; the other three are not used in the part of the matcher
   shown here.  */
static boolean group_match_null_string_p _RE_ARGS ((unsigned char **p,
3750
						    unsigned char *end,
3751
						register_info_type *reg_info));
3752
static boolean alt_match_null_string_p _RE_ARGS ((unsigned char *p,
3753
						  unsigned char *end,
3754
						register_info_type *reg_info));
3755
static boolean common_op_match_null_string_p _RE_ARGS ((unsigned char **p,
3756
							unsigned char *end,
3757
						register_info_type *reg_info));
3758
static int bcmp_translate _RE_ARGS ((const char *s1, const char *s2,
3759
				     int len, char *translate));
3760
3761
/* re_match_2 matches the compiled pattern in BUFP against the
3762
   (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
3763
   and SIZE2, respectively).  We start matching at POS, and stop
3764
   matching at STOP.
3765
3766
   If REGS is non-null and the `no_sub' field of BUFP is zero, we
3767
   store offsets for the substring each group matched in REGS.  See the
3768
   documentation for exactly how many groups we fill.
3769
3770
   We return -1 if no match, -2 if an internal error (such as the
3771
   failure stack overflowing).  Otherwise, we return the length of the
3772
   matched substring.

   This function is a thin wrapper: all arguments are passed unchanged
   to re_match_2_internal, which does the matching.  */
3773
3774
int
3775
re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3776
     struct re_pattern_buffer *bufp;
3777
     const char *string1, *string2;
3778
     int size1, size2;
3779
     int pos;
3780
     struct re_registers *regs;
3781
     int stop;
3782
{
3783
  int result = re_match_2_internal (bufp, string1, size1, string2, size2,
3784
				    pos, regs, stop);
  /* Only when REGEX_MALLOC is not defined and the C emulation of
     alloca (C_ALLOCA) is in use: call alloca (0) once the matcher has
     returned.  NOTE(review): presumably this lets the emulation
     reclaim the matcher's allocations -- confirm against the alloca
     implementation in use.  */
3785
#ifndef REGEX_MALLOC
3786
# ifdef C_ALLOCA
3787
  alloca (0);
3788
# endif
3789
#endif
3790
  return result;
3791
}
3792
/* Inside the C library the public name is a weak alias of
   __re_match_2.  */
#ifdef _LIBC
3793
weak_alias (__re_match_2, re_match_2)
3794
#endif
3795
3796
/* This is a separate function so that we can force an alloca cleanup
3797
   afterwards.  */
3798
static int
3799
re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
3800
     struct re_pattern_buffer *bufp;
3801
     const char *string1, *string2;
3802
     int size1, size2;
3803
     int pos;
3804
     struct re_registers *regs;
3805
     int stop;
3806
{
3807
  /* General temporaries.  */
3808
  int mcnt;
3809
  unsigned char *p1;
3810
3811
  /* Just past the end of the corresponding string.  */
3812
  const char *end1, *end2;
3813
3814
  /* Pointers into string1 and string2, just past the last characters in
3815
     each to consider matching.  */
3816
  const char *end_match_1, *end_match_2;
3817
3818
  /* Where we are in the data, and the end of the current string.  */
3819
  const char *d, *dend;
3820
3821
  /* Where we are in the pattern, and the end of the pattern.  */
3822
  unsigned char *p = bufp->buffer;
3823
  register unsigned char *pend = p + bufp->used;
3824
3825
  /* Mark the opcode just after a start_memory, so we can test for an
3826
     empty subpattern when we get to the stop_memory.  */
3827
  unsigned char *just_past_start_mem = 0;
3828
3829
  /* We use this to map every character in the string.  */
3830
  RE_TRANSLATE_TYPE translate = bufp->translate;
3831
3832
  /* Failure point stack.  Each place that can handle a failure further
3833
     down the line pushes a failure point on this stack.  It consists of
3834
     regstart, regend, and reg_info for all registers corresponding to
3835
     the subexpressions we're currently inside, plus the number of such
3836
     registers, and, finally, two char *'s.  The first char * is where
3837
     to resume scanning the pattern; the second one is where to resume
3838
     scanning the strings.  If the latter is zero, the failure point is
3839
     a ``dummy''; if a failure happens and the failure point is a dummy,
3840
     it gets discarded and the next one is tried.  */
3841
#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
3842
  fail_stack_type fail_stack;
3843
#endif
3844
#ifdef DEBUG
3845
  static unsigned failure_id = 0;
3846
  unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
3847
#endif
3848
3849
#ifdef REL_ALLOC
3850
  /* This holds the pointer to the failure stack, when
3851
     it is allocated relocatably.  */
3852
  fail_stack_elt_t *failure_stack_ptr;
3853
#endif
3854
3855
  /* We fill all the registers internally, independent of what we
3856
     return, for use in backreferences.  The number here includes
3857
     an element for register zero.  */
3858
  size_t num_regs = bufp->re_nsub + 1;
3859
3860
  /* The currently active registers.  */
3861
  active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3862
  active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3863
3864
  /* Information on the contents of registers. These are pointers into
3865
     the input strings; they record just what was matched (on this
3866
     attempt) by a subexpression part of the pattern, that is, the
3867
     regnum-th regstart pointer points to where in the string we began
3868
     matching and the regnum-th regend points to right after where we
3869
     stopped matching the regnum-th subexpression.  (The zeroth register
3870
     keeps track of what the whole pattern matches.)  */
3871
#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3872
  const char **regstart, **regend;
3873
#endif
3874
3875
  /* If a group that's operated upon by a repetition operator fails to
3876
     match anything, then the register for its start will need to be
3877
     restored because it will have been set to wherever in the string we
3878
     are when we last see its open-group operator.  Similarly for a
3879
     register's end.  */
3880
#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3881
  const char **old_regstart, **old_regend;
3882
#endif
3883
3884
  /* The is_active field of reg_info helps us keep track of which (possibly
3885
     nested) subexpressions we are currently in. The matched_something
3886
     field of reg_info[reg_num] helps us tell whether or not we have
3887
     matched any of the pattern so far this time through the reg_num-th
3888
     subexpression.  These two fields get reset each time through any
3889
     loop their register is in.  */
3890
#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
3891
  register_info_type *reg_info;
3892
#endif
3893
3894
  /* The following record the register info as found in the above
3895
     variables when we find a match better than any we've seen before.
3896
     This happens as we backtrack through the failure points, which in
3897
     turn happens only if we have not yet matched the entire string. */
3898
  unsigned best_regs_set = false;
3899
#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3900
  const char **best_regstart, **best_regend;
3901
#endif
3902
3903
  /* Logically, this is `best_regend[0]'.  But we don't want to have to
3904
     allocate space for that if we're not allocating space for anything
3905
     else (see below).  Also, we never need info about register 0 for
3906
     any of the other register vectors, and it seems rather a kludge to
3907
     treat `best_regend' differently than the rest.  So we keep track of
3908
     the end of the best match so far in a separate variable.  We
3909
     initialize this to NULL so that when we backtrack the first time
3910
     and need to test it, it's not garbage.  */
3911
  const char *match_end = NULL;
3912
3913
  /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
3914
  int set_regs_matched_done = 0;
3915
3916
  /* Used when we pop values we don't care about.  */
3917
#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3918
  const char **reg_dummy;
3919
  register_info_type *reg_info_dummy;
3920
#endif
3921
3922
#ifdef DEBUG
3923
  /* Counts the total number of registers pushed.  */
3924
  unsigned num_regs_pushed = 0;
3925
#endif
3926
3927
  DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
3928
3929
  INIT_FAIL_STACK ();
3930
3931
#ifdef MATCH_MAY_ALLOCATE
3932
  /* Do not bother to initialize all the register variables if there are
3933
     no groups in the pattern, as it takes a fair amount of time.  If
3934
     there are groups, we include space for register 0 (the whole
3935
     pattern), even though we never use it, since it simplifies the
3936
     array indexing.  We should fix this.  */
3937
  if (bufp->re_nsub)
3938
    {
3939
      regstart = REGEX_TALLOC (num_regs, const char *);
3940
      regend = REGEX_TALLOC (num_regs, const char *);
3941
      old_regstart = REGEX_TALLOC (num_regs, const char *);
3942
      old_regend = REGEX_TALLOC (num_regs, const char *);
3943
      best_regstart = REGEX_TALLOC (num_regs, const char *);
3944
      best_regend = REGEX_TALLOC (num_regs, const char *);
3945
      reg_info = REGEX_TALLOC (num_regs, register_info_type);
3946
      reg_dummy = REGEX_TALLOC (num_regs, const char *);
3947
      reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
3948
3949
      if (!(regstart && regend && old_regstart && old_regend && reg_info
3950
            && best_regstart && best_regend && reg_dummy && reg_info_dummy))
3951
        {
3952
          FREE_VARIABLES ();
3953
          return -2;
3954
        }
3955
    }
3956
  else
3957
    {
3958
      /* We must initialize all our variables to NULL, so that
3959
         `FREE_VARIABLES' doesn't try to free them.  */
3960
      regstart = regend = old_regstart = old_regend = best_regstart
3961
        = best_regend = reg_dummy = NULL;
3962
      reg_info = reg_info_dummy = (register_info_type *) NULL;
3963
    }
3964
#endif /* MATCH_MAY_ALLOCATE */
3965
3966
  /* The starting position is bogus.  */
3967
  if (pos < 0 || pos > size1 + size2)
3968
    {
3969
      FREE_VARIABLES ();
3970
      return -1;
3971
    }
3972
3973
  /* Initialize subexpression text positions to -1 to mark ones that no
3974
     start_memory/stop_memory has been seen for. Also initialize the
3975
     register information struct.  */
3976
  for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
3977
    {
3978
      regstart[mcnt] = regend[mcnt]
3979
        = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
3980
3981
      REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
3982
      IS_ACTIVE (reg_info[mcnt]) = 0;
3983
      MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3984
      EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3985
    }
3986
3987
  /* We move `string1' into `string2' if the latter's empty -- but not if
3988
     `string1' is null.  */
3989
  if (size2 == 0 && string1 != NULL)
3990
    {
3991
      string2 = string1;
3992
      size2 = size1;
3993
      string1 = 0;
3994
      size1 = 0;
3995
    }
3996
  end1 = string1 + size1;
3997
  end2 = string2 + size2;
3998
3999
  /* Compute where to stop matching, within the two strings.  */
4000
  if (stop <= size1)
4001
    {
4002
      end_match_1 = string1 + stop;
4003
      end_match_2 = string2;
4004
    }
4005
  else
4006
    {
4007
      end_match_1 = end1;
4008
      end_match_2 = string2 + stop - size1;
4009
    }
4010
4011
  /* `p' scans through the pattern as `d' scans through the data.
4012
     `dend' is the end of the input string that `d' points within.  `d'
4013
     is advanced into the following input string whenever necessary, but
4014
     this happens before fetching; therefore, at the beginning of the
4015
     loop, `d' can be pointing at the end of a string, but it cannot
4016
     equal `string2'.  */
4017
  if (size1 > 0 && pos <= size1)
4018
    {
4019
      d = string1 + pos;
4020
      dend = end_match_1;
4021
    }
4022
  else
4023
    {
4024
      d = string2 + pos - size1;
4025
      dend = end_match_2;
4026
    }
4027
4028
  DEBUG_PRINT1 ("The compiled pattern is:\n");
4029
  DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
4030
  DEBUG_PRINT1 ("The string to match is: `");
4031
  DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
4032
  DEBUG_PRINT1 ("'\n");
4033
4034
  /* This loops over pattern commands.  It exits by returning from the
4035
     function if the match is complete, or it drops through if the match
4036
     fails at this starting point in the input data.  */
4037
  for (;;)
4038
    {
4039
#ifdef _LIBC
4040
      DEBUG_PRINT2 ("\n%p: ", p);
4041
#else
4042
      DEBUG_PRINT2 ("\n0x%x: ", p);
4043
#endif
4044
4045
      if (p == pend)
4046
	{ /* End of pattern means we might have succeeded.  */
4047
          DEBUG_PRINT1 ("end of pattern ... ");
4048
4049
	  /* If we haven't matched the entire string, and we want the
4050
             longest match, try backtracking.  */
4051
          if (d != end_match_2)
4052
	    {
4053
	      /* 1 if this match ends in the same string (string1 or string2)
4054
		 as the best previous match.  */
4055
	      boolean same_str_p = (FIRST_STRING_P (match_end)
4056
				    == MATCHING_IN_FIRST_STRING);
4057
	      /* 1 if this match is the best seen so far.  */
4058
	      boolean best_match_p;
4059
4060
	      /* AIX compiler got confused when this was combined
4061
		 with the previous declaration.  */
4062
	      if (same_str_p)
4063
		best_match_p = d > match_end;
4064
	      else
4065
		best_match_p = !MATCHING_IN_FIRST_STRING;
4066
4067
              DEBUG_PRINT1 ("backtracking.\n");
4068
4069
              if (!FAIL_STACK_EMPTY ())
4070
                { /* More failure points to try.  */
4071
4072
                  /* If exceeds best match so far, save it.  */
4073
                  if (!best_regs_set || best_match_p)
4074
                    {
4075
                      best_regs_set = true;
4076
                      match_end = d;
4077
4078
                      DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
4079
4080
                      for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
4081
                        {
4082
                          best_regstart[mcnt] = regstart[mcnt];
4083
                          best_regend[mcnt] = regend[mcnt];
4084
                        }
4085
                    }
4086
                  goto fail;
4087
                }
4088
4089
              /* If no failure points, don't restore garbage.  And if
4090
                 last match is real best match, don't restore second
4091
                 best one. */
4092
              else if (best_regs_set && !best_match_p)
4093
                {
4094
  	        restore_best_regs:
4095
                  /* Restore best match.  It may happen that `dend ==
4096
                     end_match_1' while the restored d is in string2.
4097
                     For example, the pattern `x.*y.*z' against the
4098
                     strings `x-' and `y-z-', if the two strings are
4099
                     not consecutive in memory.  */
4100
                  DEBUG_PRINT1 ("Restoring best registers.\n");
4101
4102
                  d = match_end;
4103
                  dend = ((d >= string1 && d <= end1)
4104
		           ? end_match_1 : end_match_2);
4105
4106
		  for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
4107
		    {
4108
		      regstart[mcnt] = best_regstart[mcnt];
4109
		      regend[mcnt] = best_regend[mcnt];
4110
		    }
4111
                }
4112
            } /* d != end_match_2 */
4113
4114
	succeed_label:
4115
          DEBUG_PRINT1 ("Accepting match.\n");
4116
4117
          /* If caller wants register contents data back, do it.  */
4118
          if (regs && !bufp->no_sub)
4119
	    {
4120
              /* Have the register data arrays been allocated?  */
4121
              if (bufp->regs_allocated == REGS_UNALLOCATED)
4122
                { /* No.  So allocate them with malloc.  We need one
4123
                     extra element beyond `num_regs' for the `-1' marker
4124
                     GNU code uses.  */
4125
                  regs->num_regs = MAX (RE_NREGS, num_regs + 1);
4126
                  regs->start = TALLOC (regs->num_regs, regoff_t);
4127
                  regs->end = TALLOC (regs->num_regs, regoff_t);
4128
                  if (regs->start == NULL || regs->end == NULL)
4129
		    {
4130
		      FREE_VARIABLES ();
4131
		      return -2;
4132
		    }
4133
                  bufp->regs_allocated = REGS_REALLOCATE;
4134
                }
4135
              else if (bufp->regs_allocated == REGS_REALLOCATE)
4136
                { /* Yes.  If we need more elements than were already
4137
                     allocated, reallocate them.  If we need fewer, just
4138
                     leave it alone.  */
4139
                  if (regs->num_regs < num_regs + 1)
4140
                    {
4141
                      regs->num_regs = num_regs + 1;
4142
                      RETALLOC (regs->start, regs->num_regs, regoff_t);
4143
                      RETALLOC (regs->end, regs->num_regs, regoff_t);
4144
                      if (regs->start == NULL || regs->end == NULL)
4145
			{
4146
			  FREE_VARIABLES ();
4147
			  return -2;
4148
			}
4149
                    }
4150
                }
4151
              else
4152
		{
4153
		  /* These braces fend off an "empty body in an else-statement"
4154
		     warning under GCC when assert expands to nothing.  */
4155
		  assert (bufp->regs_allocated == REGS_FIXED);
4156
		}
4157
4158
              /* Convert the pointer data in `regstart' and `regend' to
4159
                 indices.  Register zero has to be set differently,
4160
                 since we haven't kept track of any info for it.  */
4161
              if (regs->num_regs > 0)
4162
                {
4163
                  regs->start[0] = pos;
4164
                  regs->end[0] = (MATCHING_IN_FIRST_STRING
4165
				  ? ((regoff_t) (d - string1))
4166
			          : ((regoff_t) (d - string2 + size1)));
4167
                }
4168
4169
              /* Go through the first `min (num_regs, regs->num_regs)'
4170
                 registers, since that is all we initialized.  */
4171
	      for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
4172
		   mcnt++)
4173
		{
4174
                  if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
4175
                    regs->start[mcnt] = regs->end[mcnt] = -1;
4176
                  else
4177
                    {
4178
		      regs->start[mcnt]
4179
			= (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
4180
                      regs->end[mcnt]
4181
			= (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
4182
                    }
4183
		}
4184
4185
              /* If the regs structure we return has more elements than
4186
                 were in the pattern, set the extra elements to -1.  If
4187
                 we (re)allocated the registers, this is the case,
4188
                 because we always allocate enough to have at least one
4189
                 -1 at the end.  */
4190
              for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
4191
                regs->start[mcnt] = regs->end[mcnt] = -1;
4192
	    } /* regs && !bufp->no_sub */
4193
4194
          DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
4195
                        nfailure_points_pushed, nfailure_points_popped,
4196
                        nfailure_points_pushed - nfailure_points_popped);
4197
          DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
4198
4199
          mcnt = d - pos - (MATCHING_IN_FIRST_STRING
4200
			    ? string1
4201
			    : string2 - size1);
4202
4203
          DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
4204
4205
          FREE_VARIABLES ();
4206
          return mcnt;
4207
        }
4208
4209
      /* Otherwise match next pattern command.  */
4210
      switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
4211
	{
4212
        /* Ignore these.  Used to ignore the n of succeed_n's which
4213
           currently have n == 0.  */
4214
        case no_op:
4215
          DEBUG_PRINT1 ("EXECUTING no_op.\n");
4216
          break;
4217
4218
	case succeed:
4219
          DEBUG_PRINT1 ("EXECUTING succeed.\n");
4220
	  goto succeed_label;
4221
4222
        /* Match the next n pattern characters exactly.  The following
4223
           byte in the pattern defines n, and the n bytes after that
4224
           are the characters to match.  */
4225
	case exactn:
4226
	  mcnt = *p++;
4227
          DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
4228
4229
          /* This is written out as an if-else so we don't waste time
4230
             testing `translate' inside the loop.  */
4231
          if (translate)
4232
	    {
4233
	      do
4234
		{
4235
		  PREFETCH ();
4236
		  if ((unsigned char) translate[(unsigned char) *d++]
4237
		      != (unsigned char) *p++)
4238
                    goto fail;
4239
		}
4240
	      while (--mcnt);
4241
	    }
4242
	  else
4243
	    {
4244
	      do
4245
		{
4246
		  PREFETCH ();
4247
		  if (*d++ != (char) *p++) goto fail;
4248
		}
4249
	      while (--mcnt);
4250
	    }
4251
	  SET_REGS_MATCHED ();
4252
          break;
4253
4254
4255
        /* Match any character except possibly a newline or a null.  */
4256
	case anychar:
4257
          DEBUG_PRINT1 ("EXECUTING anychar.\n");
4258
4259
          PREFETCH ();
4260
4261
          if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
4262
              || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
4263
	    goto fail;
4264
4265
          SET_REGS_MATCHED ();
4266
          DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
4267
          d++;
4268
	  break;
4269
4270
4271
	case charset:
4272
	case charset_not:
4273
	  {
4274
	    register unsigned char c;
4275
	    boolean not = (re_opcode_t) *(p - 1) == charset_not;
4276
4277
            DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
4278
4279
	    PREFETCH ();
4280
	    c = TRANSLATE (*d); /* The character to match.  */
4281
4282
            /* Cast to `unsigned' instead of `unsigned char' in case the
4283
               bit list is a full 32 bytes long.  */
4284
	    if (c < (unsigned) (*p * BYTEWIDTH)
4285
		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4286
	      not = !not;
4287
4288
	    p += 1 + *p;
4289
4290
	    if (!not) goto fail;
4291
4292
	    SET_REGS_MATCHED ();
4293
            d++;
4294
	    break;
4295
	  }
4296
4297
4298
        /* The beginning of a group is represented by start_memory.
4299
           The arguments are the register number in the next byte, and the
4300
           number of groups inner to this one in the next.  The text
4301
           matched within the group is recorded (in the internal
4302
           registers data structure) under the register number.  */
4303
        case start_memory:
4304
	  DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
4305
4306
          /* Find out if this group can match the empty string.  */
4307
	  p1 = p;		/* To send to group_match_null_string_p.  */
4308
4309
          if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
4310
            REG_MATCH_NULL_STRING_P (reg_info[*p])
4311
              = group_match_null_string_p (&p1, pend, reg_info);
4312
4313
          /* Save the position in the string where we were the last time
4314
             we were at this open-group operator in case the group is
4315
             operated upon by a repetition operator, e.g., with `(a*)*b'
4316
             against `ab'; then we want to ignore where we are now in
4317
             the string in case this attempt to match fails.  */
4318
          old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4319
                             ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
4320
                             : regstart[*p];
4321
	  DEBUG_PRINT2 ("  old_regstart: %d\n",
4322
			 POINTER_TO_OFFSET (old_regstart[*p]));
4323
4324
          regstart[*p] = d;
4325
	  DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
4326
4327
          IS_ACTIVE (reg_info[*p]) = 1;
4328
          MATCHED_SOMETHING (reg_info[*p]) = 0;
4329
4330
	  /* Clear this whenever we change the register activity status.  */
4331
	  set_regs_matched_done = 0;
4332
4333
          /* This is the new highest active register.  */
4334
          highest_active_reg = *p;
4335
4336
          /* If nothing was active before, this is the new lowest active
4337
             register.  */
4338
          if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
4339
            lowest_active_reg = *p;
4340
4341
          /* Move past the register number and inner group count.  */
4342
          p += 2;
4343
	  just_past_start_mem = p;
4344
4345
          break;
4346
4347
4348
        /* The stop_memory opcode represents the end of a group.  Its
4349
           arguments are the same as start_memory's: the register
4350
           number, and the number of inner groups.  */
4351
	case stop_memory:
4352
	  DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
4353
4354
          /* We need to save the string position the last time we were at
4355
             this close-group operator in case the group is operated
4356
             upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
4357
             against `aba'; then we want to ignore where we are now in
4358
             the string in case this attempt to match fails.  */
4359
          old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4360
                           ? REG_UNSET (regend[*p]) ? d : regend[*p]
4361
			   : regend[*p];
4362
	  DEBUG_PRINT2 ("      old_regend: %d\n",
4363
			 POINTER_TO_OFFSET (old_regend[*p]));
4364
4365
          regend[*p] = d;
4366
	  DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
4367
4368
          /* This register isn't active anymore.  */
4369
          IS_ACTIVE (reg_info[*p]) = 0;
4370
4371
	  /* Clear this whenever we change the register activity status.  */
4372
	  set_regs_matched_done = 0;
4373
4374
          /* If this was the only register active, nothing is active
4375
             anymore.  */
4376
          if (lowest_active_reg == highest_active_reg)
4377
            {
4378
              lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4379
              highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4380
            }
4381
          else
4382
            { /* We must scan for the new highest active register, since
4383
                 it isn't necessarily one less than now: consider
4384
                 (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
4385
                 new highest active register is 1.  */
4386
              unsigned char r = *p - 1;
4387
              while (r > 0 && !IS_ACTIVE (reg_info[r]))
4388
                r--;
4389
4390
              /* If we end up at register zero, that means that we saved
4391
                 the registers as the result of an `on_failure_jump', not
4392
                 a `start_memory', and we jumped to past the innermost
4393
                 `stop_memory'.  For example, in ((.)*) we save
4394
                 registers 1 and 2 as a result of the *, but when we pop
4395
                 back to the second ), we are at the stop_memory 1.
4396
                 Thus, nothing is active.  */
4397
	      if (r == 0)
4398
                {
4399
                  lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4400
                  highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4401
                }
4402
              else
4403
                highest_active_reg = r;
4404
            }
4405
4406
          /* If just failed to match something this time around with a
4407
             group that's operated on by a repetition operator, try to
4408
             force exit from the ``loop'', and restore the register
4409
             information for this group that we had before trying this
4410
             last match.  */
4411
          if ((!MATCHED_SOMETHING (reg_info[*p])
4412
               || just_past_start_mem == p - 1)
4413
	      && (p + 2) < pend)
4414
            {
4415
              boolean is_a_jump_n = false;
4416
4417
              p1 = p + 2;
4418
              mcnt = 0;
4419
              switch ((re_opcode_t) *p1++)
4420
                {
4421
                  case jump_n:
4422
		    is_a_jump_n = true;
4423
                  case pop_failure_jump:
4424
		  case maybe_pop_jump:
4425
		  case jump:
4426
		  case dummy_failure_jump:
4427
                    EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4428
		    if (is_a_jump_n)
4429
		      p1 += 2;
4430
                    break;
4431
4432
                  default:
4433
                    /* do nothing */ ;
4434
                }
4435
	      p1 += mcnt;
4436
4437
              /* If the next operation is a jump backwards in the pattern
4438
	         to an on_failure_jump right before the start_memory
4439
                 corresponding to this stop_memory, exit from the loop
4440
                 by forcing a failure after pushing on the stack the
4441
                 on_failure_jump's jump in the pattern, and d.  */
4442
              if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
4443
                  && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
4444
		{
4445
                  /* If this group ever matched anything, then restore
4446
                     what its registers were before trying this last
4447
                     failed match, e.g., with `(a*)*b' against `ab' for
4448
                     regstart[1], and, e.g., with `((a*)*(b*)*)*'
4449
                     against `aba' for regend[3].
4450
4451
                     Also restore the registers for inner groups for,
4452
                     e.g., `((a*)(b*))*' against `aba' (register 3 would
4453
                     otherwise get trashed).  */
4454
4455
                  if (EVER_MATCHED_SOMETHING (reg_info[*p]))
4456
		    {
4457
		      unsigned r;
4458
4459
                      EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
4460
4461
		      /* Restore this and inner groups' (if any) registers.  */
4462
                      for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);
4463
			   r++)
4464
                        {
4465
                          regstart[r] = old_regstart[r];
4466
4467
                          /* xx why this test?  */
4468
                          if (old_regend[r] >= regstart[r])
4469
                            regend[r] = old_regend[r];
4470
                        }
4471
                    }
4472
		  p1++;
4473
                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4474
                  PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
4475
4476
                  goto fail;
4477
                }
4478
            }
4479
4480
          /* Move past the register number and the inner group count.  */
4481
          p += 2;
4482
          break;
4483
4484
4485
	/* \<digit> has been turned into a `duplicate' command which is
4486
           followed by the numeric value of <digit> as the register number.  */
4487
        case duplicate:
4488
	  {
4489
	    register const char *d2, *dend2;
4490
	    int regno = *p++;   /* Get which register to match against.  */
4491
	    DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
4492
4493
	    /* Can't back reference a group which we've never matched.  */
4494
            if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
4495
              goto fail;
4496
4497
            /* Where in input to try to start matching.  */
4498
            d2 = regstart[regno];
4499
4500
            /* Where to stop matching; if both the place to start and
4501
               the place to stop matching are in the same string, then
4502
               set to the place to stop, otherwise, for now have to use
4503
               the end of the first string.  */
4504
4505
            dend2 = ((FIRST_STRING_P (regstart[regno])
4506
		      == FIRST_STRING_P (regend[regno]))
4507
		     ? regend[regno] : end_match_1);
4508
	    for (;;)
4509
	      {
4510
		/* If necessary, advance to next segment in register
4511
                   contents.  */
4512
		while (d2 == dend2)
4513
		  {
4514
		    if (dend2 == end_match_2) break;
4515
		    if (dend2 == regend[regno]) break;
4516
4517
                    /* End of string1 => advance to string2. */
4518
                    d2 = string2;
4519
                    dend2 = regend[regno];
4520
		  }
4521
		/* At end of register contents => success */
4522
		if (d2 == dend2) break;
4523
4524
		/* If necessary, advance to next segment in data.  */
4525
		PREFETCH ();
4526
4527
		/* How many characters left in this segment to match.  */
4528
		mcnt = dend - d;
4529
4530
		/* Want how many consecutive characters we can match in
4531
                   one shot, so, if necessary, adjust the count.  */
4532
                if (mcnt > dend2 - d2)
4533
		  mcnt = dend2 - d2;
4534
4535
		/* Compare that many; failure if mismatch, else move
4536
                   past them.  */
4537
		if (translate
4538
                    ? bcmp_translate (d, d2, mcnt, translate)
4539
                    : memcmp (d, d2, mcnt))
4540
		  goto fail;
4541
		d += mcnt, d2 += mcnt;
4542
4543
		/* Do this because we've matched some characters.  */
4544
		SET_REGS_MATCHED ();
4545
	      }
4546
	  }
4547
	  break;
4548
4549
4550
        /* begline matches the empty string at the beginning of the string
4551
           (unless `not_bol' is set in `bufp'), and, if
4552
           `newline_anchor' is set, after newlines.  */
4553
	case begline:
4554
          DEBUG_PRINT1 ("EXECUTING begline.\n");
4555
4556
          if (AT_STRINGS_BEG (d))
4557
            {
4558
              if (!bufp->not_bol) break;
4559
            }
4560
          else if (d[-1] == '\n' && bufp->newline_anchor)
4561
            {
4562
              break;
4563
            }
4564
          /* In all other cases, we fail.  */
4565
          goto fail;
4566
4567
4568
        /* endline is the dual of begline.  */
4569
	case endline:
4570
          DEBUG_PRINT1 ("EXECUTING endline.\n");
4571
4572
          if (AT_STRINGS_END (d))
4573
            {
4574
              if (!bufp->not_eol) break;
4575
            }
4576
4577
          /* We have to ``prefetch'' the next character.  */
4578
          else if ((d == end1 ? *string2 : *d) == '\n'
4579
                   && bufp->newline_anchor)
4580
            {
4581
              break;
4582
            }
4583
          goto fail;
4584
4585
4586
	/* Match at the very beginning of the data.  */
4587
        case begbuf:
4588
          DEBUG_PRINT1 ("EXECUTING begbuf.\n");
4589
          if (AT_STRINGS_BEG (d))
4590
            break;
4591
          goto fail;
4592
4593
4594
	/* Match at the very end of the data.  */
4595
        case endbuf:
4596
          DEBUG_PRINT1 ("EXECUTING endbuf.\n");
4597
	  if (AT_STRINGS_END (d))
4598
	    break;
4599
          goto fail;
4600
4601
4602
        /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
4603
           pushes NULL as the value for the string on the stack.  Then
4604
           `pop_failure_point' will keep the current value for the
4605
           string, instead of restoring it.  To see why, consider
4606
           matching `foo\nbar' against `.*\n'.  The .* matches the foo;
4607
           then the . fails against the \n.  But the next thing we want
4608
           to do is match the \n against the \n; if we restored the
4609
           string value, we would be back at the foo.
4610
4611
           Because this is used only in specific cases, we don't need to
4612
           check all the things that `on_failure_jump' does, to make
4613
           sure the right things get saved on the stack.  Hence we don't
4614
           share its code.  The only reason to push anything on the
4615
           stack at all is that otherwise we would have to change
4616
           `anychar's code to do something besides goto fail in this
4617
           case; that seems worse than this.  */
4618
        case on_failure_keep_string_jump:
4619
          DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
4620
4621
          EXTRACT_NUMBER_AND_INCR (mcnt, p);
4622
#ifdef _LIBC
4623
          DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
4624
#else
4625
          DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
4626
#endif
4627
4628
          PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
4629
          break;
4630
4631
4632
	/* Uses of on_failure_jump:
4633
4634
           Each alternative starts with an on_failure_jump that points
4635
           to the beginning of the next alternative.  Each alternative
4636
           except the last ends with a jump that in effect jumps past
4637
           the rest of the alternatives.  (They really jump to the
4638
           ending jump of the following alternative, because tensioning
4639
           these jumps is a hassle.)
4640
4641
           Repeats start with an on_failure_jump that points past both
4642
           the repetition text and either the following jump or
4643
           pop_failure_jump back to this on_failure_jump.  */
4644
	case on_failure_jump:
4645
        on_failure:
4646
          DEBUG_PRINT1 ("EXECUTING on_failure_jump");
4647
4648
          EXTRACT_NUMBER_AND_INCR (mcnt, p);
4649
#ifdef _LIBC
4650
          DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
4651
#else
4652
          DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
4653
#endif
4654
4655
          /* If this on_failure_jump comes right before a group (i.e.,
4656
             the original * applied to a group), save the information
4657
             for that group and all inner ones, so that if we fail back
4658
             to this point, the group's information will be correct.
4659
             For example, in \(a*\)*\1, we need the preceding group,
4660
             and in \(zz\(a*\)b*\)\2, we need the inner group.  */
4661
4662
          /* We can't use `p' to check ahead because we push
4663
             a failure point to `p + mcnt' after we do this.  */
4664
          p1 = p;
4665
4666
          /* We need to skip no_op's before we look for the
4667
             start_memory in case this on_failure_jump is happening as
4668
             the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
4669
             against aba.  */
4670
          while (p1 < pend && (re_opcode_t) *p1 == no_op)
4671
            p1++;
4672
4673
          if (p1 < pend && (re_opcode_t) *p1 == start_memory)
4674
            {
4675
              /* We have a new highest active register now.  This will
4676
                 get reset at the start_memory we are about to get to,
4677
                 but we will have saved all the registers relevant to
4678
                 this repetition op, as described above.  */
4679
              highest_active_reg = *(p1 + 1) + *(p1 + 2);
4680
              if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
4681
                lowest_active_reg = *(p1 + 1);
4682
            }
4683
4684
          DEBUG_PRINT1 (":\n");
4685
          PUSH_FAILURE_POINT (p + mcnt, d, -2);
4686
          break;
4687
4688
4689
        /* A smart repeat ends with `maybe_pop_jump'.
4690
	   We change it to either `pop_failure_jump' or `jump'.  */
4691
        case maybe_pop_jump:
4692
          EXTRACT_NUMBER_AND_INCR (mcnt, p);
4693
          DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
4694
          {
4695
	    register unsigned char *p2 = p;
4696
4697
            /* Compare the beginning of the repeat with what in the
4698
               pattern follows its end. If we can establish that there
4699
               is nothing that they would both match, i.e., that we
4700
               would have to backtrack because of (as in, e.g., `a*a')
4701
               then we can change to pop_failure_jump, because we'll
4702
               never have to backtrack.
4703
4704
               This is not true in the case of alternatives: in
4705
               `(a|ab)*' we do need to backtrack to the `ab' alternative
4706
               (e.g., if the string was `ab').  But instead of trying to
4707
               detect that here, the alternative has put on a dummy
4708
               failure point which is what we will end up popping.  */
4709
4710
	    /* Skip over open/close-group commands.
4711
	       If what follows this loop is a ...+ construct,
4712
	       look at what begins its body, since we will have to
4713
	       match at least one of that.  */
4714
	    while (1)
4715
	      {
4716
		if (p2 + 2 < pend
4717
		    && ((re_opcode_t) *p2 == stop_memory
4718
			|| (re_opcode_t) *p2 == start_memory))
4719
		  p2 += 3;
4720
		else if (p2 + 6 < pend
4721
			 && (re_opcode_t) *p2 == dummy_failure_jump)
4722
		  p2 += 6;
4723
		else
4724
		  break;
4725
	      }
4726
4727
	    p1 = p + mcnt;
4728
	    /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
4729
	       to the `maybe_pop_jump' of this case.  Examine what
4730
	       follows.  */
4731
4732
            /* If we're at the end of the pattern, we can change.  */
4733
            if (p2 == pend)
4734
	      {
4735
		/* Consider what happens when matching ":\(.*\)"
4736
		   against ":/".  I don't really understand this code
4737
		   yet.  */
4738
  	        p[-3] = (unsigned char) pop_failure_jump;
4739
                DEBUG_PRINT1
4740
                  ("  End of pattern: change to `pop_failure_jump'.\n");
4741
              }
4742
4743
            else if ((re_opcode_t) *p2 == exactn
4744
		     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
4745
	      {
4746
		register unsigned char c
4747
                  = *p2 == (unsigned char) endline ? '\n' : p2[2];
4748
4749
                if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
4750
                  {
4751
  		    p[-3] = (unsigned char) pop_failure_jump;
4752
                    DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
4753
                                  c, p1[5]);
4754
                  }
4755
4756
		else if ((re_opcode_t) p1[3] == charset
4757
			 || (re_opcode_t) p1[3] == charset_not)
4758
		  {
4759
		    int not = (re_opcode_t) p1[3] == charset_not;
4760
4761
		    if (c < (unsigned char) (p1[4] * BYTEWIDTH)
4762
			&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4763
		      not = !not;
4764
4765
                    /* `not' is equal to 1 if c would match, which means
4766
                        that we can't change to pop_failure_jump.  */
4767
		    if (!not)
4768
                      {
4769
  		        p[-3] = (unsigned char) pop_failure_jump;
4770
                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4771
                      }
4772
		  }
4773
	      }
4774
            else if ((re_opcode_t) *p2 == charset)
4775
	      {
4776
#ifdef DEBUG
4777
		register unsigned char c
4778
                  = *p2 == (unsigned char) endline ? '\n' : p2[2];
4779
#endif
4780
4781
#if 0
4782
                if ((re_opcode_t) p1[3] == exactn
4783
		    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
4784
			  && (p2[2 + p1[5] / BYTEWIDTH]
4785
			      & (1 << (p1[5] % BYTEWIDTH)))))
4786
#else
4787
                if ((re_opcode_t) p1[3] == exactn
4788
		    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[4]
4789
			  && (p2[2 + p1[4] / BYTEWIDTH]
4790
			      & (1 << (p1[4] % BYTEWIDTH)))))
4791
#endif
4792
                  {
4793
  		    p[-3] = (unsigned char) pop_failure_jump;
4794
                    DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
4795
                                  c, p1[5]);
4796
                  }
4797
4798
		else if ((re_opcode_t) p1[3] == charset_not)
4799
		  {
4800
		    int idx;
4801
		    /* We win if the charset_not inside the loop
4802
		       lists every character listed in the charset after.  */
4803
		    for (idx = 0; idx < (int) p2[1]; idx++)
4804
		      if (! (p2[2 + idx] == 0
4805
			     || (idx < (int) p1[4]
4806
				 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
4807
			break;
4808
4809
		    if (idx == p2[1])
4810
                      {
4811
  		        p[-3] = (unsigned char) pop_failure_jump;
4812
                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4813
                      }
4814
		  }
4815
		else if ((re_opcode_t) p1[3] == charset)
4816
		  {
4817
		    int idx;
4818
		    /* We win if the charset inside the loop
4819
		       has no overlap with the one after the loop.  */
4820
		    for (idx = 0;
4821
			 idx < (int) p2[1] && idx < (int) p1[4];
4822
			 idx++)
4823
		      if ((p2[2 + idx] & p1[5 + idx]) != 0)
4824
			break;
4825
4826
		    if (idx == p2[1] || idx == p1[4])
4827
                      {
4828
  		        p[-3] = (unsigned char) pop_failure_jump;
4829
                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4830
                      }
4831
		  }
4832
	      }
4833
	  }
4834
	  p -= 2;		/* Point at relative address again.  */
4835
	  if ((re_opcode_t) p[-1] != pop_failure_jump)
4836
	    {
4837
	      p[-1] = (unsigned char) jump;
4838
              DEBUG_PRINT1 ("  Match => jump.\n");
4839
	      goto unconditional_jump;
4840
	    }
4841
        /* Note fall through.  */
4842
4843
4844
	/* The end of a simple repeat has a pop_failure_jump back to
4845
           its matching on_failure_jump, where the latter will push a
4846
           failure point.  The pop_failure_jump takes off failure
4847
           points put on by this pop_failure_jump's matching
4848
           on_failure_jump; we got through the pattern to here from the
4849
           matching on_failure_jump, so didn't fail.  */
4850
        case pop_failure_jump:
4851
          {
4852
            /* We need to pass separate storage for the lowest and
4853
               highest registers, even though we don't care about the
4854
               actual values.  Otherwise, we will restore only one
4855
               register from the stack, since lowest will == highest in
4856
               `pop_failure_point'.  */
4857
            active_reg_t dummy_low_reg, dummy_high_reg;
4858
            unsigned char *pdummy;
4859
            const char *sdummy;
4860
4861
            DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
4862
            POP_FAILURE_POINT (sdummy, pdummy,
4863
                               dummy_low_reg, dummy_high_reg,
4864
                               reg_dummy, reg_dummy, reg_info_dummy);
4865
          }
4866
	  /* Note fall through.  */
4867
4868
	unconditional_jump:
4869
#ifdef _LIBC
4870
	  DEBUG_PRINT2 ("\n%p: ", p);
4871
#else
4872
	  DEBUG_PRINT2 ("\n0x%x: ", p);
4873
#endif
4874
          /* Note fall through.  */
4875
4876
        /* Unconditionally jump (without popping any failure points).  */
4877
        case jump:
4878
	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
4879
          DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
4880
	  p += mcnt;				/* Do the jump.  */
4881
#ifdef _LIBC
4882
          DEBUG_PRINT2 ("(to %p).\n", p);
4883
#else
4884
          DEBUG_PRINT2 ("(to 0x%x).\n", p);
4885
#endif
4886
	  break;
4887
4888
4889
        /* We need this opcode so we can detect where alternatives end
4890
           in `group_match_null_string_p' et al.  */
4891
        case jump_past_alt:
4892
          DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
4893
          goto unconditional_jump;
4894
4895
4896
        /* Normally, the on_failure_jump pushes a failure point, which
4897
           then gets popped at pop_failure_jump.  We will end up at
4898
           pop_failure_jump, also, and with a pattern of, say, `a+', we
4899
           are skipping over the on_failure_jump, so we have to push
4900
           something meaningless for pop_failure_jump to pop.  */
4901
        case dummy_failure_jump:
4902
          DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
4903
          /* It doesn't matter what we push for the string here.  What
4904
             the code at `fail' tests is the value for the pattern.  */
4905
          PUSH_FAILURE_POINT (NULL, NULL, -2);
4906
          goto unconditional_jump;
4907
4908
4909
        /* At the end of an alternative, we need to push a dummy failure
4910
           point in case we are followed by a `pop_failure_jump', because
4911
           we don't want the failure point for the alternative to be
4912
           popped.  For example, matching `(a|ab)*' against `aab'
4913
           requires that we match the `ab' alternative.  */
4914
        case push_dummy_failure:
4915
          DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
4916
          /* See comments just above at `dummy_failure_jump' about the
4917
             two NULL arguments.  */
4918
          PUSH_FAILURE_POINT (NULL, NULL, -2);
4919
          break;
4920
4921
        /* Have to succeed matching what follows at least n times.
4922
           After that, handle like `on_failure_jump'.  */
4923
        case succeed_n:
4924
          EXTRACT_NUMBER (mcnt, p + 2);
4925
          DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
4926
4927
          assert (mcnt >= 0);
4928
          /* Originally, this is how many times we HAVE to succeed.  */
4929
          if (mcnt > 0)
4930
            {
4931
               mcnt--;
4932
	       p += 2;
4933
               STORE_NUMBER_AND_INCR (p, mcnt);
4934
#ifdef _LIBC
4935
               DEBUG_PRINT3 ("  Setting %p to %d.\n", p - 2, mcnt);
4936
#else
4937
               DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p - 2, mcnt);
4938
#endif
4939
            }
4940
	  else if (mcnt == 0)
4941
            {
4942
#ifdef _LIBC
4943
              DEBUG_PRINT2 ("  Setting two bytes from %p to no_op.\n", p+2);
4944
#else
4945
              DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n", p+2);
4946
#endif
4947
	      p[2] = (unsigned char) no_op;
4948
              p[3] = (unsigned char) no_op;
4949
              goto on_failure;
4950
            }
4951
          break;
4952
4953
        case jump_n:
4954
          EXTRACT_NUMBER (mcnt, p + 2);
4955
          DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
4956
4957
          /* Originally, this is how many times we CAN jump.  */
4958
          if (mcnt)
4959
            {
4960
               mcnt--;
4961
               STORE_NUMBER (p + 2, mcnt);
4962
#ifdef _LIBC
4963
               DEBUG_PRINT3 ("  Setting %p to %d.\n", p + 2, mcnt);
4964
#else
4965
               DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p + 2, mcnt);
4966
#endif
4967
	       goto unconditional_jump;
4968
            }
4969
          /* If don't have to jump any more, skip over the rest of command.  */
4970
	  else
4971
	    p += 4;
4972
          break;
4973
4974
	case set_number_at:
4975
	  {
4976
            DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
4977
4978
            EXTRACT_NUMBER_AND_INCR (mcnt, p);
4979
            p1 = p + mcnt;
4980
            EXTRACT_NUMBER_AND_INCR (mcnt, p);
4981
#ifdef _LIBC
4982
            DEBUG_PRINT3 ("  Setting %p to %d.\n", p1, mcnt);
4983
#else
4984
            DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
4985
#endif
4986
	    STORE_NUMBER (p1, mcnt);
4987
            break;
4988
          }
4989
4990
#if 0
4991
	/* The DEC Alpha C compiler 3.x generates incorrect code for the
4992
	   test  WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
4993
	   AT_WORD_BOUNDARY, so this code is disabled.  Expanding the
4994
	   macro and introducing temporary variables works around the bug.  */
4995
4996
	case wordbound:
4997
	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
4998
	  if (AT_WORD_BOUNDARY (d))
4999
	    break;
5000
	  goto fail;
5001
5002
	case notwordbound:
5003
	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
5004
	  if (AT_WORD_BOUNDARY (d))
5005
	    goto fail;
5006
	  break;
5007
#else
5008
	case wordbound:
5009
	{
5010
	  boolean prevchar, thischar;
5011
5012
	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
5013
	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
5014
	    break;
5015
5016
	  prevchar = WORDCHAR_P (d - 1);
5017
	  thischar = WORDCHAR_P (d);
5018
	  if (prevchar != thischar)
5019
	    break;
5020
	  goto fail;
5021
	}
5022
5023
      case notwordbound:
5024
	{
5025
	  boolean prevchar, thischar;
5026
5027
	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
5028
	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
5029
	    goto fail;
5030
5031
	  prevchar = WORDCHAR_P (d - 1);
5032
	  thischar = WORDCHAR_P (d);
5033
	  if (prevchar != thischar)
5034
	    goto fail;
5035
	  break;
5036
	}
5037
#endif
5038
5039
	case wordbeg:
5040
          DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
5041
	  if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
5042
	    break;
5043
          goto fail;
5044
5045
	case wordend:
5046
          DEBUG_PRINT1 ("EXECUTING wordend.\n");
5047
	  if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
5048
              && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
5049
	    break;
5050
          goto fail;
5051
5052
#ifdef emacs
5053
  	case before_dot:
5054
          DEBUG_PRINT1 ("EXECUTING before_dot.\n");
5055
 	  if (PTR_CHAR_POS ((unsigned char *) d) >= point)
5056
  	    goto fail;
5057
  	  break;
5058
5059
  	case at_dot:
5060
          DEBUG_PRINT1 ("EXECUTING at_dot.\n");
5061
 	  if (PTR_CHAR_POS ((unsigned char *) d) != point)
5062
  	    goto fail;
5063
  	  break;
5064
5065
  	case after_dot:
5066
          DEBUG_PRINT1 ("EXECUTING after_dot.\n");
5067
          if (PTR_CHAR_POS ((unsigned char *) d) <= point)
5068
  	    goto fail;
5069
  	  break;
5070
5071
	case syntaxspec:
5072
          DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
5073
	  mcnt = *p++;
5074
	  goto matchsyntax;
5075
5076
        case wordchar:
5077
          DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
5078
	  mcnt = (int) Sword;
5079
        matchsyntax:
5080
	  PREFETCH ();
5081
	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
5082
	  d++;
5083
	  if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
5084
	    goto fail;
5085
          SET_REGS_MATCHED ();
5086
	  break;
5087
5088
	case notsyntaxspec:
5089
          DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
5090
	  mcnt = *p++;
5091
	  goto matchnotsyntax;
5092
5093
        case notwordchar:
5094
          DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
5095
	  mcnt = (int) Sword;
5096
        matchnotsyntax:
5097
	  PREFETCH ();
5098
	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
5099
	  d++;
5100
	  if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
5101
	    goto fail;
5102
	  SET_REGS_MATCHED ();
5103
          break;
5104
5105
#else /* not emacs */
5106
	case wordchar:
5107
          DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
5108
	  PREFETCH ();
5109
          if (!WORDCHAR_P (d))
5110
            goto fail;
5111
	  SET_REGS_MATCHED ();
5112
          d++;
5113
	  break;
5114
5115
	case notwordchar:
5116
          DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
5117
	  PREFETCH ();
5118
	  if (WORDCHAR_P (d))
5119
            goto fail;
5120
          SET_REGS_MATCHED ();
5121
          d++;
5122
	  break;
5123
#endif /* not emacs */
5124
5125
        default:
5126
          abort ();
5127
	}
5128
      continue;  /* Successfully executed one pattern command; keep going.  */
5129
5130
5131
    /* We goto here if a matching operation fails. */
5132
    fail:
5133
      if (!FAIL_STACK_EMPTY ())
5134
	{ /* A restart point is known.  Restore to that state.  */
5135
          DEBUG_PRINT1 ("\nFAIL:\n");
5136
          POP_FAILURE_POINT (d, p,
5137
                             lowest_active_reg, highest_active_reg,
5138
                             regstart, regend, reg_info);
5139
5140
          /* If this failure point is a dummy, try the next one.  */
5141
          if (!p)
5142
	    goto fail;
5143
5144
          /* If we failed to the end of the pattern, don't examine *p.  */
5145
	  assert (p <= pend);
5146
          if (p < pend)
5147
            {
5148
              boolean is_a_jump_n = false;
5149
5150
              /* If failed to a backwards jump that's part of a repetition
5151
                 loop, need to pop this failure point and use the next one.  */
5152
              switch ((re_opcode_t) *p)
5153
                {
5154
                case jump_n:
5155
                  is_a_jump_n = true;
5156
                case maybe_pop_jump:
5157
                case pop_failure_jump:
5158
                case jump:
5159
                  p1 = p + 1;
5160
                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5161
                  p1 += mcnt;
5162
5163
                  if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
5164
                      || (!is_a_jump_n
5165
                          && (re_opcode_t) *p1 == on_failure_jump))
5166
                    goto fail;
5167
                  break;
5168
                default:
5169
                  /* do nothing */ ;
5170
                }
5171
            }
5172
5173
          if (d >= string1 && d <= end1)
5174
	    dend = end_match_1;
5175
        }
5176
      else
5177
        break;   /* Matching at this starting point really fails.  */
5178
    } /* for (;;) */
5179
5180
  if (best_regs_set)
5181
    goto restore_best_regs;
5182
5183
  FREE_VARIABLES ();
5184
5185
  return -1;         			/* Failure to match.  */
5186
} /* re_match_2 */
5187
5188
/* Subroutine definitions for re_match_2.  */
5189
5190
5191
/* We are passed P pointing to a register number after a start_memory.
5192
5193
   Return true if the pattern up to the corresponding stop_memory can
5194
   match the empty string, and false otherwise.
5195
5196
   If we find the matching stop_memory, sets P to point to one past its number.
5197
   Otherwise, sets P to an undefined byte less than or equal to END.
5198
5199
   We don't handle duplicates properly (yet).  */
5200
5201
static boolean
5202
group_match_null_string_p (p, end, reg_info)
5203
    unsigned char **p, *end;
5204
    register_info_type *reg_info;
5205
{
5206
  int mcnt;
5207
  /* Point to after the args to the start_memory.  */
5208
  unsigned char *p1 = *p + 2;
5209
5210
  while (p1 < end)
5211
    {
5212
      /* Skip over opcodes that can match nothing, and return true or
5213
	 false, as appropriate, when we get to one that can't, or to the
5214
         matching stop_memory.  */
5215
5216
      switch ((re_opcode_t) *p1)
5217
        {
5218
        /* Could be either a loop or a series of alternatives.  */
5219
        case on_failure_jump:
5220
          p1++;
5221
          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5222
5223
          /* If the next operation is not a jump backwards in the
5224
	     pattern.  */
5225
5226
	  if (mcnt >= 0)
5227
	    {
5228
              /* Go through the on_failure_jumps of the alternatives,
5229
                 seeing if any of the alternatives cannot match nothing.
5230
                 The last alternative starts with only a jump,
5231
                 whereas the rest start with on_failure_jump and end
5232
                 with a jump, e.g., here is the pattern for `a|b|c':
5233
5234
                 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
5235
                 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
5236
                 /exactn/1/c
5237
5238
                 So, we have to first go through the first (n-1)
5239
                 alternatives and then deal with the last one separately.  */
5240
5241
5242
              /* Deal with the first (n-1) alternatives, which start
5243
                 with an on_failure_jump (see above) that jumps to right
5244
                 past a jump_past_alt.  */
5245
5246
              while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
5247
                {
5248
                  /* `mcnt' holds how many bytes long the alternative
5249
                     is, including the ending `jump_past_alt' and
5250
                     its number.  */
5251
5252
                  if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
5253
				                      reg_info))
5254
                    return false;
5255
5256
                  /* Move to right after this alternative, including the
5257
		     jump_past_alt.  */
5258
                  p1 += mcnt;
5259
5260
                  /* Break if it's the beginning of an n-th alternative
5261
                     that doesn't begin with an on_failure_jump.  */
5262
                  if ((re_opcode_t) *p1 != on_failure_jump)
5263
                    break;
5264
5265
		  /* Still have to check that it's not an n-th
5266
		     alternative that starts with an on_failure_jump.  */
5267
		  p1++;
5268
                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5269
                  if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
5270
                    {
5271
		      /* Get to the beginning of the n-th alternative.  */
5272
                      p1 -= 3;
5273
                      break;
5274
                    }
5275
                }
5276
5277
              /* Deal with the last alternative: go back and get number
5278
                 of the `jump_past_alt' just before it.  `mcnt' contains
5279
                 the length of the alternative.  */
5280
              EXTRACT_NUMBER (mcnt, p1 - 2);
5281
5282
              if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
5283
                return false;
5284
5285
              p1 += mcnt;	/* Get past the n-th alternative.  */
5286
            } /* if mcnt > 0 */
5287
          break;
5288
5289
5290
        case stop_memory:
5291
	  assert (p1[1] == **p);
5292
          *p = p1 + 2;
5293
          return true;
5294
5295
5296
        default:
5297
          if (!common_op_match_null_string_p (&p1, end, reg_info))
5298
            return false;
5299
        }
5300
    } /* while p1 < end */
5301
5302
  return false;
5303
} /* group_match_null_string_p */
5304
5305
5306
/* Similar to group_match_null_string_p, but doesn't deal with alternatives:
5307
   It expects P to be the first byte of a single alternative and END one
5308
   byte past the last. The alternative can contain groups.  */
5309
5310
static boolean
5311
alt_match_null_string_p (p, end, reg_info)
5312
    unsigned char *p, *end;
5313
    register_info_type *reg_info;
5314
{
5315
  int mcnt;
5316
  unsigned char *p1 = p;
5317
5318
  while (p1 < end)
5319
    {
5320
      /* Skip over opcodes that can match nothing, and break when we get
5321
         to one that can't.  */
5322
5323
      switch ((re_opcode_t) *p1)
5324
        {
5325
	/* It's a loop.  */
5326
        case on_failure_jump:
5327
          p1++;
5328
          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5329
          p1 += mcnt;
5330
          break;
5331
5332
	default:
5333
          if (!common_op_match_null_string_p (&p1, end, reg_info))
5334
            return false;
5335
        }
5336
    }  /* while p1 < end */
5337
5338
  return true;
5339
} /* alt_match_null_string_p */
5340
5341
5342
/* Deals with the ops common to group_match_null_string_p and
5343
   alt_match_null_string_p.
5344
5345
   Sets P to one after the op and its arguments, if any.  */
5346
5347
static boolean
5348
common_op_match_null_string_p (p, end, reg_info)
5349
    unsigned char **p, *end;
5350
    register_info_type *reg_info;
5351
{
5352
  int mcnt;
5353
  boolean ret;
5354
  int reg_no;
5355
  unsigned char *p1 = *p;
5356
5357
  switch ((re_opcode_t) *p1++)
5358
    {
5359
    case no_op:
5360
    case begline:
5361
    case endline:
5362
    case begbuf:
5363
    case endbuf:
5364
    case wordbeg:
5365
    case wordend:
5366
    case wordbound:
5367
    case notwordbound:
5368
#ifdef emacs
5369
    case before_dot:
5370
    case at_dot:
5371
    case after_dot:
5372
#endif
5373
      break;
5374
5375
    case start_memory:
5376
      reg_no = *p1;
5377
      assert (reg_no > 0 && reg_no <= MAX_REGNUM);
5378
      ret = group_match_null_string_p (&p1, end, reg_info);
5379
5380
      /* Have to set this here in case we're checking a group which
5381
         contains a group and a back reference to it.  */
5382
5383
      if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
5384
        REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
5385
5386
      if (!ret)
5387
        return false;
5388
      break;
5389
5390
    /* If this is an optimized succeed_n for zero times, make the jump.  */
5391
    case jump:
5392
      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5393
      if (mcnt >= 0)
5394
        p1 += mcnt;
5395
      else
5396
        return false;
5397
      break;
5398
5399
    case succeed_n:
5400
      /* Get to the number of times to succeed.  */
5401
      p1 += 2;
5402
      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5403
5404
      if (mcnt == 0)
5405
        {
5406
          p1 -= 4;
5407
          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5408
          p1 += mcnt;
5409
        }
5410
      else
5411
        return false;
5412
      break;
5413
5414
    case duplicate:
5415
      if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
5416
        return false;
5417
      break;
5418
5419
    case set_number_at:
5420
      p1 += 4;
5421
5422
    default:
5423
      /* All other opcodes mean we cannot match the empty string.  */
5424
      return false;
5425
  }
5426
5427
  *p = p1;
5428
  return true;
5429
} /* common_op_match_null_string_p */
5430
5431
5432
/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
5433
   bytes; nonzero otherwise.  */
5434
5435
static int
5436
bcmp_translate (s1, s2, len, translate)
5437
     const char *s1, *s2;
5438
     register int len;
5439
     RE_TRANSLATE_TYPE translate;
5440
{
5441
  register const unsigned char *p1 = (const unsigned char *) s1;
5442
  register const unsigned char *p2 = (const unsigned char *) s2;
5443
  while (len)
5444
    {
5445
      if (translate[*p1++] != translate[*p2++]) return 1;
5446
      len--;
5447
    }
5448
  return 0;
5449
}
5450
5451
/* Entry points for GNU code.  */
5452
5453
/* re_compile_pattern is the GNU regular expression compiler: it
5454
   compiles PATTERN (of length SIZE) and puts the result in BUFP.
5455
   Returns 0 if the pattern was valid, otherwise an error string.
5456
5457
   Assumes the `allocated' (and perhaps `buffer') and `translate' fields
5458
   are set in BUFP on entry.
5459
5460
   We call regex_compile to do the actual compilation.  */
5461
5462
const char *
5463
re_compile_pattern (pattern, length, bufp)
5464
     const char *pattern;
5465
     size_t length;
5466
     struct re_pattern_buffer *bufp;
5467
{
5468
  reg_errcode_t ret;
5469
5470
  /* GNU code is written to assume at least RE_NREGS registers will be set
5471
     (and at least one extra will be -1).  */
5472
  bufp->regs_allocated = REGS_UNALLOCATED;
5473
5474
  /* And GNU code determines whether or not to get register information
5475
     by passing null for the REGS argument to re_match, etc., not by
5476
     setting no_sub.  */
5477
  bufp->no_sub = 0;
5478
5479
  /* Match anchors at newline.  */
5480
  bufp->newline_anchor = 1;
5481
5482
  ret = regex_compile (pattern, length, re_syntax_options, bufp);
5483
5484
  if (!ret)
5485
    return NULL;
5486
  return gettext (re_error_msgid[(int) ret]);
5487
}
5488
#ifdef _LIBC
5489
weak_alias (__re_compile_pattern, re_compile_pattern)
5490
#endif
5491
5492
/* Entry points compatible with 4.2 BSD regex library.  We don't define
5493
   them unless specifically requested.  */
5494
5495
#if defined _REGEX_RE_COMP || defined _LIBC
5496
5497
/* BSD has one and only one pattern buffer.  */
5498
static struct re_pattern_buffer re_comp_buf;
5499
5500
char *
5501
#ifdef _LIBC
5502
/* Make these definitions weak in libc, so POSIX programs can redefine
5503
   these names if they don't use our functions, and still use
5504
   regcomp/regexec below without link errors.  */
5505
weak_function
5506
#endif
5507
re_comp (s)
5508
    const char *s;
5509
{
5510
  reg_errcode_t ret;
5511
5512
  if (!s)
5513
    {
5514
      if (!re_comp_buf.buffer)
5515
	return gettext ("No previous regular expression");
5516
      return 0;
5517
    }
5518
5519
  if (!re_comp_buf.buffer)
5520
    {
5521
      re_comp_buf.buffer = (unsigned char *) malloc (200);
5522
      if (re_comp_buf.buffer == NULL)
5523
        return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
5524
      re_comp_buf.allocated = 200;
5525
5526
      re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
5527
      if (re_comp_buf.fastmap == NULL)
5528
	return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
5529
    }
5530
5531
  /* Since `re_exec' always passes NULL for the `regs' argument, we
5532
     don't need to initialize the pattern buffer fields which affect it.  */
5533
5534
  /* Match anchors at newlines.  */
5535
  re_comp_buf.newline_anchor = 1;
5536
5537
  ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
5538
5539
  if (!ret)
5540
    return NULL;
5541
5542
  /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
5543
  return (char *) gettext (re_error_msgid[(int) ret]);
5544
}
5545
5546
5547
int
5548
#ifdef _LIBC
5549
weak_function
5550
#endif
5551
re_exec (s)
5552
    const char *s;
5553
{
5554
  const int len = strlen (s);
5555
  return
5556
    0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
5557
}
5558
5559
#endif /* _REGEX_RE_COMP */
5560
5561
/* POSIX.2 functions.  Don't define these for Emacs.  */
5562
5563
#ifndef emacs
5564
5565
/* regcomp takes a regular expression as a string and compiles it.
5566
5567
   PREG is a regex_t *.  We do not expect any fields to be initialized,
5568
   since POSIX says we shouldn't.  Thus, we set
5569
5570
     `buffer' to the compiled pattern;
5571
     `used' to the length of the compiled pattern;
5572
     `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
5573
       REG_EXTENDED bit in CFLAGS is set; otherwise, to
5574
       RE_SYNTAX_POSIX_BASIC;
5575
     `newline_anchor' to REG_NEWLINE being set in CFLAGS;
5576
     `fastmap' to an allocated space for the fastmap;
5577
     `fastmap_accurate' to zero;
5578
     `re_nsub' to the number of subexpressions in PATTERN.
5579
5580
   PATTERN is the address of the pattern string.
5581
5582
   CFLAGS is a series of bits which affect compilation.
5583
5584
     If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
5585
     use POSIX basic syntax.
5586
5587
     If REG_NEWLINE is set, then . and [^...] don't match newline.
5588
     Also, regexec will try a match beginning after every newline.
5589
5590
     If REG_ICASE is set, then we considers upper- and lowercase
5591
     versions of letters to be equivalent when matching.
5592
5593
     If REG_NOSUB is set, then when PREG is passed to regexec, that
5594
     routine will report only success or failure, and nothing about the
5595
     registers.
5596
5597
   It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
5598
   the return codes and their meanings.)  */
5599
5600
int
5601
regcomp (preg, pattern, cflags)
5602
    regex_t *preg;
5603
    const char *pattern;
5604
    int cflags;
5605
{
5606
  reg_errcode_t ret;
5607
  reg_syntax_t syntax
5608
    = (cflags & REG_EXTENDED) ?
5609
      RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
5610
5611
  /* regex_compile will allocate the space for the compiled pattern.  */
5612
  preg->buffer = 0;
5613
  preg->allocated = 0;
5614
  preg->used = 0;
5615
5616
  /* Try to allocate space for the fastmap.  */
5617
  preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
5618
5619
  if (cflags & REG_ICASE)
5620
    {
5621
      unsigned i;
5622
5623
      preg->translate
5624
	= (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
5625
				      * sizeof (*(RE_TRANSLATE_TYPE)0));
5626
      if (preg->translate == NULL)
5627
        return (int) REG_ESPACE;
5628
5629
      /* Map uppercase characters to corresponding lowercase ones.  */
5630
      for (i = 0; i < CHAR_SET_SIZE; i++)
5631
        preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
5632
    }
5633
  else
5634
    preg->translate = NULL;
5635
5636
  /* If REG_NEWLINE is set, newlines are treated differently.  */
5637
  if (cflags & REG_NEWLINE)
5638
    { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
5639
      syntax &= ~RE_DOT_NEWLINE;
5640
      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
5641
      /* It also changes the matching behavior.  */
5642
      preg->newline_anchor = 1;
5643
    }
5644
  else
5645
    preg->newline_anchor = 0;
5646
5647
  preg->no_sub = !!(cflags & REG_NOSUB);
5648
5649
  /* POSIX says a null character in the pattern terminates it, so we
5650
     can use strlen here in compiling the pattern.  */
5651
  ret = regex_compile (pattern, strlen (pattern), syntax, preg);
5652
5653
  /* POSIX doesn't distinguish between an unmatched open-group and an
5654
     unmatched close-group: both are REG_EPAREN.  */
5655
  if (ret == REG_ERPAREN) ret = REG_EPAREN;
5656
5657
  if (ret == REG_NOERROR && preg->fastmap)
5658
    {
5659
      /* Compute the fastmap now, since regexec cannot modify the pattern
5660
	 buffer.  */
5661
      if (re_compile_fastmap (preg) == -2)
5662
	{
5663
	  /* Some error occured while computing the fastmap, just forget
5664
	     about it.  */
5665
	  free (preg->fastmap);
5666
	  preg->fastmap = NULL;
5667
	}
5668
    }
5669
5670
  return (int) ret;
5671
}
5672
#ifdef _LIBC
5673
weak_alias (__regcomp, regcomp)
5674
#endif
5675
5676
5677
/* regexec searches for a given pattern, specified by PREG, in the
5678
   string STRING.
5679
5680
   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
5681
   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
5682
   least NMATCH elements, and we set them to the offsets of the
5683
   corresponding matched substrings.
5684
5685
   EFLAGS specifies `execution flags' which affect matching: if
5686
   REG_NOTBOL is set, then ^ does not match at the beginning of the
5687
   string; if REG_NOTEOL is set, then $ does not match at the end.
5688
5689
   We return 0 if we find a match and REG_NOMATCH if not.  */
5690
5691
int
5692
regexec (preg, string, nmatch, pmatch, eflags)
5693
    const regex_t *preg;
5694
    const char *string;
5695
    size_t nmatch;
5696
    regmatch_t pmatch[];
5697
    int eflags;
5698
{
5699
  int ret;
5700
  struct re_registers regs;
5701
  regex_t private_preg;
5702
  int len = strlen (string);
5703
  boolean want_reg_info = !preg->no_sub && nmatch > 0;
5704
5705
  private_preg = *preg;
5706
5707
  private_preg.not_bol = !!(eflags & REG_NOTBOL);
5708
  private_preg.not_eol = !!(eflags & REG_NOTEOL);
5709
5710
  /* The user has told us exactly how many registers to return
5711
     information about, via `nmatch'.  We have to pass that on to the
5712
     matching routines.  */
5713
  private_preg.regs_allocated = REGS_FIXED;
5714
5715
  if (want_reg_info)
5716
    {
5717
      regs.num_regs = nmatch;
5718
      regs.start = TALLOC (nmatch * 2, regoff_t);
5719
      if (regs.start == NULL)
5720
        return (int) REG_NOMATCH;
5721
      regs.end = regs.start + nmatch;
5722
    }
5723
5724
  /* Perform the searching operation.  */
5725
  ret = re_search (&private_preg, string, len,
5726
                   /* start: */ 0, /* range: */ len,
5727
                   want_reg_info ? &regs : (struct re_registers *) 0);
5728
5729
  /* Copy the register information to the POSIX structure.  */
5730
  if (want_reg_info)
5731
    {
5732
      if (ret >= 0)
5733
        {
5734
          unsigned r;
5735
5736
          for (r = 0; r < nmatch; r++)
5737
            {
5738
              pmatch[r].rm_so = regs.start[r];
5739
              pmatch[r].rm_eo = regs.end[r];
5740
            }
5741
        }
5742
5743
      /* If we needed the temporary register info, free the space now.  */
5744
      free (regs.start);
5745
    }
5746
5747
  /* We want zero return to mean success, unlike `re_search'.  */
5748
  return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
5749
}
5750
#ifdef _LIBC
5751
weak_alias (__regexec, regexec)
5752
#endif
5753
5754
5755
/* Returns a message corresponding to an error code, ERRCODE, returned
5756
   from either regcomp or regexec.   We don't use PREG here.  */
5757
5758
size_t
5759
regerror (errcode, preg, errbuf, errbuf_size)
5760
    int errcode;
5761
    const regex_t *preg;
5762
    char *errbuf;
5763
    size_t errbuf_size;
5764
{
5765
  const char *msg;
5766
  size_t msg_size;
5767
5768
  if (errcode < 0
5769
      || errcode >= (int) (sizeof (re_error_msgid)
5770
			   / sizeof (re_error_msgid[0])))
5771
    /* Only error codes returned by the rest of the code should be passed
5772
       to this routine.  If we are given anything else, or if other regex
5773
       code generates an invalid error code, then the program has a bug.
5774
       Dump core so we can fix it.  */
5775
    abort ();
5776
5777
  msg = gettext (re_error_msgid[errcode]);
5778
5779
  msg_size = strlen (msg) + 1; /* Includes the null.  */
5780
5781
  if (errbuf_size != 0)
5782
    {
5783
      if (msg_size > errbuf_size)
5784
        {
5785
#if defined HAVE_MEMPCPY || defined _LIBC
5786
	  *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
5787
#else
5788
          memcpy (errbuf, msg, errbuf_size - 1);
5789
          errbuf[errbuf_size - 1] = 0;
5790
#endif
5791
        }
5792
      else
5793
        memcpy (errbuf, msg, msg_size);
5794
    }
5795
5796
  return msg_size;
5797
}
5798
#ifdef _LIBC
5799
weak_alias (__regerror, regerror)
5800
#endif
5801
5802
5803
/* Free dynamically allocated space used by PREG.  */
5804
5805
void
5806
regfree (preg)
5807
    regex_t *preg;
5808
{
5809
  if (preg->buffer != NULL)
5810
    free (preg->buffer);
5811
  preg->buffer = NULL;
5812
5813
  preg->allocated = 0;
5814
  preg->used = 0;
5815
5816
  if (preg->fastmap != NULL)
5817
    free (preg->fastmap);
5818
  preg->fastmap = NULL;
5819
  preg->fastmap_accurate = 0;
5820
5821
  if (preg->translate != NULL)
5822
    free (preg->translate);
5823
  preg->translate = NULL;
5824
}
5825
#ifdef _LIBC
5826
weak_alias (__regfree, regfree)
5827
#endif
5828
5829
#endif /* not emacs  */
(-)grep/regex.h (+572 lines)
Line 0 Link Here
1
/* Definitions for data structures and routines for the regular
2
   expression library, version 0.12.
3
   Copyright (C) 1985,89,90,91,92,93,95,96,97,98 Free Software Foundation, Inc.
4
5
   This file is part of the GNU C Library.  Its master source is NOT part of
6
   the C library, however.  The master source lives in /gd/gnu/lib.
7
8
   The GNU C Library is free software; you can redistribute it and/or
9
   modify it under the terms of the GNU Library General Public License as
10
   published by the Free Software Foundation; either version 2 of the
11
   License, or (at your option) any later version.
12
13
   The GNU C Library is distributed in the hope that it will be useful,
14
   but WITHOUT ANY WARRANTY; without even the implied warranty of
15
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16
   Library General Public License for more details.
17
18
   You should have received a copy of the GNU Library General Public
19
   License along with the GNU C Library; see the file COPYING.LIB.  If not,
20
   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21
   Boston, MA 02111-1307, USA.  */
22
23
#ifndef _REGEX_H
24
#define _REGEX_H 1
25
26
/* Allow the use in C++ code.  */
27
#ifdef __cplusplus
28
extern "C" {
29
#endif
30
31
/* POSIX says that <sys/types.h> must be included (by the caller) before
32
   <regex.h>.  */
33
34
#if !defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE && defined VMS
35
/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
36
   should be there.  */
37
# include <stddef.h>
38
#endif
39
40
/* The following two types have to be signed and unsigned integer type
41
   wide enough to hold a value of a pointer.  For most ANSI compilers
42
   ptrdiff_t and size_t should be likely OK.  Still size of these two
43
   types is 2 for Microsoft C.  Ugh... */
44
typedef long int s_reg_t;
45
typedef unsigned long int active_reg_t;
46
47
/* The following bits are used to determine the regexp syntax we
48
   recognize.  The set/not-set meanings are chosen so that Emacs syntax
49
   remains the value 0.  The bits are given in alphabetical order, and
50
   the definitions shifted by one from the previous bit; thus, when we
51
   add or remove a bit, only one other definition need change.  */
52
typedef unsigned long int reg_syntax_t;
53
54
/* If this bit is not set, then \ inside a bracket expression is literal.
55
   If set, then such a \ quotes the following character.  */
56
#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)
57
58
/* If this bit is not set, then + and ? are operators, and \+ and \? are
59
     literals.
60
   If set, then \+ and \? are operators and + and ? are literals.  */
61
#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
62
63
/* If this bit is set, then character classes are supported.  They are:
64
     [:alpha:], [:upper:], [:lower:],  [:digit:], [:alnum:], [:xdigit:],
65
     [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
66
   If not set, then character classes are not supported.  */
67
#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
68
69
/* If this bit is set, then ^ and $ are always anchors (outside bracket
70
     expressions, of course).
71
   If this bit is not set, then it depends:
72
        ^  is an anchor if it is at the beginning of a regular
73
           expression or after an open-group or an alternation operator;
74
        $  is an anchor if it is at the end of a regular expression, or
75
           before a close-group or an alternation operator.
76
77
   This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
78
   POSIX draft 11.2 says that * etc. in leading positions is undefined.
79
   We already implemented a previous draft which made those constructs
80
   invalid, though, so we haven't changed the code back.  */
81
#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
82
83
/* If this bit is set, then special characters are always special
84
     regardless of where they are in the pattern.
85
   If this bit is not set, then special characters are special only in
86
     some contexts; otherwise they are ordinary.  Specifically,
87
     * + ? and intervals are only special when not after the beginning,
88
     open-group, or alternation operator.  */
89
#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
90
91
/* If this bit is set, then *, +, ?, and { cannot be first in an re or
92
     immediately after an alternation or begin-group operator.  */
93
#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
94
95
/* If this bit is set, then . matches newline.
96
   If not set, then it doesn't.  */
97
#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
98
99
/* If this bit is set, then . doesn't match NUL.
100
   If not set, then it does.  */
101
#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
102
103
/* If this bit is set, nonmatching lists [^...] do not match newline.
104
   If not set, they do.  */
105
#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
106
107
/* If this bit is set, either \{...\} or {...} defines an
108
     interval, depending on RE_NO_BK_BRACES.
109
   If not set, \{, \}, {, and } are literals.  */
110
#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
111
112
/* If this bit is set, +, ? and | aren't recognized as operators.
113
   If not set, they are.  */
114
#define RE_LIMITED_OPS (RE_INTERVALS << 1)
115
116
/* If this bit is set, newline is an alternation operator.
117
   If not set, newline is literal.  */
118
#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
119
120
/* If this bit is set, then `{...}' defines an interval, and \{ and \}
121
     are literals.
122
  If not set, then `\{...\}' defines an interval.  */
123
#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
124
125
/* If this bit is set, (...) defines a group, and \( and \) are literals.
126
   If not set, \(...\) defines a group, and ( and ) are literals.  */
127
#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
128
129
/* If this bit is set, then \<digit> matches <digit>.
130
   If not set, then \<digit> is a back-reference.  */
131
#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
132
133
/* If this bit is set, then | is an alternation operator, and \| is literal.
134
   If not set, then \| is an alternation operator, and | is literal.  */
135
#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
136
137
/* If this bit is set, then an ending range point collating higher
138
     than the starting range point, as in [z-a], is invalid.
139
   If not set, then when ending range point collates higher than the
140
     starting range point, the range is ignored.  */
141
#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
142
143
/* If this bit is set, then an unmatched ) is ordinary.
144
   If not set, then an unmatched ) is invalid.  */
145
#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
146
147
/* If this bit is set, succeed as soon as we match the whole pattern,
148
   without further backtracking.  */
149
#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
150
151
/* If this bit is set, do not process the GNU regex operators.
152
   If not set, then the GNU regex operators are recognized. */
153
#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)
154
155
/* If this bit is set, turn on internal regex debugging.
156
   If not set, and debugging was on, turn it off.
157
   This only works if regex.c is compiled -DDEBUG.
158
   We define this bit always, so that all that's needed to turn on
159
   debugging is to recompile regex.c; the calling code can always have
160
   this bit set, and it won't affect anything in the normal case. */
161
#define RE_DEBUG (RE_NO_GNU_OPS << 1)
162
163
/* This global variable defines the particular regexp syntax to use (for
164
   some interfaces).  When a regexp is compiled, the syntax used is
165
   stored in the pattern buffer, so changing this does not affect
166
   already-compiled regexps.  */
167
extern reg_syntax_t re_syntax_options;
168
169
/* Define combinations of the above bits for the standard possibilities.
170
   (The [[[ comments delimit what gets put into the Texinfo file, so
171
   don't delete them!)  */
172
/* [[[begin syntaxes]]] */
173
#define RE_SYNTAX_EMACS 0
174
175
#define RE_SYNTAX_AWK							\
176
  (RE_BACKSLASH_ESCAPE_IN_LISTS   | RE_DOT_NOT_NULL			\
177
   | RE_NO_BK_PARENS              | RE_NO_BK_REFS			\
178
   | RE_NO_BK_VBAR                | RE_NO_EMPTY_RANGES			\
179
   | RE_DOT_NEWLINE		  | RE_CONTEXT_INDEP_ANCHORS		\
180
   | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
181
182
#define RE_SYNTAX_GNU_AWK						\
183
  ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG)	\
184
   & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS))
185
186
#define RE_SYNTAX_POSIX_AWK 						\
187
  (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS		\
188
   | RE_INTERVALS	    | RE_NO_GNU_OPS)
189
190
#define RE_SYNTAX_GREP							\
191
  (RE_BK_PLUS_QM              | RE_CHAR_CLASSES				\
192
   | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS				\
193
   | RE_NEWLINE_ALT)
194
195
#define RE_SYNTAX_EGREP							\
196
  (RE_CHAR_CLASSES        | RE_CONTEXT_INDEP_ANCHORS			\
197
   | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE			\
198
   | RE_NEWLINE_ALT       | RE_NO_BK_PARENS				\
199
   | RE_NO_BK_VBAR)
200
201
#define RE_SYNTAX_POSIX_EGREP						\
202
  (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
203
204
/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff.  */
205
#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
206
207
#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
208
209
/* Syntax bits common to both basic and extended POSIX regex syntax.  */
210
#define _RE_SYNTAX_POSIX_COMMON						\
211
  (RE_CHAR_CLASSES | RE_DOT_NEWLINE      | RE_DOT_NOT_NULL		\
212
   | RE_INTERVALS  | RE_NO_EMPTY_RANGES)
213
214
#define RE_SYNTAX_POSIX_BASIC						\
215
  (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)
216
217
/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
218
   RE_LIMITED_OPS, i.e., \? \+ \| are not recognized.  Actually, this
219
   isn't minimal, since other operators, such as \`, aren't disabled.  */
220
#define RE_SYNTAX_POSIX_MINIMAL_BASIC					\
221
  (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
222
223
#define RE_SYNTAX_POSIX_EXTENDED					\
224
  (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS			\
225
   | RE_CONTEXT_INDEP_OPS  | RE_NO_BK_BRACES				\
226
   | RE_NO_BK_PARENS       | RE_NO_BK_VBAR				\
227
   | RE_UNMATCHED_RIGHT_PAREN_ORD)
228
229
/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS
230
   replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added.  */
231
#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED				\
232
  (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
233
   | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES				\
234
   | RE_NO_BK_PARENS        | RE_NO_BK_REFS				\
235
   | RE_NO_BK_VBAR	    | RE_UNMATCHED_RIGHT_PAREN_ORD)
236
/* [[[end syntaxes]]] */
237
238
/* Maximum number of duplicates an interval can allow.  Some systems
239
   (erroneously) define this in other header files, but we want our
240
   value, so remove any previous define.  */
241
#ifdef RE_DUP_MAX
242
# undef RE_DUP_MAX
243
#endif
244
/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows.  */
245
#define RE_DUP_MAX (0x7fff)
246
247
248
/* POSIX `cflags' bits (i.e., information for `regcomp').  */
249
250
/* If this bit is set, then use extended regular expression syntax.
251
   If not set, then use basic regular expression syntax.  */
252
#define REG_EXTENDED 1
253
254
/* If this bit is set, then ignore case when matching.
255
   If not set, then case is significant.  */
256
#define REG_ICASE (REG_EXTENDED << 1)
257
258
/* If this bit is set, then anchors match at newline characters in
259
     the string, and neither . nor [^...] matches a newline.
260
   If not set, then anchors do not match at newlines.  */
261
#define REG_NEWLINE (REG_ICASE << 1)
262
263
/* If this bit is set, then report only success or fail in regexec.
264
   If not set, then regexec also stores the offsets of the matched
   substrings in PMATCH (when NMATCH is nonzero).  */
265
#define REG_NOSUB (REG_NEWLINE << 1)
266
267
268
/* POSIX `eflags' bits (i.e., information for regexec).  */
269
270
/* If this bit is set, then the beginning-of-line operator doesn't match
271
     the beginning of the string (presumably because it's not the
272
     beginning of a line).
273
   If not set, then the beginning-of-line operator does match the
274
     beginning of the string.  */
275
#define REG_NOTBOL 1
276
277
/* Like REG_NOTBOL, except for the end-of-line.  */
278
#define REG_NOTEOL (1 << 1)
279
280
281
/* If any error codes are removed, changed, or added, update the
282
   `re_error_msgid' table in regex.c.  */
283
/* Status codes used by the regex routines.  regerror uses the code as
   an index into the message table in regex.c and aborts for any value
   below zero or past the end of that table, so REG_ENOSYS (-1) must
   not be passed to it.  regcomp converts REG_ERPAREN to REG_EPAREN
   before returning.  */
typedef enum
284
{
285
#ifdef _XOPEN_SOURCE
286
  REG_ENOSYS = -1,	/* This will never happen for this implementation.  */
287
#endif
288
289
  REG_NOERROR = 0,	/* Success.  */
290
  REG_NOMATCH,		/* Didn't find a match (for regexec).  */
291
292
  /* POSIX regcomp return error codes.  (In the order listed in the
293
     standard.)  */
294
  REG_BADPAT,		/* Invalid pattern.  */
295
  REG_ECOLLATE,		/* Not implemented.  */
296
  REG_ECTYPE,		/* Invalid character class name.  */
297
  REG_EESCAPE,		/* Trailing backslash.  */
298
  REG_ESUBREG,		/* Invalid back reference.  */
299
  REG_EBRACK,		/* Unmatched left bracket.  */
300
  REG_EPAREN,		/* Parenthesis imbalance.  */
301
  REG_EBRACE,		/* Unmatched \{.  */
302
  REG_BADBR,		/* Invalid contents of \{\}.  */
303
  REG_ERANGE,		/* Invalid range end.  */
304
  REG_ESPACE,		/* Ran out of memory.  */
305
  REG_BADRPT,		/* No preceding re for repetition op.  */
306
307
  /* Error codes we've added.  */
308
  REG_EEND,		/* Premature end.  */
309
  REG_ESIZE,		/* Compiled pattern bigger than 2^16 bytes.  */
310
  REG_ERPAREN		/* Unmatched ) or \); not returned from regcomp.  */
311
} reg_errcode_t;
312
313
/* This data structure represents a compiled pattern.  Before calling
314
   the pattern compiler, the fields `buffer', `allocated', `fastmap',
315
   `translate', and `no_sub' can be set.  After the pattern has been
316
   compiled, the `re_nsub' field is available.  All other fields are
317
   private to the regex routines.  */
318
319
#ifndef RE_TRANSLATE_TYPE
320
# define RE_TRANSLATE_TYPE char *
321
#endif
322
323
/* A compiled pattern together with the settings that control matching.
   How the POSIX entry points in regex.c use it:
     - regcomp clears `buffer', `allocated' and `used', allocates
       `fastmap', builds `translate' when REG_ICASE is given (NULL
       otherwise), and sets `newline_anchor' and `no_sub' from
       REG_NEWLINE and REG_NOSUB;
     - regexec works on a private copy in which it sets `not_bol' and
       `not_eol' from its eflags and `regs_allocated' to REGS_FIXED;
     - regfree frees `buffer', `fastmap' and `translate' and resets
       them, along with `allocated', `used' and `fastmap_accurate'.  */
struct re_pattern_buffer
324
{
325
/* [[[begin pattern_buffer]]] */
326
	/* Space that holds the compiled pattern.  It is declared as
327
          `unsigned char *' because its elements are
328
           sometimes used as array indexes.  */
329
  unsigned char *buffer;
330
331
	/* Number of bytes to which `buffer' points.  */
332
  unsigned long int allocated;
333
334
	/* Number of bytes actually used in `buffer'.  */
335
  unsigned long int used;
336
337
        /* Syntax setting with which the pattern was compiled.  */
338
  reg_syntax_t syntax;
339
340
        /* Pointer to a fastmap, if any, otherwise zero.  re_search uses
341
           the fastmap, if there is one, to skip over impossible
342
           starting points for matches.  */
343
  char *fastmap;
344
345
        /* Either a translate table to apply to all characters before
346
           comparing them, or zero for no translation.  The translation
347
           is applied to a pattern when it is compiled and to a string
348
           when it is matched.  */
349
  RE_TRANSLATE_TYPE translate;
350
351
	/* Number of subexpressions found by the compiler.  */
352
  size_t re_nsub;
353
354
        /* Zero if this pattern cannot match the empty string, one else.
355
           Well, in truth it's used only in `re_search_2', to see
356
           whether or not we should use the fastmap, so we don't set
357
           this absolutely perfectly; see `re_compile_fastmap' (the
358
           `duplicate' case).  */
359
  unsigned can_be_null : 1;
360
361
        /* If REGS_UNALLOCATED, allocate space in the `regs' structure
362
             for `max (RE_NREGS, re_nsub + 1)' groups.
363
           If REGS_REALLOCATE, reallocate space if necessary.
364
           If REGS_FIXED, use what's there.  */
365
#define REGS_UNALLOCATED 0
366
#define REGS_REALLOCATE 1
367
#define REGS_FIXED 2
368
  unsigned regs_allocated : 2;
369
370
        /* Set to zero when `regex_compile' compiles a pattern; set to one
371
           by `re_compile_fastmap' if it updates the fastmap.  */
372
  unsigned fastmap_accurate : 1;
373
374
        /* If set, `re_match_2' does not return information about
375
           subexpressions.  */
376
  unsigned no_sub : 1;
377
378
        /* If set, a beginning-of-line anchor doesn't match at the
379
           beginning of the string.  */
380
  unsigned not_bol : 1;
381
382
        /* Similarly for an end-of-line anchor.  */
383
  unsigned not_eol : 1;
384
385
        /* If true, an anchor at a newline matches.  */
386
  unsigned newline_anchor : 1;
387
388
/* [[[end pattern_buffer]]] */
389
};
390
391
typedef struct re_pattern_buffer regex_t;
392
393
/* Type for byte offsets within the string.  POSIX mandates this.  */
394
typedef int regoff_t;
395
396
397
/* This is the structure we store register match data in.  See
398
   regex.texinfo for a full description of what registers match.  */
399
struct re_registers
400
{
401
  unsigned num_regs;
402
  regoff_t *start;
403
  regoff_t *end;
404
};
405
406
407
/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
408
   `re_match_2' returns information about at least this many registers
409
   the first time a `regs' structure is passed.  */
410
#ifndef RE_NREGS
411
# define RE_NREGS 30
412
#endif
413
414
415
/* POSIX specification for registers.  Aside from the different names than
416
   `re_registers', POSIX uses an array of structures, instead of a
417
   structure of arrays.  */
418
typedef struct
419
{
420
  regoff_t rm_so;  /* Byte offset from string's start to substring's start.  */
421
  regoff_t rm_eo;  /* Byte offset from string's start to substring's end.  */
422
} regmatch_t;
423
424
/* Declarations for routines.  */
425
426
/* To avoid duplicating every routine declaration -- once with a
427
   prototype (if we are ANSI), and once without (if we aren't) -- we
428
   use the following macro to declare argument types.  This
429
   unfortunately clutters up the declarations a bit, but I think it's
430
   worth it.  */
431
432
#if __STDC__
433
434
# define _RE_ARGS(args) args
435
436
#else /* not __STDC__ */
437
438
# define _RE_ARGS(args) ()
439
440
#endif /* not __STDC__ */
441
442
/* Sets the current default syntax to SYNTAX, and return the old syntax.
443
   You can also simply assign to the `re_syntax_options' variable.  */
444
extern reg_syntax_t __re_set_syntax _RE_ARGS ((reg_syntax_t syntax));
445
extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax));
446
447
/* Compile the regular expression PATTERN, with length LENGTH
448
   and syntax given by the global `re_syntax_options', into the buffer
449
   BUFFER.  Return NULL if successful, and an error string if not.  */
450
extern const char *__re_compile_pattern
451
  _RE_ARGS ((const char *pattern, size_t length,
452
             struct re_pattern_buffer *buffer));
453
extern const char *re_compile_pattern
454
  _RE_ARGS ((const char *pattern, size_t length,
455
             struct re_pattern_buffer *buffer));
456
457
458
/* Compile a fastmap for the compiled pattern in BUFFER; used to
459
   accelerate searches.  Return 0 if successful and -2 if there was an
460
   internal error.  */
461
extern int __re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer));
462
extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer));
463
464
465
/* Search in the string STRING (with length LENGTH) for the pattern
466
   compiled into BUFFER.  Start searching at position START, for RANGE
467
   characters.  Return the starting position of the match, -1 for no
468
   match, or -2 for an internal error.  Also return register
469
   information in REGS (if REGS is nonzero and BUFFER->no_sub is zero).  */
470
extern int __re_search
471
  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
472
            int length, int start, int range, struct re_registers *regs));
473
extern int re_search
474
  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
475
            int length, int start, int range, struct re_registers *regs));
476
477
478
/* Like `re_search', but search in the concatenation of STRING1 and
479
   STRING2.  Also, stop searching at index START + STOP.  */
480
extern int __re_search_2
481
  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
482
             int length1, const char *string2, int length2,
483
             int start, int range, struct re_registers *regs, int stop));
484
extern int re_search_2
485
  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
486
             int length1, const char *string2, int length2,
487
             int start, int range, struct re_registers *regs, int stop));
488
489
490
/* Like `re_search', but return how many characters in STRING the regexp
491
   in BUFFER matched, starting at position START.  */
492
extern int __re_match
493
  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
494
             int length, int start, struct re_registers *regs));
495
extern int re_match
496
  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
497
             int length, int start, struct re_registers *regs));
498
499
500
/* Relates to `re_match' as `re_search_2' relates to `re_search'.  */
501
extern int __re_match_2
502
  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
503
             int length1, const char *string2, int length2,
504
             int start, struct re_registers *regs, int stop));
505
extern int re_match_2
506
  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
507
             int length1, const char *string2, int length2,
508
             int start, struct re_registers *regs, int stop));
509
510
511
/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
512
   ENDS.  Subsequent matches using BUFFER and REGS will use this memory
513
   for recording register information.  STARTS and ENDS must be
514
   allocated with malloc, and must each be at least `NUM_REGS * sizeof
515
   (regoff_t)' bytes long.
516
517
   If NUM_REGS == 0, then subsequent matches should allocate their own
518
   register data.
519
520
   Unless this function is called, the first search or match using
521
   PATTERN_BUFFER will allocate its own register data, without
522
   freeing the old data.  */
523
extern void __re_set_registers
524
  _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs,
525
             unsigned num_regs, regoff_t *starts, regoff_t *ends));
526
extern void re_set_registers
527
  _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs,
528
             unsigned num_regs, regoff_t *starts, regoff_t *ends));
529
530
#ifdef _REGEX_RE_COMP
531
# ifndef _CRAY
532
/* 4.2 bsd compatibility.  */
533
extern char *re_comp _RE_ARGS ((const char *));
534
extern int re_exec _RE_ARGS ((const char *));
535
# endif
536
#endif
537
538
/* POSIX compatibility.  */
539
extern int __regcomp _RE_ARGS ((regex_t *__preg, const char *__pattern,
540
				int __cflags));
541
extern int regcomp _RE_ARGS ((regex_t *__preg, const char *__pattern,
542
			      int __cflags));
543
544
extern int __regexec _RE_ARGS ((const regex_t *__preg,
545
				const char *__string, size_t __nmatch,
546
				regmatch_t __pmatch[], int __eflags));
547
extern int regexec _RE_ARGS ((const regex_t *__preg,
548
			      const char *__string, size_t __nmatch,
549
			      regmatch_t __pmatch[], int __eflags));
550
551
extern size_t __regerror _RE_ARGS ((int __errcode, const regex_t *__preg,
552
				    char *__errbuf, size_t __errbuf_size));
553
extern size_t regerror _RE_ARGS ((int __errcode, const regex_t *__preg,
554
				  char *__errbuf, size_t __errbuf_size));
555
556
extern void __regfree _RE_ARGS ((regex_t *__preg));
557
extern void regfree _RE_ARGS ((regex_t *__preg));
558
559
560
#ifdef __cplusplus
561
}
562
#endif	/* C++ */
563
564
#endif /* regex.h */
565
566
/*
567
Local variables:
568
make-backup-files: t
569
version-control: t
570
trim-versions-without-asking: nil
571
End:
572
*/
(-)grep/savedir.c (+135 lines)
Line 0 Link Here
1
/* savedir.c -- save the list of files in a directory in a string
2
   Copyright (C) 1990, 1997, 1998 Free Software Foundation, Inc.
3
4
   This program is free software; you can redistribute it and/or modify
5
   it under the terms of the GNU General Public License as published by
6
   the Free Software Foundation; either version 2, or (at your option)
7
   any later version.
8
9
   This program is distributed in the hope that it will be useful,
10
   but WITHOUT ANY WARRANTY; without even the implied warranty of
11
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
   GNU General Public License for more details.
13
14
   You should have received a copy of the GNU General Public License
15
   along with this program; if not, write to the Free Software Foundation,
16
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
17
18
/* Written by David MacKenzie <djm@gnu.ai.mit.edu>. */
19
20
#if HAVE_CONFIG_H
21
# include <config.h>
22
#endif
23
24
#include <sys/types.h>
25
26
#if HAVE_UNISTD_H
27
# include <unistd.h>
28
#endif
29
30
#if HAVE_DIRENT_H
31
# include <dirent.h>
32
# define NAMLEN(dirent) strlen((dirent)->d_name)
33
#else
34
# define dirent direct
35
# define NAMLEN(dirent) (dirent)->d_namlen
36
# if HAVE_SYS_NDIR_H
37
#  include <sys/ndir.h>
38
# endif
39
# if HAVE_SYS_DIR_H
40
#  include <sys/dir.h>
41
# endif
42
# if HAVE_NDIR_H
43
#  include <ndir.h>
44
# endif
45
#endif
46
47
#ifdef CLOSEDIR_VOID
48
/* Fake a return value. */
49
# define CLOSEDIR(d) (closedir (d), 0)
50
#else
51
# define CLOSEDIR(d) closedir (d)
52
#endif
53
54
#ifdef STDC_HEADERS
55
# include <stdlib.h>
56
# include <string.h>
57
#else
58
char *malloc ();
59
char *realloc ();
60
#endif
61
#ifndef NULL
62
# define NULL 0
63
#endif
64
65
#ifndef stpcpy
66
char *stpcpy ();
67
#endif
68
69
#include "savedir.h"
70
71
/* Return a freshly allocated string containing the filenames
   in directory DIR, separated by '\0' characters;
   the end is marked by two '\0' characters in a row
   (a listing with no names is a single '\0').
   The entries "." and ".." are never included.
   NAME_SIZE is the number of bytes to initially allocate
   for the string; it will be enlarged as needed, and a
   NAME_SIZE of 0 is treated as 1 so the terminator always fits.
   Return NULL if DIR cannot be opened, if out of memory, or if
   closing the directory fails; the partial list is freed first.
   The caller releases the returned string with free.  */

char *
savedir (dir, name_size)
     const char *dir;
     unsigned int name_size;
{
  DIR *dirp;
  struct dirent *dp;
  char *name_space;
  char *namep;

  dirp = opendir (dir);
  if (dirp == NULL)
    return NULL;

  /* The final '\0' is stored unconditionally below, so the buffer
     must hold at least one byte even when no entry is saved.  */
  if (name_size == 0)
    name_size = 1;

  name_space = (char *) malloc (name_size);
  if (name_space == NULL)
    {
      closedir (dirp);
      return NULL;
    }
  namep = name_space;

  while ((dp = readdir (dirp)) != NULL)
    {
      /* Skip "." and ".." (some NFS filesystems' directories lack them). */
      if (dp->d_name[0] != '.'
	  || (dp->d_name[1] != '\0'
	      && (dp->d_name[1] != '.' || dp->d_name[2] != '\0')))
	{
	  /* Bytes already written; also used to rebase NAMEP if the
	     buffer moves.  */
	  unsigned used = namep - name_space;
	  /* Room for this name, its '\0', and the list terminator.  */
	  unsigned size_needed = used + NAMLEN (dp) + 2;

	  if (size_needed > name_size)
	    {
	      char *new_name_space;

	      while (size_needed > name_size)
		name_size += 1024;

	      new_name_space = realloc (name_space, name_size);
	      if (new_name_space == NULL)
		{
		  /* realloc leaves the old block allocated on failure,
		     so release it here instead of leaking it.  */
		  free (name_space);
		  closedir (dirp);
		  return NULL;
		}
	      /* Recompute the write position from the saved offset;
		 the old pointer value must not be used once realloc
		 has succeeded.  */
	      name_space = new_name_space;
	      namep = name_space + used;
	    }
	  namep = stpcpy (namep, dp->d_name) + 1;
	}
    }
  *namep = '\0';
  if (CLOSEDIR (dirp))
    {
      free (name_space);
      return NULL;
    }
  return name_space;
}
(-)grep/savedir.h (+15 lines)
Line 0 Link Here
1
#if !defined SAVEDIR_H_
2
# define SAVEDIR_H_
3
4
# ifndef PARAMS
5
#  if defined PROTOTYPES || (defined __STDC__ && __STDC__)
6
#   define PARAMS(Args) Args
7
#  else
8
#   define PARAMS(Args) ()
9
#  endif
10
# endif
11
12
char *
13
savedir PARAMS ((const char *dir, unsigned int name_size));
14
15
#endif
(-)grep/search.c (-76 / +31 lines)
Lines 1-5 Link Here
1
/* search.c - searching subroutines using dfa, kwset and regex for grep.
1
/* search.c - searching subroutines using dfa, kwset and regex for grep.
2
   Copyright (C) 1992 Free Software Foundation, Inc.
2
   Copyright (C) 1992, 1998 Free Software Foundation, Inc.
3
3
4
   This program is free software; you can redistribute it and/or modify
4
   This program is free software; you can redistribute it and/or modify
5
   it under the terms of the GNU General Public License as published by
5
   it under the terms of the GNU General Public License as published by
Lines 13-92 Link Here
13
13
14
   You should have received a copy of the GNU General Public License
14
   You should have received a copy of the GNU General Public License
15
   along with this program; if not, write to the Free Software
15
   along with this program; if not, write to the Free Software
16
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
16
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
17
   02111-1307, USA.  */
17
18
18
   Written August 1992 by Mike Haertel. */
19
/* Written August 1992 by Mike Haertel. */
19
20
20
#include <ctype.h>
21
#ifdef HAVE_CONFIG_H
21
22
# include <config.h>
22
#ifdef STDC_HEADERS
23
#include <limits.h>
24
#include <stdlib.h>
25
#else
26
#define UCHAR_MAX 255
27
#include <sys/types.h>
28
extern char *malloc();
29
#endif
30
31
#ifdef HAVE_MEMCHR
32
#include <string.h>
33
#ifdef NEED_MEMORY_H
34
#include <memory.h>
35
#endif
36
#else
37
#ifdef __STDC__
38
extern void *memchr();
39
#else
40
extern char *memchr();
41
#endif
42
#endif
23
#endif
43
24
#include <sys/types.h>
44
#if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
25
#include "system.h"
45
#undef bcopy
46
#define bcopy(s, d, n) memcpy((d), (s), (n))
47
#endif
48
49
#if defined(isascii) && !defined(__FreeBSD__)
50
#define ISALNUM(C) (isascii(C) && isalnum(C))
51
#define ISUPPER(C) (isascii(C) && isupper(C))
52
#else
53
#define ISALNUM(C) isalnum((unsigned char)C)
54
#define ISUPPER(C) isupper((unsigned char)C)
55
#endif
56
57
#define TOLOWER(C) (ISUPPER(C) ? tolower((unsigned char)C) : (C))
58
59
#include "grep.h"
26
#include "grep.h"
27
#include "regex.h"
60
#include "dfa.h"
28
#include "dfa.h"
61
#include "kwset.h"
29
#include "kwset.h"
62
#include "gnuregex.h"
63
30
64
#define NCHAR (UCHAR_MAX + 1)
31
#define NCHAR (UCHAR_MAX + 1)
65
32
66
#if __STDC__
33
static void Gcompile PARAMS((char *, size_t));
67
static void Gcompile(char *, size_t);
34
static void Ecompile PARAMS((char *, size_t));
68
static void Ecompile(char *, size_t);
35
static char *EGexecute PARAMS((char *, size_t, char **));
69
static char *EGexecute(char *, size_t, char **);
36
static void Fcompile PARAMS((char *, size_t));
70
static void Fcompile(char *, size_t);
37
static char *Fexecute PARAMS((char *, size_t, char **));
71
static char *Fexecute(char *, size_t, char **);
38
static void kwsinit PARAMS((void));
72
#else
73
static void Gcompile();
74
static void Ecompile();
75
static char *EGexecute();
76
static void Fcompile();
77
static char *Fexecute();
78
#endif
79
39
80
/* Here is the matchers vector for the main program. */
40
/* Here is the matchers vector for the main program. */
81
struct matcher matchers[] = {
41
struct matcher matchers[] = {
82
  { "default", Gcompile, EGexecute },
42
  { "default", Gcompile, EGexecute },
83
  { "grep", Gcompile, EGexecute },
43
  { "grep", Gcompile, EGexecute },
84
  { "ggrep", Gcompile, EGexecute },
85
  { "egrep", Ecompile, EGexecute },
44
  { "egrep", Ecompile, EGexecute },
86
  { "posix-egrep", Ecompile, EGexecute },
45
  { "posix-egrep", Ecompile, EGexecute },
87
  { "gegrep", Ecompile, EGexecute },
46
  { "awk", Ecompile, EGexecute },
88
  { "fgrep", Fcompile, Fexecute },
47
  { "fgrep", Fcompile, Fexecute },
89
  { "gfgrep", Fcompile, Fexecute },
90
  { 0, 0, 0 },
48
  { 0, 0, 0 },
91
};
49
};
92
50
Lines 111-117 Link Here
111
69
112
void
70
void
113
dfaerror(mesg)
71
dfaerror(mesg)
114
     char *mesg;
72
  const char *mesg;
115
{
73
{
116
  fatal(mesg, 0);
74
  fatal(mesg, 0);
117
}
75
}
Lines 173-182 Link Here
173
     char *pattern;
131
     char *pattern;
174
     size_t size;
132
     size_t size;
175
{
133
{
176
#ifdef __STDC__
134
  const char *err;
177
  const
178
#endif
179
  char *err;
180
135
181
  re_set_syntax(RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE);
136
  re_set_syntax(RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE);
182
  dfasyntax(RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase);
137
  dfasyntax(RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase);
Lines 184-191 Link Here
184
  if ((err = re_compile_pattern(pattern, size, &regex)) != 0)
139
  if ((err = re_compile_pattern(pattern, size, &regex)) != 0)
185
    fatal(err, 0);
140
    fatal(err, 0);
186
141
187
  dfainit(&dfa);
188
189
  /* In the match_words and match_lines cases, we use a different pattern
142
  /* In the match_words and match_lines cases, we use a different pattern
190
     for the DFA matcher that will quickly throw out cases that won't work.
143
     for the DFA matcher that will quickly throw out cases that won't work.
191
     Then if DFA succeeds we do some hairy stuff using the regex matcher
144
     Then if DFA succeeds we do some hairy stuff using the regex matcher
Lines 209-215 Link Here
209
	strcpy(n, "\\(^\\|[^0-9A-Za-z_]\\)\\(");
162
	strcpy(n, "\\(^\\|[^0-9A-Za-z_]\\)\\(");
210
163
211
      i = strlen(n);
164
      i = strlen(n);
212
      bcopy(pattern, n + i, size);
165
      memcpy(n + i, pattern, size);
213
      i += size;
166
      i += size;
214
167
215
      if (match_words)
168
      if (match_words)
Lines 231-246 Link Here
231
     char *pattern;
184
     char *pattern;
232
     size_t size;
185
     size_t size;
233
{
186
{
234
#ifdef __STDC__
187
  const char *err;
235
  const
236
#endif
237
  char *err;
238
188
239
  if (strcmp(matcher, "posix-egrep") == 0)
189
  if (strcmp(matcher, "posix-egrep") == 0)
240
    {
190
    {
241
      re_set_syntax(RE_SYNTAX_POSIX_EGREP);
191
      re_set_syntax(RE_SYNTAX_POSIX_EGREP);
242
      dfasyntax(RE_SYNTAX_POSIX_EGREP, match_icase);
192
      dfasyntax(RE_SYNTAX_POSIX_EGREP, match_icase);
243
    }
193
    }
194
  else if (strcmp(matcher, "awk") == 0)
195
    {
196
      re_set_syntax(RE_SYNTAX_AWK);
197
      dfasyntax(RE_SYNTAX_AWK, match_icase);
198
    }
244
  else
199
  else
245
    {
200
    {
246
      re_set_syntax(RE_SYNTAX_EGREP);
201
      re_set_syntax(RE_SYNTAX_EGREP);
Lines 250-257 Link Here
250
  if ((err = re_compile_pattern(pattern, size, &regex)) != 0)
205
  if ((err = re_compile_pattern(pattern, size, &regex)) != 0)
251
    fatal(err, 0);
206
    fatal(err, 0);
252
207
253
  dfainit(&dfa);
254
255
  /* In the match_words and match_lines cases, we use a different pattern
208
  /* In the match_words and match_lines cases, we use a different pattern
256
     for the DFA matcher that will quickly throw out cases that won't work.
209
     for the DFA matcher that will quickly throw out cases that won't work.
257
     Then if DFA succeeds we do some hairy stuff using the regex matcher
210
     Then if DFA succeeds we do some hairy stuff using the regex matcher
Lines 275-281 Link Here
275
	strcpy(n, "(^|[^0-9A-Za-z_])(");
228
	strcpy(n, "(^|[^0-9A-Za-z_])(");
276
229
277
      i = strlen(n);
230
      i = strlen(n);
278
      bcopy(pattern, n + i, size);
231
      memcpy(n + i, pattern, size);
279
      i += size;
232
      i += size;
280
233
281
      if (match_words)
234
      if (match_words)
Lines 358-364 Link Here
358
      if ((start = re_search(&regex, beg, end - beg, 0, end - beg, &regs)) >= 0)
311
      if ((start = re_search(&regex, beg, end - beg, 0, end - beg, &regs)) >= 0)
359
	{
312
	{
360
	  len = regs.end[0] - start;
313
	  len = regs.end[0] - start;
361
	  if (!match_lines && !match_words || match_lines && len == end - beg)
314
	  if ((!match_lines && !match_words)
315
	      || (match_lines && len == end - beg))
362
	    goto success;
316
	    goto success;
363
	  /* If -w, check if the match aligns with word boundaries.
317
	  /* If -w, check if the match aligns with word boundaries.
364
	     We do this iteratively because:
318
	     We do this iteratively because:
Lines 369-376 Link Here
369
	  if (match_words)
323
	  if (match_words)
370
	    while (start >= 0)
324
	    while (start >= 0)
371
	      {
325
	      {
372
		if ((start == 0 || !WCHAR(beg[start - 1]))
326
		if ((start == 0 || !WCHAR ((unsigned char) beg[start - 1]))
373
		    && (len == end - beg || !WCHAR(beg[start + len])))
327
		    && (len == end - beg
328
			|| !WCHAR ((unsigned char) beg[start + len])))
374
		  goto success;
329
		  goto success;
375
		if (len > 0)
330
		if (len > 0)
376
		  {
331
		  {
(-)grep/stpcpy.c (+52 lines)
Line 0 Link Here
1
/* stpcpy.c -- copy a string and return pointer to end of new string
2
   Copyright (C) 1992, 1995, 1997, 1998 Free Software Foundation, Inc.
3
4
   NOTE: The canonical source of this file is maintained with the GNU C Library.
5
   Bugs can be reported to bug-glibc@prep.ai.mit.edu.
6
7
   This program is free software; you can redistribute it and/or modify it
8
   under the terms of the GNU General Public License as published by the
9
   Free Software Foundation; either version 2, or (at your option) any
10
   later version.
11
12
   This program is distributed in the hope that it will be useful,
13
   but WITHOUT ANY WARRANTY; without even the implied warranty of
14
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
   GNU General Public License for more details.
16
17
   You should have received a copy of the GNU General Public License
18
   along with this program; if not, write to the Free Software
19
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
20
   USA.  */
21
22
#ifdef HAVE_CONFIG_H
23
# include <config.h>
24
#endif
25
26
#include <string.h>
27
28
#undef __stpcpy
29
#undef stpcpy
30
31
#ifndef weak_alias
32
# define __stpcpy stpcpy
33
#endif
34
35
/* Copy the string SRC, including its terminating '\0', into DEST.
   Return the address of the '\0' that was stored in DEST.  */
char *
__stpcpy (dest, src)
     char *dest;
     const char *src;
{
  /* Store each byte first, then test it, so the terminator itself is
     copied and DEST is left pointing at it when the loop ends.  */
  while ((*dest = *src) != '\0')
    {
      ++dest;
      ++src;
    }

  return dest;
}
50
#ifdef weak_alias
51
weak_alias (__stpcpy, stpcpy)
52
#endif
(-)grep/system.h (+188 lines)
Line 0 Link Here
1
/* Portability cruft.  Include after config.h and sys/types.h.
2
   Copyright (C) 1996, 1998 Free Software Foundation, Inc.
3
4
   This program is free software; you can redistribute it and/or modify
5
   it under the terms of the GNU General Public License as published by
6
   the Free Software Foundation; either version 2, or (at your option)
7
   any later version.
8
9
   This program is distributed in the hope that it will be useful,
10
   but WITHOUT ANY WARRANTY; without even the implied warranty of
11
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
   GNU General Public License for more details.
13
14
   You should have received a copy of the GNU General Public License
15
   along with this program; if not, write to the Free Software
16
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
17
   02111-1307, USA.  */
18
19
#undef PARAMS
20
#if defined (__STDC__) && __STDC__
21
# ifndef _PTR_T
22
# define _PTR_T
23
  typedef void * ptr_t;
24
# endif
25
# define PARAMS(x) x
26
#else
27
# ifndef _PTR_T
28
# define _PTR_T
29
  typedef char * ptr_t;
30
# endif
31
# define PARAMS(x) ()
32
#endif
33
34
#ifdef HAVE_UNISTD_H
35
# include <fcntl.h>
36
# include <unistd.h>
37
#else
38
# define O_RDONLY 0
39
int open(), read(), close();
40
#endif
41
42
#include <errno.h>
43
#ifndef errno
44
extern int errno;
45
#endif
46
47
#ifndef HAVE_STRERROR
48
extern int sys_nerr;
49
extern char *sys_errlist[];
50
# define strerror(E) (0 <= (E) && (E) < sys_nerr ? _(sys_errlist[E]) : _("Unknown system error"))
51
#endif
52
53
/* Some operating systems treat text and binary files differently.
   Where O_BINARY is nonzero, SET_BINARY(fd) switches FD to binary
   mode.  Everywhere else O_BINARY is 0 and SET_BINARY is a no-op;
   it is defined even when the system headers already supply an
   O_BINARY of 0, so callers can use it unconditionally.  */
#if O_BINARY
# include <io.h>
# ifdef HAVE_SETMODE
#  define SET_BINARY(fd)  setmode (fd, O_BINARY)
# else
#  define SET_BINARY(fd)  _setmode (fd, O_BINARY)
# endif
#else
# ifndef O_BINARY
#  define O_BINARY 0
# endif
# define SET_BINARY(fd)   (void)0
#endif
67
68
#ifdef HAVE_DOS_FILE_NAMES
69
# define IS_SLASH(c) ((c) == '/' || (c) == '\\')
70
# define FILESYSTEM_PREFIX_LEN(f) ((f)[0] && (f)[1] == ':' ? 2 : 0)
71
#endif
72
73
#ifndef IS_SLASH
74
# define IS_SLASH(c) ((c) == '/')
75
#endif
76
77
#ifndef FILESYSTEM_PREFIX_LEN
78
# define FILESYSTEM_PREFIX_LEN(f) 0
79
#endif
80
81
/* This assumes _WIN32, like DJGPP, has D_OK.  Does it?  In what header?  */
82
#ifdef D_OK
83
# ifdef EISDIR
84
#  define is_EISDIR(e, f) \
85
     ((e) == EISDIR \
86
      || ((e) == EACCES && access (f, D_OK) == 0 && ((e) = EISDIR, 1)))
87
# else
88
#  define is_EISDIR(e, f) ((e) == EACCES && access (f, D_OK) == 0)
89
# endif
90
#endif
91
92
#ifndef is_EISDIR
93
# ifdef EISDIR
94
#  define is_EISDIR(e, f) ((e) == EISDIR)
95
# else
96
#  define is_EISDIR(e, f) 0
97
# endif
98
#endif
99
100
#if STAT_MACROS_BROKEN
101
# undef S_ISDIR
102
#endif
103
#if !defined(S_ISDIR) && defined(S_IFDIR)
104
# define S_ISDIR(Mode) (((Mode) & S_IFMT) == S_IFDIR)
105
#endif
106
107
#ifdef STDC_HEADERS
108
# include <stdlib.h>
109
#else
110
ptr_t malloc(), realloc(), calloc();
111
void free();
112
#endif
113
114
#if __STDC__
115
# include <stddef.h>
116
#endif
117
#ifdef STDC_HEADERS
118
# include <limits.h>
119
#endif
120
#ifndef CHAR_BIT
121
# define CHAR_BIT 8
122
#endif
123
#ifndef INT_MAX
124
# define INT_MAX 2147483647
125
#endif
126
#ifndef UCHAR_MAX
127
# define UCHAR_MAX 255
128
#endif
129
130
#if !defined(STDC_HEADERS) && defined(HAVE_STRING_H) && defined(HAVE_MEMORY_H)
131
# include <memory.h>
132
#endif
133
#if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
134
# include <string.h>
135
#else
136
# include <strings.h>
137
# undef strchr
138
# define strchr index
139
# undef strrchr
140
# define strrchr rindex
141
# undef memcpy
142
# define memcpy(d, s, n) bcopy((s), (d), (n))
143
#endif
144
#ifndef HAVE_MEMCHR
145
ptr_t memchr();
146
#endif
147
148
#include <ctype.h>
149
150
#ifndef isgraph
151
# define isgraph(C) (isprint(C) && !isspace(C))
152
#endif
153
154
#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
155
# define IN_CTYPE_DOMAIN(c) 1
156
#else
157
# define IN_CTYPE_DOMAIN(c) isascii(c)
158
#endif
159
160
#define ISALPHA(C)	(IN_CTYPE_DOMAIN (C) && isalpha (C))
161
#define ISUPPER(C)	(IN_CTYPE_DOMAIN (C) && isupper (C))
162
#define ISLOWER(C)	(IN_CTYPE_DOMAIN (C) && islower (C))
163
#define ISDIGIT(C)	(IN_CTYPE_DOMAIN (C) && isdigit (C))
164
#define ISXDIGIT(C)	(IN_CTYPE_DOMAIN (C) && isxdigit (C))
165
#define ISSPACE(C)	(IN_CTYPE_DOMAIN (C) && isspace (C))
166
#define ISPUNCT(C)	(IN_CTYPE_DOMAIN (C) && ispunct (C))
167
#define ISALNUM(C)	(IN_CTYPE_DOMAIN (C) && isalnum (C))
168
#define ISPRINT(C)	(IN_CTYPE_DOMAIN (C) && isprint (C))
169
#define ISGRAPH(C)	(IN_CTYPE_DOMAIN (C) && isgraph (C))
170
#define ISCNTRL(C)	(IN_CTYPE_DOMAIN (C) && iscntrl (C))
171
172
#define TOLOWER(C) (ISUPPER(C) ? tolower(C) : (C))
173
174
#if ENABLE_NLS
175
# include <libintl.h>
176
# define _(String) gettext (String)
177
#else
178
# define _(String) String
179
#endif
180
#define N_(String) String
181
182
#if HAVE_SETLOCALE
183
# include <locale.h>
184
#endif
185
186
#ifndef initialize_main
187
#define initialize_main(argcp, argvp)
188
#endif
(-)grep/vms_fab.c (+88 lines)
Line 0 Link Here
1
     /*
2
        <vms_fab>
3
4
        This macro sets up the file access block and name block for VMS.
5
        It also does the initial parsing of the input string (resolving 
6
        wildcards,
7
        if any) and finds all files matching the input pattern.
8
        The address of the first matching pattern is returned.
9
10
        Written by Phillip C. Brisco 8/98.
11
      */
12
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "vms_fab.h"
13
14
void
15
vms_fab (argp, argvp)
16
	int * argp;
17
	char **argvp[];
18
{
19
  extern int optind;
20
  int optout;
21
22
  fab = cc$rms_fab;
23
  nam = cc$rms_nam;
24
25
  optout = 0;
26
  strcpy (fna_buffer, *argvp[optind]);
27
  length_of_fna_buffer = NAM$C_MAXRSS;
28
29
  fab.fab$b_bid = FAB$C_BID;
30
  fab.fab$b_bln = FAB$C_BLN;
31
  fab.fab$l_fop = FAB$M_NAM;
32
  fab.fab$l_nam = &nam;
33
  fab.fab$l_fna = &fna_buffer;
34
  fab.fab$b_fns = length_of_fna_buffer;
35
36
  nam.nam$b_bid = NAM$C_BID;
37
  nam.nam$b_bln = NAM$C_BLN;
38
  nam.nam$l_esa = &expanded_name;
39
  nam.nam$b_ess = NAM$C_MAXRSS;
40
  nam.nam$l_rsa = &result_name;
41
  nam.nam$b_rss = NAM$C_MAXRSS;
42
43
  fab_stat = sys$parse (&fab);
44
  fab_stat = sys$search (&fab);
45
46
  if (fab_stat != 65537)
47
    {
48
      fprintf (stderr, "No Matches found.\n");
49
      exit (0);
50
    }
51
52
  /*
53
     While we find matching patterns, continue searching for more.
54
   */
55
  while (fab_stat == 65537)
56
    {
57
      /*
58
         Allocate memory for the filename
59
       */
60
      arr_ptr[optout] = alloca (max_file_path_size + 1);
61
62
      strcpy (arr_ptr[optout], result_name);
63
64
      /*
65
         If we don't tack on a null character at the end of the 
66
         filename,
67
         we can get partial data which is still there from the last
68
         sys$search command.
69
       */
70
      arr_ptr[optout][nam.nam$b_dev +
71
		      nam.nam$b_dir +
72
		      nam.nam$b_name +
73
		      nam.nam$b_type +
74
		      nam.nam$b_ver] = '\0';
75
76
      fab_stat = sys$search (&fab);
77
      optout++;
78
    }
79
80
  optout--;
81
82
  /* Return a pointer to the beginning of memory that has the expanded
83
     filenames.
84
   */
85
  *argcp = optout;
86
  *argvp = arr_ptr;
87
88
}
(-)grep/vms_fab.h (+20 lines)
Line 0 Link Here
1
/*
2
   This file includes the setup for the file access block for VMS.
3
   Written by Phillip C. Brisco 8/98.
4
 */
5
6
#include <rms.h>
7
#include <ssdef.h>
8
#include <stddef.h>
9
10
struct FAB fab;
11
struct NAM nam;
12
13
int length_of_fna_buffer;
14
int fab_stat;
15
char expanded_name[NAM$C_MAXRSS];
16
char fna_buffer[NAM$C_MAXRSS];
17
char result_name[NAM$C_MAXRSS];
18
char final_name[NAM$C_MAXRSS];
19
int max_file_path_size = NAM$C_MAXRSS;
20
/* Table of expanded file names filled in by vms_fab.  */
char *arr_ptr[32767];

Return to bug 13935