source: scripts/patches/grep-2.5.1a-redhat_fixes-2.patch@ ed3a13d4

clfs-1.2 clfs-2.1 clfs-3.0.0-systemd clfs-3.0.0-sysvinit systemd sysvinit
Last change on this file since ed3a13d4 was c0cf39e, checked in by Jim Gifford <clfs@…>, 19 years ago

r2506@server (orig r1245): ryan | 2006-03-10 02:07:10 -0800

r1295@rei: lfs | 2006-03-09 18:29:16 +1100
Add grep-2.5.1a-redhat_fixes-2.patch
------------------------------------
Submitted by: Alexander E. Patrakov
Date: 2005-08-13
Initial Package Version: 2.5.1a
Upstream Status: Partially accepted, partially rejected, but required for LSB >= 2.0 certification
Origin: RedHat
Description: Various fixes from RedHat. Individual patches:


grep-2.5.1-fgrep.patch
grep-2.5.1-bracket.patch
grep-2.5-i18n.patch
grep-2.5.1-oi.patch
grep-2.5.1-manpage.patch
grep-2.5.1-color.patch
grep-2.5.1-icolor.patch
grep-2.5.1-egf-speedup.patch
grep-2.5.1-dfa-optional.patch
grep-2.5.1-tests.patch
grep-2.5.1-w.patch


Testcases:


-fgrep: ???, but required for other patches
-bracket: echo "[" | LANG=en_US.UTF-8 grep "[[:space:]]"
-i18n: many fixes for multibyte locale support, required for LSB.
-oi: echo xxYYzz | LANG=C grep -i -o yy
-manpage: man page typos (adds the missing .TP before -P; corrects --line-buffering to --line-buffered)
-color: restore the background color correctly
-icolor: ??? echo 'spam foo SPAM FOO' | grep -i --color spam

(but that's also fixed by -oi. Is this patch just a cleanup?)

-egf-speedup: without this, grep is as slow as a snail in UTF-8 locales.
-dfa-optional: disables dfa in multibyte locales by default (the GREP_USE_DFA environment variable overrides the default).
-w: (echo 'foo';echo 'fo') > /tmp/testfile && grep -F -w fo /tmp/testfile



  • Property mode set to 100644
File size: 54.8 KB
RevLine 
[c0cf39e]1Submitted by: Alexander E. Patrakov
2Date: 2005-08-13
3Initial Package Version: 2.5.1a
4Upstream Status: Partially accepted, partially rejected, but required for LSB >= 2.0 certification
5Origin: RedHat
6Description: Various fixes from RedHat. Individual patches:
7
8 grep-2.5.1-fgrep.patch
9 grep-2.5.1-bracket.patch
10 grep-2.5-i18n.patch
11 grep-2.5.1-oi.patch
12 grep-2.5.1-manpage.patch
13 grep-2.5.1-color.patch
14 grep-2.5.1-icolor.patch
15 grep-2.5.1-egf-speedup.patch
16 grep-2.5.1-dfa-optional.patch
17 grep-2.5.1-tests.patch
18 grep-2.5.1-w.patch
19
20Testcases:
21
22 -fgrep: ???, but required for other patches
23 -bracket: echo "[" | LANG=en_US.UTF-8 grep "[[:space:]]"
24 -i18n: many fixes for multibyte locale support, required for LSB.
25 -oi: echo xxYYzz | LANG=C grep -i -o yy
26 -manpage: typo
27 -color: restore the background color correctly
28 -icolor: ??? echo 'spam foo SPAM FOO' | grep -i --color spam
29 (but that's also fixed by -oi. Is this patch just a cleanup?)
30 -egf-speedup: without this, grep is as slow as a snail in UTF-8 locales.
31 -dfa-optional: disables dfa in multibyte locales by default.
32 -w: (echo 'foo';echo 'fo') > /tmp/testfile && grep -F -w fo /tmp/testfile
33
34diff -urN grep-2.5.1a.orig/doc/grep.1 grep-2.5.1a/doc/grep.1
35--- grep-2.5.1a.orig/doc/grep.1 2004-11-12 16:26:37.000000000 +0500
36+++ grep-2.5.1a/doc/grep.1 2005-10-23 09:49:43.000000000 +0600
37@@ -191,6 +191,7 @@
38 .I PATTERN
39 as a list of fixed strings, separated by newlines,
40 any of which is to be matched.
41+.TP
42 .BR \-P ", " \-\^\-perl-regexp
43 Interpret
44 .I PATTERN
45@@ -302,7 +303,7 @@
46 This is especially useful for tools like zgrep, e.g.
47 .B "gzip -cd foo.gz |grep --label=foo something"
48 .TP
49-.BR \-\^\-line-buffering
50+.BR \-\^\-line-buffered
51 Use line buffering, it can be a performance penality.
52 .TP
53 .BR \-q ", " \-\^\-quiet ", " \-\^\-silent
54diff -urN grep-2.5.1a.orig/lib/posix/regex.h grep-2.5.1a/lib/posix/regex.h
55--- grep-2.5.1a.orig/lib/posix/regex.h 2001-04-02 23:56:50.000000000 +0600
56+++ grep-2.5.1a/lib/posix/regex.h 2005-10-23 09:49:31.000000000 +0600
57@@ -109,6 +109,10 @@
58 If not set, \{, \}, {, and } are literals. */
59 #define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
60
61+/* If this bit is set, then ignore case when matching.
62+ If not set, then case is significant. */
63+#define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1)
64+
65 /* If this bit is set, +, ? and | aren't recognized as operators.
66 If not set, they are. */
67 #define RE_LIMITED_OPS (RE_INTERVALS << 1)
68diff -urN grep-2.5.1a.orig/src/dfa.c grep-2.5.1a/src/dfa.c
69--- grep-2.5.1a.orig/src/dfa.c 2001-09-26 22:57:55.000000000 +0600
70+++ grep-2.5.1a/src/dfa.c 2005-10-23 09:49:17.000000000 +0600
71@@ -414,7 +414,7 @@
72
73 /* This function fetch a wide character, and update cur_mb_len,
74 used only if the current locale is a multibyte environment. */
75-static wchar_t
76+static wint_t
77 fetch_wc (char const *eoferr)
78 {
79 wchar_t wc;
80@@ -423,7 +423,7 @@
81 if (eoferr != 0)
82 dfaerror (eoferr);
83 else
84- return -1;
85+ return WEOF;
86 }
87
88 cur_mb_len = mbrtowc(&wc, lexptr, lexleft, &mbs);
89@@ -459,7 +459,7 @@
90 static void
91 parse_bracket_exp_mb ()
92 {
93- wchar_t wc, wc1, wc2;
94+ wint_t wc, wc1, wc2;
95
96 /* Work area to build a mb_char_classes. */
97 struct mb_char_classes *work_mbc;
98@@ -496,7 +496,7 @@
99 work_mbc->invert = 0;
100 do
101 {
102- wc1 = -1; /* mark wc1 is not initialized". */
103+ wc1 = WEOF; /* mark wc1 is not initialized". */
104
105 /* Note that if we're looking at some other [:...:] construct,
106 we just treat it as a bunch of ordinary characters. We can do
107@@ -586,7 +586,7 @@
108 work_mbc->coll_elems[work_mbc->ncoll_elems++] = elem;
109 }
110 }
111- wc = -1;
112+ wc1 = wc = WEOF;
113 }
114 else
115 /* We treat '[' as a normal character here. */
116@@ -600,7 +600,7 @@
117 wc = fetch_wc(("Unbalanced ["));
118 }
119
120- if (wc1 == -1)
121+ if (wc1 == WEOF)
122 wc1 = fetch_wc(_("Unbalanced ["));
123
124 if (wc1 == L'-')
125@@ -630,17 +630,17 @@
126 }
127 REALLOC_IF_NECESSARY(work_mbc->range_sts, wchar_t,
128 range_sts_al, work_mbc->nranges + 1);
129- work_mbc->range_sts[work_mbc->nranges] = wc;
130+ work_mbc->range_sts[work_mbc->nranges] = (wchar_t)wc;
131 REALLOC_IF_NECESSARY(work_mbc->range_ends, wchar_t,
132 range_ends_al, work_mbc->nranges + 1);
133- work_mbc->range_ends[work_mbc->nranges++] = wc2;
134+ work_mbc->range_ends[work_mbc->nranges++] = (wchar_t)wc2;
135 }
136- else if (wc != -1)
137+ else if (wc != WEOF)
138 /* build normal characters. */
139 {
140 REALLOC_IF_NECESSARY(work_mbc->chars, wchar_t, chars_al,
141 work_mbc->nchars + 1);
142- work_mbc->chars[work_mbc->nchars++] = wc;
143+ work_mbc->chars[work_mbc->nchars++] = (wchar_t)wc;
144 }
145 }
146 while ((wc = wc1) != L']');
147@@ -2552,6 +2552,8 @@
148 }
149
150 /* match with a character? */
151+ if (case_fold)
152+ wc = towlower (wc);
153 for (i = 0; i<work_mbc->nchars; i++)
154 {
155 if (wc == work_mbc->chars[i])
156diff -urN grep-2.5.1a.orig/src/grep.c grep-2.5.1a/src/grep.c
157--- grep-2.5.1a.orig/src/grep.c 2004-11-12 16:25:35.000000000 +0500
158+++ grep-2.5.1a/src/grep.c 2005-10-23 09:50:06.000000000 +0600
159@@ -30,6 +30,12 @@
160 # include <sys/time.h>
161 # include <sys/resource.h>
162 #endif
163+#if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC
164+/* We can handle multibyte string. */
165+# define MBS_SUPPORT
166+# include <wchar.h>
167+# include <wctype.h>
168+#endif
169 #include <stdio.h>
170 #include "system.h"
171 #include "getopt.h"
172@@ -558,33 +564,6 @@
173 {
174 size_t match_size;
175 size_t match_offset;
176- if(match_icase)
177- {
178- /* Yuck, this is tricky */
179- char *buf = (char*) xmalloc (lim - beg);
180- char *ibeg = buf;
181- char *ilim = ibeg + (lim - beg);
182- int i;
183- for (i = 0; i < lim - beg; i++)
184- ibeg[i] = tolower (beg[i]);
185- while ((match_offset = (*execute) (ibeg, ilim-ibeg, &match_size, 1))
186- != (size_t) -1)
187- {
188- char const *b = beg + match_offset;
189- if (b == lim)
190- break;
191- fwrite (beg, sizeof (char), match_offset, stdout);
192- printf ("\33[%sm", grep_color);
193- fwrite (b, sizeof (char), match_size, stdout);
194- fputs ("\33[00m", stdout);
195- beg = b + match_size;
196- ibeg = ibeg + match_offset + match_size;
197- }
198- fwrite (beg, 1, lim - beg, stdout);
199- free (buf);
200- lastout = lim;
201- return;
202- }
203 while (lim-beg && (match_offset = (*execute) (beg, lim - beg, &match_size, 1))
204 != (size_t) -1)
205 {
206@@ -601,6 +580,7 @@
207 fputs ("\33[00m", stdout);
208 beg = b + match_size;
209 }
210+ fputs ("\33[K", stdout);
211 }
212 fwrite (beg, 1, lim - beg, stdout);
213 if (ferror (stdout))
214@@ -1697,6 +1677,37 @@
215 if (!install_matcher (matcher) && !install_matcher ("default"))
216 abort ();
217
218+#ifdef MBS_SUPPORT
219+ if (MB_CUR_MAX != 1 && match_icase)
220+ {
221+ wchar_t wc;
222+ mbstate_t cur_state, prev_state;
223+ int i, len = strlen(keys);
224+
225+ memset(&cur_state, 0, sizeof(mbstate_t));
226+ for (i = 0; i <= len ;)
227+ {
228+ size_t mbclen;
229+ mbclen = mbrtowc(&wc, keys + i, len - i, &cur_state);
230+ if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
231+ {
232+ /* An invalid sequence, or a truncated multibyte character.
233+ We treat it as a singlebyte character. */
234+ mbclen = 1;
235+ }
236+ else
237+ {
238+ if (iswupper((wint_t)wc))
239+ {
240+ wc = towlower((wint_t)wc);
241+ wcrtomb(keys + i, wc, &cur_state);
242+ }
243+ }
244+ i += mbclen;
245+ }
246+ }
247+#endif /* MBS_SUPPORT */
248+
249 (*compile)(keys, keycc);
250
251 if ((argc - optind > 1 && !no_filenames) || with_filenames)
252diff -urN grep-2.5.1a.orig/src/search.c grep-2.5.1a/src/search.c
253--- grep-2.5.1a.orig/src/search.c 2001-04-19 09:42:14.000000000 +0600
254+++ grep-2.5.1a/src/search.c 2005-10-23 09:51:25.000000000 +0600
255@@ -18,9 +18,13 @@
256
257 /* Written August 1992 by Mike Haertel. */
258
259+#ifndef _GNU_SOURCE
260+# define _GNU_SOURCE 1
261+#endif
262 #ifdef HAVE_CONFIG_H
263 # include <config.h>
264 #endif
265+#include <assert.h>
266 #include <sys/types.h>
267 #if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC
268 /* We can handle multibyte string. */
269@@ -31,7 +35,7 @@
270
271 #include "system.h"
272 #include "grep.h"
273-#include "regex.h"
274+#include <regex.h>
275 #include "dfa.h"
276 #include "kwset.h"
277 #include "error.h"
278@@ -39,6 +43,9 @@
279 #ifdef HAVE_LIBPCRE
280 # include <pcre.h>
281 #endif
282+#ifdef HAVE_LANGINFO_CODESET
283+# include <langinfo.h>
284+#endif
285
286 #define NCHAR (UCHAR_MAX + 1)
287
288@@ -70,9 +77,10 @@
289 call the regexp matcher at all. */
290 static int kwset_exact_matches;
291
292-#if defined(MBS_SUPPORT)
293-static char* check_multibyte_string PARAMS ((char const *buf, size_t size));
294-#endif
295+/* UTF-8 encoding allows some optimizations that we can't otherwise
296+ assume in a multibyte encoding. */
297+static int using_utf8;
298+
299 static void kwsinit PARAMS ((void));
300 static void kwsmusts PARAMS ((void));
301 static void Gcompile PARAMS ((char const *, size_t));
302@@ -84,6 +92,15 @@
303 static size_t Pexecute PARAMS ((char const *, size_t, size_t *, int));
304
305 void
306+check_utf8 (void)
307+{
308+#ifdef HAVE_LANGINFO_CODESET
309+ if (strcmp (nl_langinfo (CODESET), "UTF-8") == 0)
310+ using_utf8 = 1;
311+#endif
312+}
313+
314+void
315 dfaerror (char const *mesg)
316 {
317 error (2, 0, mesg);
318@@ -141,38 +158,6 @@
319 }
320 }
321
322-#ifdef MBS_SUPPORT
323-/* This function allocate the array which correspond to "buf".
324- Then this check multibyte string and mark on the positions which
325- are not singlebyte character nor the first byte of a multibyte
326- character. Caller must free the array. */
327-static char*
328-check_multibyte_string(char const *buf, size_t size)
329-{
330- char *mb_properties = malloc(size);
331- mbstate_t cur_state;
332- int i;
333- memset(&cur_state, 0, sizeof(mbstate_t));
334- memset(mb_properties, 0, sizeof(char)*size);
335- for (i = 0; i < size ;)
336- {
337- size_t mbclen;
338- mbclen = mbrlen(buf + i, size - i, &cur_state);
339-
340- if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
341- {
342- /* An invalid sequence, or a truncated multibyte character.
343- We treat it as a singlebyte character. */
344- mbclen = 1;
345- }
346- mb_properties[i] = mbclen;
347- i += mbclen;
348- }
349-
350- return mb_properties;
351-}
352-#endif
353-
354 static void
355 Gcompile (char const *pattern, size_t size)
356 {
357@@ -181,7 +166,8 @@
358 size_t total = size;
359 char const *motif = pattern;
360
361- re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE);
362+ check_utf8 ();
363+ re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE | (match_icase ? RE_ICASE : 0));
364 dfasyntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase, eolbyte);
365
366 /* For GNU regex compiler we have to pass the patterns separately to detect
367@@ -233,7 +219,7 @@
368 static char const line_end[] = "\\)$";
369 static char const word_beg[] = "\\(^\\|[^[:alnum:]_]\\)\\(";
370 static char const word_end[] = "\\)\\([^[:alnum:]_]\\|$\\)";
371- char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end);
372+ char *n = xmalloc (sizeof word_beg - 1 + size + sizeof word_end);
373 size_t i;
374 strcpy (n, match_lines ? line_beg : word_beg);
375 i = strlen (n);
376@@ -257,14 +243,15 @@
377 size_t total = size;
378 char const *motif = pattern;
379
380+ check_utf8 ();
381 if (strcmp (matcher, "awk") == 0)
382 {
383- re_set_syntax (RE_SYNTAX_AWK);
384+ re_set_syntax (RE_SYNTAX_AWK | (match_icase ? RE_ICASE : 0));
385 dfasyntax (RE_SYNTAX_AWK, match_icase, eolbyte);
386 }
387 else
388 {
389- re_set_syntax (RE_SYNTAX_POSIX_EGREP);
390+ re_set_syntax (RE_SYNTAX_POSIX_EGREP | (match_icase ? RE_ICASE : 0));
391 dfasyntax (RE_SYNTAX_POSIX_EGREP, match_icase, eolbyte);
392 }
393
394@@ -316,7 +303,7 @@
395 static char const line_end[] = ")$";
396 static char const word_beg[] = "(^|[^[:alnum:]_])(";
397 static char const word_end[] = ")([^[:alnum:]_]|$)";
398- char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end);
399+ char *n = xmalloc (sizeof word_beg - 1 + size + sizeof word_end);
400 size_t i;
401 strcpy (n, match_lines ? line_beg : word_beg);
402 i = strlen(n);
403@@ -339,15 +326,35 @@
404 char eol = eolbyte;
405 int backref, start, len;
406 struct kwsmatch kwsm;
407- size_t i;
408+ size_t i, ret_val;
409+ static int use_dfa;
410+ static int use_dfa_checked = 0;
411 #ifdef MBS_SUPPORT
412- char *mb_properties = NULL;
413+ const char *last_char = NULL;
414+ int mb_cur_max = MB_CUR_MAX;
415+ mbstate_t mbs;
416+ memset (&mbs, '\0', sizeof (mbstate_t));
417 #endif /* MBS_SUPPORT */
418
419+ if (!use_dfa_checked)
420+ {
421+ char *grep_use_dfa = getenv ("GREP_USE_DFA");
422+ if (!grep_use_dfa)
423+ {
424 #ifdef MBS_SUPPORT
425- if (MB_CUR_MAX > 1 && kwset)
426- mb_properties = check_multibyte_string(buf, size);
427+ /* Turn off DFA when processing multibyte input. */
428+ use_dfa = (MB_CUR_MAX == 1);
429+#else
430+ use_dfa = 1;
431 #endif /* MBS_SUPPORT */
432+ }
433+ else
434+ {
435+ use_dfa = atoi (grep_use_dfa);
436+ }
437+
438+ use_dfa_checked = 1;
439+ }
440
441 buflim = buf + size;
442
443@@ -358,47 +365,124 @@
444 if (kwset)
445 {
446 /* Find a possible match using the KWset matcher. */
447- size_t offset = kwsexec (kwset, beg, buflim - beg, &kwsm);
448+#ifdef MBS_SUPPORT
449+ size_t bytes_left = 0;
450+#endif /* MBS_SUPPORT */
451+ size_t offset;
452+#ifdef MBS_SUPPORT
453+ /* kwsexec doesn't work with match_icase and multibyte input. */
454+ if (match_icase && mb_cur_max > 1)
455+ /* Avoid kwset */
456+ offset = 0;
457+ else
458+#endif /* MBS_SUPPORT */
459+ offset = kwsexec (kwset, beg, buflim - beg, &kwsm);
460 if (offset == (size_t) -1)
461- {
462+ goto failure;
463 #ifdef MBS_SUPPORT
464- if (MB_CUR_MAX > 1)
465- free(mb_properties);
466-#endif
467- return (size_t)-1;
468+ if (mb_cur_max > 1 && !using_utf8)
469+ {
470+ bytes_left = offset;
471+ while (bytes_left)
472+ {
473+ size_t mlen = mbrlen (beg, bytes_left, &mbs);
474+
475+ last_char = beg;
476+ if (mlen == (size_t) -1 || mlen == 0)
477+ {
478+ /* Incomplete character: treat as single-byte. */
479+ memset (&mbs, '\0', sizeof (mbstate_t));
480+ beg++;
481+ bytes_left--;
482+ continue;
483+ }
484+
485+ if (mlen == (size_t) -2)
486+ /* Offset points inside multibyte character:
487+ * no good. */
488+ break;
489+
490+ beg += mlen;
491+ bytes_left -= mlen;
492+ }
493 }
494+ else
495+#endif /* MBS_SUPPORT */
496 beg += offset;
497 /* Narrow down to the line containing the candidate, and
498 run it through DFA. */
499 end = memchr(beg, eol, buflim - beg);
500 end++;
501 #ifdef MBS_SUPPORT
502- if (MB_CUR_MAX > 1 && mb_properties[beg - buf] == 0)
503+ if (mb_cur_max > 1 && bytes_left)
504 continue;
505-#endif
506+#endif /* MBS_SUPPORT */
507 while (beg > buf && beg[-1] != eol)
508 --beg;
509- if (kwsm.index < kwset_exact_matches)
510- goto success;
511- if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1)
512+ if (
513+#ifdef MBS_SUPPORT
514+ !(match_icase && mb_cur_max > 1) &&
515+#endif /* MBS_SUPPORT */
516+ (kwsm.index < kwset_exact_matches))
517+ goto success_in_beg_and_end;
518+ if (use_dfa &&
519+ dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1)
520 continue;
521 }
522 else
523 {
524 /* No good fixed strings; start with DFA. */
525- size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref);
526+#ifdef MBS_SUPPORT
527+ size_t bytes_left = 0;
528+#endif /* MBS_SUPPORT */
529+ size_t offset = 0;
530+ if (use_dfa)
531+ offset = dfaexec (&dfa, beg, buflim - beg, &backref);
532 if (offset == (size_t) -1)
533 break;
534 /* Narrow down to the line we've found. */
535+#ifdef MBS_SUPPORT
536+ if (mb_cur_max > 1 && !using_utf8)
537+ {
538+ bytes_left = offset;
539+ while (bytes_left)
540+ {
541+ size_t mlen = mbrlen (beg, bytes_left, &mbs);
542+
543+ last_char = beg;
544+ if (mlen == (size_t) -1 || mlen == 0)
545+ {
546+ /* Incomplete character: treat as single-byte. */
547+ memset (&mbs, '\0', sizeof (mbstate_t));
548+ beg++;
549+ bytes_left--;
550+ continue;
551+ }
552+
553+ if (mlen == (size_t) -2)
554+ /* Offset points inside multibyte character:
555+ * no good. */
556+ break;
557+
558+ beg += mlen;
559+ bytes_left -= mlen;
560+ }
561+ }
562+ else
563+#endif /* MBS_SUPPORT */
564 beg += offset;
565 end = memchr (beg, eol, buflim - beg);
566 end++;
567+#ifdef MBS_SUPPORT
568+ if (mb_cur_max > 1 && bytes_left)
569+ continue;
570+#endif /* MBS_SUPPORT */
571 while (beg > buf && beg[-1] != eol)
572 --beg;
573 }
574 /* Successful, no backreferences encountered! */
575- if (!backref)
576- goto success;
577+ if (use_dfa && !backref)
578+ goto success_in_beg_and_end;
579 }
580 else
581 end = beg + size;
582@@ -413,14 +497,11 @@
583 end - beg - 1, &(patterns[i].regs))))
584 {
585 len = patterns[i].regs.end[0] - start;
586- if (exact)
587- {
588- *match_size = len;
589- return start;
590- }
591+ if (exact && !match_words)
592+ goto success_in_start_and_len;
593 if ((!match_lines && !match_words)
594 || (match_lines && len == end - beg - 1))
595- goto success;
596+ goto success_in_beg_and_end;
597 /* If -w, check if the match aligns with word boundaries.
598 We do this iteratively because:
599 (a) the line may contain more than one occurence of the
600@@ -431,10 +512,84 @@
601 if (match_words)
602 while (start >= 0)
603 {
604- if ((start == 0 || !WCHAR ((unsigned char) beg[start - 1]))
605- && (len == end - beg - 1
606- || !WCHAR ((unsigned char) beg[start + len])))
607- goto success;
608+ int lword_match = 0;
609+ if (start == 0)
610+ lword_match = 1;
611+ else
612+ {
613+ assert (start > 0);
614+#ifdef MBS_SUPPORT
615+ if (mb_cur_max > 1)
616+ {
617+ const char *s;
618+ int mr;
619+ wchar_t pwc;
620+
621+ if (using_utf8)
622+ {
623+ s = beg + start - 1;
624+ while (s > buf
625+ && (unsigned char) *s >= 0x80
626+ && (unsigned char) *s <= 0xbf)
627+ --s;
628+ }
629+ else
630+ s = last_char;
631+ mr = mbtowc (&pwc, s, beg + start - s);
632+ if (mr <= 0)
633+ {
634+ memset (&mbs, '\0', sizeof (mbstate_t));
635+ lword_match = 1;
636+ }
637+ else if (!(iswalnum (pwc) || pwc == L'_')
638+ && mr == (int) (beg + start - s))
639+ lword_match = 1;
640+ }
641+ else
642+#endif /* MBS_SUPPORT */
643+ if (!WCHAR ((unsigned char) beg[start - 1]))
644+ lword_match = 1;
645+ }
646+
647+ if (lword_match)
648+ {
649+ int rword_match = 0;
650+ if (start + len == end - beg - 1)
651+ rword_match = 1;
652+ else
653+ {
654+#ifdef MBS_SUPPORT
655+ if (mb_cur_max > 1)
656+ {
657+ wchar_t nwc;
658+ int mr;
659+
660+ mr = mbtowc (&nwc, beg + start + len,
661+ end - beg - start - len - 1);
662+ if (mr <= 0)
663+ {
664+ memset (&mbs, '\0', sizeof (mbstate_t));
665+ rword_match = 1;
666+ }
667+ else if (!iswalnum (nwc) && nwc != L'_')
668+ rword_match = 1;
669+ }
670+ else
671+#endif /* MBS_SUPPORT */
672+ if (!WCHAR ((unsigned char) beg[start + len]))
673+ rword_match = 1;
674+ }
675+
676+ if (rword_match)
677+ {
678+ if (!exact)
679+ /* Returns the whole line. */
680+ goto success_in_beg_and_end;
681+ else
682+ /* Returns just this word match. */
683+ goto success_in_start_and_len;
684+ }
685+ }
686 if (len > 0)
687 {
688 /* Try a shorter length anchored at the same place. */
689@@ -461,26 +616,154 @@
690 }
691 } /* for Regex patterns. */
692 } /* for (beg = end ..) */
693-#ifdef MBS_SUPPORT
694- if (MB_CUR_MAX > 1 && mb_properties)
695- free (mb_properties);
696-#endif /* MBS_SUPPORT */
697+
698+ failure:
699 return (size_t) -1;
700
701- success:
702-#ifdef MBS_SUPPORT
703- if (MB_CUR_MAX > 1 && mb_properties)
704- free (mb_properties);
705-#endif /* MBS_SUPPORT */
706- *match_size = end - beg;
707- return beg - buf;
708+ success_in_beg_and_end:
709+ len = end - beg;
710+ start = beg - buf;
711+ /* FALLTHROUGH */
712+
713+ success_in_start_and_len:
714+ *match_size = len;
715+ return start;
716 }
717
718+#ifdef MBS_SUPPORT
719+static int f_i_multibyte; /* whether we're using the new -Fi MB method */
720+static struct
721+{
722+ wchar_t **patterns;
723+ size_t count, maxlen;
724+ unsigned char *match;
725+} Fimb;
726+#endif
727+
728 static void
729 Fcompile (char const *pattern, size_t size)
730 {
731+ int mb_cur_max = MB_CUR_MAX;
732 char const *beg, *lim, *err;
733
734+ check_utf8 ();
735+#ifdef MBS_SUPPORT
736+ /* Support -F -i for UTF-8 input. */
737+ if (match_icase && mb_cur_max > 1)
738+ {
739+ mbstate_t mbs;
740+ wchar_t *wcpattern = xmalloc ((size + 1) * sizeof (wchar_t));
741+ const char *patternend = pattern;
742+ size_t wcsize;
743+ kwset_t fimb_kwset = NULL;
744+ char *starts = NULL;
745+ wchar_t *wcbeg, *wclim;
746+ size_t allocated = 0;
747+
748+ memset (&mbs, '\0', sizeof (mbs));
749+# ifdef __GNU_LIBRARY__
750+ wcsize = mbsnrtowcs (wcpattern, &patternend, size, size, &mbs);
751+ if (patternend != pattern + size)
752+ wcsize = (size_t) -1;
753+# else
754+ {
755+ char *patterncopy = xmalloc (size + 1);
756+
757+ memcpy (patterncopy, pattern, size);
758+ patterncopy[size] = '\0';
759+ patternend = patterncopy;
760+ wcsize = mbsrtowcs (wcpattern, &patternend, size, &mbs);
761+ if (patternend != patterncopy + size)
762+ wcsize = (size_t) -1;
763+ free (patterncopy);
764+ }
765+# endif
766+ if (wcsize + 2 <= 2)
767+ {
768+fimb_fail:
769+ free (wcpattern);
770+ free (starts);
771+ if (fimb_kwset)
772+ kwsfree (fimb_kwset);
773+ free (Fimb.patterns);
774+ Fimb.patterns = NULL;
775+ }
776+ else
777+ {
778+ if (!(fimb_kwset = kwsalloc (NULL)))
779+ error (2, 0, _("memory exhausted"));
780+
781+ starts = xmalloc (mb_cur_max * 3);
782+ wcbeg = wcpattern;
783+ do
784+ {
785+ int i;
786+ size_t wclen;
787+
788+ if (Fimb.count >= allocated)
789+ {
790+ if (allocated == 0)
791+ allocated = 128;
792+ else
793+ allocated *= 2;
794+ Fimb.patterns = xrealloc (Fimb.patterns,
795+ sizeof (wchar_t *) * allocated);
796+ }
797+ Fimb.patterns[Fimb.count++] = wcbeg;
798+ for (wclim = wcbeg;
799+ wclim < wcpattern + wcsize && *wclim != L'\n'; ++wclim)
800+ *wclim = towlower (*wclim);
801+ *wclim = L'\0';
802+ wclen = wclim - wcbeg;
803+ if (wclen > Fimb.maxlen)
804+ Fimb.maxlen = wclen;
805+ if (wclen > 3)
806+ wclen = 3;
807+ if (wclen == 0)
808+ {
809+ if ((err = kwsincr (fimb_kwset, "", 0)) != 0)
810+ error (2, 0, err);
811+ }
812+ else
813+ for (i = 0; i < (1 << wclen); i++)
814+ {
815+ char *p = starts;
816+ int j, k;
817+
818+ for (j = 0; j < wclen; ++j)
819+ {
820+ wchar_t wc = wcbeg[j];
821+ if (i & (1 << j))
822+ {
823+ wc = towupper (wc);
824+ if (wc == wcbeg[j])
825+ continue;
826+ }
827+ k = wctomb (p, wc);
828+ if (k <= 0)
829+ goto fimb_fail;
830+ p += k;
831+ }
832+ if ((err = kwsincr (fimb_kwset, starts, p - starts)) != 0)
833+ error (2, 0, err);
834+ }
835+ if (wclim < wcpattern + wcsize)
836+ ++wclim;
837+ wcbeg = wclim;
838+ }
839+ while (wcbeg < wcpattern + wcsize);
840+ f_i_multibyte = 1;
841+ kwset = fimb_kwset;
842+ free (starts);
843+ Fimb.match = xmalloc (Fimb.count);
844+ if ((err = kwsprep (kwset)) != 0)
845+ error (2, 0, err);
846+ return;
847+ }
848+ }
849+#endif /* MBS_SUPPORT */
850+
851+
852 kwsinit ();
853 beg = pattern;
854 do
855@@ -499,6 +782,76 @@
856 error (2, 0, err);
857 }
858
859+#ifdef MBS_SUPPORT
860+static int
861+Fimbexec (const char *buf, size_t size, size_t *plen, int exact)
862+{
863+ size_t len, letter, i;
864+ int ret = -1;
865+ mbstate_t mbs;
866+ wchar_t wc;
867+ int patterns_left;
868+
869+ assert (match_icase && f_i_multibyte == 1);
870+ assert (MB_CUR_MAX > 1);
871+
872+ memset (&mbs, '\0', sizeof (mbs));
873+ memset (Fimb.match, '\1', Fimb.count);
874+ letter = len = 0;
875+ patterns_left = 1;
876+ while (patterns_left && len <= size)
877+ {
878+ size_t c;
879+
880+ patterns_left = 0;
881+ if (len < size)
882+ {
883+ c = mbrtowc (&wc, buf + len, size - len, &mbs);
884+ if (c + 2 <= 2)
885+ return ret;
886+
887+ wc = towlower (wc);
888+ }
889+ else
890+ {
891+ c = 1;
892+ wc = L'\0';
893+ }
894+
895+ for (i = 0; i < Fimb.count; i++)
896+ {
897+ if (Fimb.match[i])
898+ {
899+ if (Fimb.patterns[i][letter] == L'\0')
900+ {
901+ /* Found a match. */
902+ *plen = len;
903+ if (!exact && !match_words)
904+ return 0;
905+ else
906+ {
907+ /* For -w or exact look for longest match. */
908+ ret = 0;
909+ Fimb.match[i] = '\0';
910+ continue;
911+ }
912+ }
913+
914+ if (Fimb.patterns[i][letter] == wc)
915+ patterns_left = 1;
916+ else
917+ Fimb.match[i] = '\0';
918+ }
919+ }
920+
921+ len += c;
922+ letter++;
923+ }
924+
925+ return ret;
926+}
927+#endif /* MBS_SUPPORT */
928+
929 static size_t
930 Fexecute (char const *buf, size_t size, size_t *match_size, int exact)
931 {
932@@ -506,88 +859,268 @@
933 register size_t len;
934 char eol = eolbyte;
935 struct kwsmatch kwsmatch;
936+ size_t ret_val;
937 #ifdef MBS_SUPPORT
938- char *mb_properties;
939- if (MB_CUR_MAX > 1)
940- mb_properties = check_multibyte_string (buf, size);
941+ int mb_cur_max = MB_CUR_MAX;
942+ mbstate_t mbs;
943+ memset (&mbs, '\0', sizeof (mbstate_t));
944+ const char *last_char = NULL;
945 #endif /* MBS_SUPPORT */
946
947 for (beg = buf; beg <= buf + size; ++beg)
948 {
949- size_t offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch);
950+ size_t offset;
951+ offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch);
952+
953 if (offset == (size_t) -1)
954- {
955+ goto failure;
956 #ifdef MBS_SUPPORT
957- if (MB_CUR_MAX > 1)
958- free(mb_properties);
959-#endif /* MBS_SUPPORT */
960- return offset;
961+ if (mb_cur_max > 1 && !using_utf8)
962+ {
963+ size_t bytes_left = offset;
964+ while (bytes_left)
965+ {
966+ size_t mlen = mbrlen (beg, bytes_left, &mbs);
967+
968+ last_char = beg;
969+ if (mlen == (size_t) -1 || mlen == 0)
970+ {
971+ /* Incomplete character: treat as single-byte. */
972+ memset (&mbs, '\0', sizeof (mbstate_t));
973+ beg++;
974+ bytes_left--;
975+ continue;
976+ }
977+
978+ if (mlen == (size_t) -2)
979+ /* Offset points inside multibyte character: no good. */
980+ break;
981+
982+ beg += mlen;
983+ bytes_left -= mlen;
984+ }
985+
986+ if (bytes_left)
987+ continue;
988 }
989-#ifdef MBS_SUPPORT
990- if (MB_CUR_MAX > 1 && mb_properties[offset+beg-buf] == 0)
991- continue; /* It is a part of multibyte character. */
992+ else
993 #endif /* MBS_SUPPORT */
994 beg += offset;
995- len = kwsmatch.size[0];
996- if (exact)
997- {
998- *match_size = len;
999 #ifdef MBS_SUPPORT
1000- if (MB_CUR_MAX > 1)
1001- free (mb_properties);
1002+ /* For f_i_multibyte, the string at beg now matches first 3 chars of
1003+ one of the search strings (less if there are shorter search strings).
1004+ See if this is a real match. */
1005+ if (f_i_multibyte
1006+ && Fimbexec (beg, buf + size - beg, &kwsmatch.size[0], exact))
1007+ goto next_char;
1008 #endif /* MBS_SUPPORT */
1009- return beg - buf;
1010- }
1011+ len = kwsmatch.size[0];
1012+ if (exact && !match_words)
1013+ goto success_in_beg_and_len;
1014 if (match_lines)
1015 {
1016 if (beg > buf && beg[-1] != eol)
1017- continue;
1018+ goto next_char;
1019 if (beg + len < buf + size && beg[len] != eol)
1020- continue;
1021+ goto next_char;
1022 goto success;
1023 }
1024 else if (match_words)
1025- for (try = beg; len; )
1026- {
1027- if (try > buf && WCHAR((unsigned char) try[-1]))
1028- break;
1029- if (try + len < buf + size && WCHAR((unsigned char) try[len]))
1030- {
1031- offset = kwsexec (kwset, beg, --len, &kwsmatch);
1032- if (offset == (size_t) -1)
1033- {
1034+ {
1035+ while (len)
1036+ {
1037+ int word_match = 0;
1038+ if (beg > buf)
1039+ {
1040 #ifdef MBS_SUPPORT
1041- if (MB_CUR_MAX > 1)
1042- free (mb_properties);
1043+ if (mb_cur_max > 1)
1044+ {
1045+ const char *s;
1046+ int mr;
1047+ wchar_t pwc;
1048+
1049+ if (using_utf8)
1050+ {
1051+ s = beg - 1;
1052+ while (s > buf
1053+ && (unsigned char) *s >= 0x80
1054+ && (unsigned char) *s <= 0xbf)
1055+ --s;
1056+ }
1057+ else
1058+ s = last_char;
1059+ mr = mbtowc (&pwc, s, beg - s);
1060+ if (mr <= 0)
1061+ memset (&mbs, '\0', sizeof (mbstate_t));
1062+ else if ((iswalnum (pwc) || pwc == L'_')
1063+ && mr == (int) (beg - s))
1064+ goto next_char;
1065+ }
1066+ else
1067 #endif /* MBS_SUPPORT */
1068- return offset;
1069- }
1070- try = beg + offset;
1071- len = kwsmatch.size[0];
1072- }
1073- else
1074- goto success;
1075- }
1076+ if (WCHAR ((unsigned char) beg[-1]))
1077+ goto next_char;
1078+ }
1079+#ifdef MBS_SUPPORT
1080+ if (mb_cur_max > 1)
1081+ {
1082+ wchar_t nwc;
1083+ int mr;
1084+
1085+ mr = mbtowc (&nwc, beg + len, buf + size - beg - len);
1086+ if (mr <= 0)
1087+ {
1088+ memset (&mbs, '\0', sizeof (mbstate_t));
1089+ word_match = 1;
1090+ }
1091+ else if (!iswalnum (nwc) && nwc != L'_')
1092+ word_match = 1;
1093+ }
1094+ else
1095+#endif /* MBS_SUPPORT */
1096+ if (beg + len >= buf + size || !WCHAR ((unsigned char) beg[len]))
1097+ word_match = 1;
1098+ if (word_match)
1099+ {
1100+ if (!exact)
1101+ /* Returns the whole line now we know there's a word match. */
1102+ goto success;
1103+ else
1104+ /* Returns just this word match. */
1105+ goto success_in_beg_and_len;
1106+ }
1107+ if (len > 0)
1108+ {
1109+ /* Try a shorter length anchored at the same place. */
1110+ --len;
1111+ offset = kwsexec (kwset, beg, len, &kwsmatch);
1112+
1113+ if (offset == -1)
1114+ goto next_char; /* Try a different anchor. */
1115+#ifdef MBS_SUPPORT
1116+ if (mb_cur_max > 1 && !using_utf8)
1117+ {
1118+ size_t bytes_left = offset;
1119+ while (bytes_left)
1120+ {
1121+ size_t mlen = mbrlen (beg, bytes_left, &mbs);
1122+
1123+ last_char = beg;
1124+ if (mlen == (size_t) -1 || mlen == 0)
1125+ {
1126+ /* Incomplete character: treat as single-byte. */
1127+ memset (&mbs, '\0', sizeof (mbstate_t));
1128+ beg++;
1129+ bytes_left--;
1130+ continue;
1131+ }
1132+
1133+ if (mlen == (size_t) -2)
1134+ {
1135+ /* Offset points inside multibyte character:
1136+ * no good. */
1137+ break;
1138+ }
1139+
1140+ beg += mlen;
1141+ bytes_left -= mlen;
1142+ }
1143+
1144+ if (bytes_left)
1145+ {
1146+ memset (&mbs, '\0', sizeof (mbstate_t));
1147+ goto next_char; /* Try a different anchor. */
1148+ }
1149+ }
1150+ else
1151+#endif /* MBS_SUPPORT */
1152+ beg += offset;
1153+#ifdef MBS_SUPPORT
1154+ /* The string at beg now matches first 3 chars of one of
1155+ the search strings (less if there are shorter search
1156+ strings). See if this is a real match. */
1157+ if (f_i_multibyte
1158+ && Fimbexec (beg, len - offset, &kwsmatch.size[0],
1159+ exact))
1160+ goto next_char;
1161+#endif /* MBS_SUPPORT */
1162+ len = kwsmatch.size[0];
1163+ }
1164+ }
1165+ }
1166 else
1167 goto success;
1168- }
1169-
1170+next_char:;
1171 #ifdef MBS_SUPPORT
1172- if (MB_CUR_MAX > 1)
1173- free (mb_properties);
1174+ /* Advance to next character. For MB_CUR_MAX == 1 case this is handled
1175+ by ++beg above. */
1176+ if (mb_cur_max > 1)
1177+ {
1178+ if (using_utf8)
1179+ {
1180+ unsigned char c = *beg;
1181+ if (c >= 0xc2)
1182+ {
1183+ if (c < 0xe0)
1184+ ++beg;
1185+ else if (c < 0xf0)
1186+ beg += 2;
1187+ else if (c < 0xf8)
1188+ beg += 3;
1189+ else if (c < 0xfc)
1190+ beg += 4;
1191+ else if (c < 0xfe)
1192+ beg += 5;
1193+ }
1194+ }
1195+ else
1196+ {
1197+ size_t l = mbrlen (beg, buf + size - beg, &mbs);
1198+
1199+ last_char = beg;
1200+ if (l + 2 >= 2)
1201+ beg += l - 1;
1202+ else
1203+ memset (&mbs, '\0', sizeof (mbstate_t));
1204+ }
1205+ }
1206 #endif /* MBS_SUPPORT */
1207+ }
1208+
1209+ failure:
1210 return -1;
1211
1212 success:
1213+#ifdef MBS_SUPPORT
1214+ if (mb_cur_max > 1 && !using_utf8)
1215+ {
1216+ end = beg + len;
1217+ while (end < buf + size)
1218+ {
1219+ size_t mlen = mbrlen (end, buf + size - end, &mbs);
1220+ if (mlen == (size_t) -1 || mlen == (size_t) -2 || mlen == 0)
1221+ {
1222+ memset (&mbs, '\0', sizeof (mbstate_t));
1223+ mlen = 1;
1224+ }
1225+ if (mlen == 1 && *end == eol)
1226+ break;
1227+
1228+ end += mlen;
1229+ }
1230+ }
1231+ else
1232+#endif /* MBS_SUPPORT */
1233 end = memchr (beg + len, eol, (buf + size) - (beg + len));
1234+
1235 end++;
1236 while (buf < beg && beg[-1] != eol)
1237 --beg;
1238- *match_size = end - beg;
1239-#ifdef MBS_SUPPORT
1240- if (MB_CUR_MAX > 1)
1241- free (mb_properties);
1242-#endif /* MBS_SUPPORT */
1243+ len = end - beg;
1244+ /* FALLTHROUGH */
1245+
1246+ success_in_beg_and_len:
1247+ *match_size = len;
1248 return beg - buf;
1249 }
1250
1251diff -urN grep-2.5.1a.orig/src/search.c.orig grep-2.5.1a/src/search.c.orig
1252--- grep-2.5.1a.orig/src/search.c.orig 1970-01-01 05:00:00.000000000 +0500
1253+++ grep-2.5.1a/src/search.c.orig 2005-10-23 09:48:39.000000000 +0600
1254@@ -0,0 +1,714 @@
1255+/* search.c - searching subroutines using dfa, kwset and regex for grep.
1256+ Copyright 1992, 1998, 2000 Free Software Foundation, Inc.
1257+
1258+ This program is free software; you can redistribute it and/or modify
1259+ it under the terms of the GNU General Public License as published by
1260+ the Free Software Foundation; either version 2, or (at your option)
1261+ any later version.
1262+
1263+ This program is distributed in the hope that it will be useful,
1264+ but WITHOUT ANY WARRANTY; without even the implied warranty of
1265+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1266+ GNU General Public License for more details.
1267+
1268+ You should have received a copy of the GNU General Public License
1269+ along with this program; if not, write to the Free Software
1270+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
1271+ 02111-1307, USA. */
1272+
1273+/* Written August 1992 by Mike Haertel. */
1274+
1275+#ifdef HAVE_CONFIG_H
1276+# include <config.h>
1277+#endif
1278+#include <sys/types.h>
1279+#if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC
1280+/* We can handle multibyte string. */
1281+# define MBS_SUPPORT
1282+# include <wchar.h>
1283+# include <wctype.h>
1284+#endif
1285+
1286+#include "system.h"
1287+#include "grep.h"
1288+#include "regex.h"
1289+#include "dfa.h"
1290+#include "kwset.h"
1291+#include "error.h"
1292+#include "xalloc.h"
1293+#ifdef HAVE_LIBPCRE
1294+# include <pcre.h>
1295+#endif
1296+
1297+#define NCHAR (UCHAR_MAX + 1)
1298+
1299+/* For -w, we also consider _ to be word constituent. */
1300+#define WCHAR(C) (ISALNUM(C) || (C) == '_')
1301+
1302+/* DFA compiled regexp. */
1303+static struct dfa dfa;
1304+
1305+/* The Regex compiled patterns. */
1306+static struct patterns
1307+{
1308+ /* Regex compiled regexp. */
1309+ struct re_pattern_buffer regexbuf;
1310+ struct re_registers regs; /* This is here on account of a BRAIN-DEAD
1311+ Q@#%!# library interface in regex.c. */
1312+} patterns0;
1313+
1314+struct patterns *patterns;
1315+size_t pcount;
1316+
1317+/* KWset compiled pattern. For Ecompile and Gcompile, we compile
1318+ a list of strings, at least one of which is known to occur in
1319+ any string matching the regexp. */
1320+static kwset_t kwset;
1321+
1322+/* Number of compiled fixed strings known to exactly match the regexp.
1323+ If kwsexec returns < kwset_exact_matches, then we don't need to
1324+ call the regexp matcher at all. */
1325+static int kwset_exact_matches;
1326+
1327+#if defined(MBS_SUPPORT)
1328+static char* check_multibyte_string PARAMS ((char const *buf, size_t size));
1329+#endif
1330+static void kwsinit PARAMS ((void));
1331+static void kwsmusts PARAMS ((void));
1332+static void Gcompile PARAMS ((char const *, size_t));
1333+static void Ecompile PARAMS ((char const *, size_t));
1334+static size_t EGexecute PARAMS ((char const *, size_t, size_t *, int ));
1335+static void Fcompile PARAMS ((char const *, size_t));
1336+static size_t Fexecute PARAMS ((char const *, size_t, size_t *, int));
1337+static void Pcompile PARAMS ((char const *, size_t ));
1338+static size_t Pexecute PARAMS ((char const *, size_t, size_t *, int));
1339+/* Report MESG through error () with status 2; "%s" keeps MESG from being parsed as a format. */
1340+void
1341+dfaerror (char const *mesg)
1342+{
1343+ error (2, 0, "%s", mesg);
1344+}
1345+/* Allocate the global kwset, passing a lower-casing translation table when match_icase is set. */
1346+static void
1347+kwsinit (void)
1348+{
1349+ static char trans[NCHAR]; /* NOTE(review): static, presumably because kwsalloc keeps the pointer - confirm */
1350+ int i;
1351+
1352+ if (match_icase)
1353+ for (i = 0; i < NCHAR; ++i)
1354+ trans[i] = TOLOWER (i);
1355+
1356+ if (!(kwset = kwsalloc (match_icase ? trans : (char *) 0))) /* null table when case matters */
1357+ error (2, 0, _("memory exhausted"));
1358+}
1359+
1360+/* If the DFA turns out to have some set of fixed strings, one of
1361+ which must occur in the match, then we build a kwset matcher
1362+ to find those strings, and thus quickly filter out impossible
1363+ matches. Library error strings are printed via "%s", never as a format. */
1364+static void
1365+kwsmusts (void)
1366+{
1367+ struct dfamust const *dm;
1368+ char const *err;
1369+
1370+ if (dfa.musts)
1371+ {
1372+ kwsinit ();
1373+ /* First, we compile in the substrings known to be exact
1374+ matches. The kwset matcher will return the index
1375+ of the matching string that it chooses. */
1376+ for (dm = dfa.musts; dm; dm = dm->next)
1377+ {
1378+ if (!dm->exact)
1379+ continue;
1380+ ++kwset_exact_matches; /* exact strings are added first, so they get the lowest indices */
1381+ if ((err = kwsincr (kwset, dm->must, strlen (dm->must))) != 0)
1382+ error (2, 0, "%s", err);
1383+ }
1384+ /* Now, we compile the substrings that will require
1385+ the use of the regexp matcher. */
1386+ for (dm = dfa.musts; dm; dm = dm->next)
1387+ {
1388+ if (dm->exact)
1389+ continue;
1390+ if ((err = kwsincr (kwset, dm->must, strlen (dm->must))) != 0)
1391+ error (2, 0, "%s", err);
1392+ }
1393+ if ((err = kwsprep (kwset)) != 0)
1394+ error (2, 0, "%s", err);
1395+ }
1396+}
1397+
1398+#ifdef MBS_SUPPORT
1399+/* Allocate an array with one entry per byte of "buf". The entry at the
1400+ first byte of each character holds that character's length in bytes;
1401+ entries for the remaining bytes of a multibyte character stay 0. Invalid
1402+ or truncated sequences count as single bytes. Caller must free the array. */
1403+static char*
1404+check_multibyte_string(char const *buf, size_t size)
1405+{
1406+ char *mb_properties = xmalloc(size); /* xmalloc (xalloc.h) instead of an unchecked malloc */
1407+ mbstate_t cur_state;
1408+ size_t i; /* same type as size: no signed/unsigned comparison below */
1409+ memset(&cur_state, 0, sizeof(mbstate_t));
1410+ memset(mb_properties, 0, sizeof(char)*size);
1411+ for (i = 0; i < size ;)
1412+ {
1413+ size_t mbclen;
1414+ mbclen = mbrlen(buf + i, size - i, &cur_state);
1415+
1416+ if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
1417+ {
1418+ /* An invalid sequence, or a truncated multibyte character.
1419+ We treat it as a singlebyte character. */
1420+ mbclen = 1;
1421+ }
1422+ mb_properties[i] = mbclen;
1423+ i += mbclen;
1424+ }
1425+
1426+ return mb_properties;
1427+}
1428+#endif
1429+/* Compile PATTERN (SIZE bytes, newline-separated) with grep syntax for both the regex matcher and the DFA. */
1430+static void
1431+Gcompile (char const *pattern, size_t size)
1432+{
1433+ const char *err;
1434+ char const *sep;
1435+ size_t total = size;
1436+ char const *motif = pattern;
1437+
1438+ re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE);
1439+ dfasyntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase, eolbyte);
1440+
1441+ /* For the GNU regex compiler we have to pass the patterns separately to
1442+ detect errors like "[\nallo\n]\n". The patterns here are "[", "allo" and
1443+ "]", for which GNU regex should raise a syntax error. The same goes for
1444+ backrefs, which should be local to each pattern. */
1445+ do
1446+ {
1447+ size_t len;
1448+ sep = memchr (motif, '\n', total);
1449+ if (sep)
1450+ {
1451+ len = sep - motif;
1452+ sep++;
1453+ total -= (len + 1);
1454+ }
1455+ else
1456+ {
1457+ len = total;
1458+ total = 0;
1459+ }
1460+
1461+ patterns = realloc (patterns, (pcount + 1) * sizeof (*patterns));
1462+ if (patterns == NULL)
1463+ error (2, errno, _("memory exhausted"));
1464+
1465+ patterns[pcount] = patterns0;
1466+
1467+ if ((err = re_compile_pattern (motif, len,
1468+ &(patterns[pcount].regexbuf))) != 0)
1469+ error (2, 0, "%s", err); /* never use a library message as the format */
1470+ pcount++;
1471+
1472+ motif = sep;
1473+ } while (sep && total != 0);
1474+
1475+ /* In the match_words and match_lines cases, we use a different pattern
1476+ for the DFA matcher that will quickly throw out cases that won't work.
1477+ Then if DFA succeeds we do some hairy stuff using the regex matcher
1478+ to decide whether the match should really count. */
1479+ if (match_words || match_lines)
1480+ {
1481+ /* In the whole-word case, we use the pattern:
1482+ \(^\|[^[:alnum:]_]\)\(userpattern\)\([^[:alnum:]_]|$\).
1483+ In the whole-line case, we use the pattern:
1484+ ^\(userpattern\)$. */
1485+
1486+ static char const line_beg[] = "^\\(";
1487+ static char const line_end[] = "\\)$";
1488+ static char const word_beg[] = "\\(^\\|[^[:alnum:]_]\\)\\(";
1489+ static char const word_end[] = "\\)\\([^[:alnum:]_]\\|$\\)";
1490+ char *n = xmalloc (sizeof word_beg - 1 + size + sizeof word_end); /* sized for the longer (word) wrappers */
1491+ size_t i;
1492+ strcpy (n, match_lines ? line_beg : word_beg);
1493+ i = strlen (n);
1494+ memcpy (n + i, pattern, size);
1495+ i += size;
1496+ strcpy (n + i, match_lines ? line_end : word_end);
1497+ i += strlen (n + i);
1498+ pattern = n;
1499+ size = i;
1500+ }
1501+
1502+ dfacomp (pattern, size, &dfa, 1);
1503+ kwsmusts ();
1504+}
1505+/* Compile PATTERN (SIZE bytes, newline-separated) with awk or POSIX egrep syntax, chosen by the matcher name. */
1506+static void
1507+Ecompile (char const *pattern, size_t size)
1508+{
1509+ const char *err;
1510+ const char *sep;
1511+ size_t total = size;
1512+ char const *motif = pattern;
1513+
1514+ if (strcmp (matcher, "awk") == 0)
1515+ {
1516+ re_set_syntax (RE_SYNTAX_AWK);
1517+ dfasyntax (RE_SYNTAX_AWK, match_icase, eolbyte);
1518+ }
1519+ else
1520+ {
1521+ re_set_syntax (RE_SYNTAX_POSIX_EGREP);
1522+ dfasyntax (RE_SYNTAX_POSIX_EGREP, match_icase, eolbyte);
1523+ }
1524+
1525+ /* For the GNU regex compiler we have to pass the patterns separately to
1526+ detect errors like "[\nallo\n]\n". The patterns here are "[", "allo" and
1527+ "]", for which GNU regex should raise a syntax error. The same goes for
1528+ backrefs, which should be local to each pattern. */
1529+ do
1530+ {
1531+ size_t len;
1532+ sep = memchr (motif, '\n', total);
1533+ if (sep)
1534+ {
1535+ len = sep - motif;
1536+ sep++;
1537+ total -= (len + 1);
1538+ }
1539+ else
1540+ {
1541+ len = total;
1542+ total = 0;
1543+ }
1544+
1545+ patterns = realloc (patterns, (pcount + 1) * sizeof (*patterns));
1546+ if (patterns == NULL)
1547+ error (2, errno, _("memory exhausted"));
1548+ patterns[pcount] = patterns0;
1549+
1550+ if ((err = re_compile_pattern (motif, len,
1551+ &(patterns[pcount].regexbuf))) != 0)
1552+ error (2, 0, "%s", err); /* never use a library message as the format */
1553+ pcount++;
1554+
1555+ motif = sep;
1556+ } while (sep && total != 0);
1557+
1558+ /* In the match_words and match_lines cases, we use a different pattern
1559+ for the DFA matcher that will quickly throw out cases that won't work.
1560+ Then if DFA succeeds we do some hairy stuff using the regex matcher
1561+ to decide whether the match should really count. */
1562+ if (match_words || match_lines)
1563+ {
1564+ /* In the whole-word case, we use the pattern:
1565+ (^|[^[:alnum:]_])(userpattern)([^[:alnum:]_]|$).
1566+ In the whole-line case, we use the pattern:
1567+ ^(userpattern)$. */
1568+
1569+ static char const line_beg[] = "^(";
1570+ static char const line_end[] = ")$";
1571+ static char const word_beg[] = "(^|[^[:alnum:]_])(";
1572+ static char const word_end[] = ")([^[:alnum:]_]|$)";
1573+ char *n = xmalloc (sizeof word_beg - 1 + size + sizeof word_end); /* sized for the longer (word) wrappers */
1574+ size_t i;
1575+ strcpy (n, match_lines ? line_beg : word_beg);
1576+ i = strlen(n);
1577+ memcpy (n + i, pattern, size);
1578+ i += size;
1579+ strcpy (n + i, match_lines ? line_end : word_end);
1580+ i += strlen (n + i);
1581+ pattern = n;
1582+ size = i;
1583+ }
1584+
1585+ dfacomp (pattern, size, &dfa, 1);
1586+ kwsmusts ();
1587+}
1588+/* Search buf[0..size) with kwset/DFA/regex; return the match offset and set *match_size, or (size_t) -1 if none. */
1589+static size_t
1590+EGexecute (char const *buf, size_t size, size_t *match_size, int exact)
1591+{
1592+ register char const *buflim, *beg, *end;
1593+ char eol = eolbyte;
1594+ int backref, start, len;
1595+ struct kwsmatch kwsm;
1596+ size_t i;
1597+#ifdef MBS_SUPPORT
1598+ char *mb_properties = NULL;
1599+#endif /* MBS_SUPPORT */
1600+
1601+#ifdef MBS_SUPPORT
1602+ if (MB_CUR_MAX > 1 && kwset)
1603+ mb_properties = check_multibyte_string(buf, size);
1604+#endif /* MBS_SUPPORT */
1605+
1606+ buflim = buf + size;
1607+
1608+ for (beg = end = buf; end < buflim; beg = end)
1609+ {
1610+ if (!exact)
1611+ {
1612+ if (kwset)
1613+ {
1614+ /* Find a possible match using the KWset matcher. */
1615+ size_t offset = kwsexec (kwset, beg, buflim - beg, &kwsm);
1616+ if (offset == (size_t) -1)
1617+ goto failure;
1618+ beg += offset;
1619+ /* Narrow down to the line containing the candidate, and
1620+ run it through DFA. */
1621+ end = memchr(beg, eol, buflim - beg); /* NOTE(review): result is not checked; assumes an eol byte follows beg */
1622+ end++;
1623+#ifdef MBS_SUPPORT
1624+ if (MB_CUR_MAX > 1 && mb_properties[beg - buf] == 0) /* candidate starts inside a multibyte character */
1625+ continue;
1626+#endif
1627+ while (beg > buf && beg[-1] != eol)
1628+ --beg;
1629+ if (kwsm.index < kwset_exact_matches)
1630+ goto success_in_beg_and_end;
1631+ if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1)
1632+ continue;
1633+ }
1634+ else
1635+ {
1636+ /* No good fixed strings; start with DFA. */
1637+ size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref);
1638+ if (offset == (size_t) -1)
1639+ break;
1640+ /* Narrow down to the line we've found. */
1641+ beg += offset;
1642+ end = memchr (beg, eol, buflim - beg); /* NOTE(review): unchecked, as above */
1643+ end++;
1644+ while (beg > buf && beg[-1] != eol)
1645+ --beg;
1646+ }
1647+ /* Successful, no backreferences encountered! */
1648+ if (!backref)
1649+ goto success_in_beg_and_end;
1650+ }
1651+ else
1652+ end = beg + size; /* exact: skip kwset/DFA and hand the whole buffer to the regex matcher */
1653+
1654+ /* If we've made it to this point, this means DFA has seen
1655+ a probable match, and we need to run it through Regex. */
1656+ for (i = 0; i < pcount; i++)
1657+ {
1658+ patterns[i].regexbuf.not_eol = 0;
1659+ if (0 <= (start = re_search (&(patterns[i].regexbuf), beg,
1660+ end - beg - 1, 0,
1661+ end - beg - 1, &(patterns[i].regs))))
1662+ {
1663+ len = patterns[i].regs.end[0] - start;
1664+ if (exact && !match_words)
1665+ goto success_in_start_and_len;
1666+ if ((!match_lines && !match_words)
1667+ || (match_lines && len == end - beg - 1))
1668+ goto success_in_beg_and_end;
1669+ /* If -w, check if the match aligns with word boundaries.
1670+ We do this iteratively because:
1671+ (a) the line may contain more than one occurrence of the
1672+ pattern, and
1673+ (b) Several alternatives in the pattern might be valid at a
1674+ given point, and we may need to consider a shorter one to
1675+ find a word boundary. */
1676+ if (match_words)
1677+ while (start >= 0)
1678+ {
1679+ if ((start == 0 || !WCHAR ((unsigned char) beg[start - 1]))
1680+ && (len == end - beg - 1
1681+ || !WCHAR ((unsigned char) beg[start + len])))
1682+ goto success_in_beg_and_end;
1683+ if (len > 0)
1684+ {
1685+ /* Try a shorter length anchored at the same place. */
1686+ --len;
1687+ patterns[i].regexbuf.not_eol = 1;
1688+ len = re_match (&(patterns[i].regexbuf), beg,
1689+ start + len, start,
1690+ &(patterns[i].regs));
1691+ }
1692+ if (len <= 0)
1693+ {
1694+ /* Try looking further on. */
1695+ if (start == end - beg - 1)
1696+ break;
1697+ ++start;
1698+ patterns[i].regexbuf.not_eol = 0;
1699+ start = re_search (&(patterns[i].regexbuf), beg,
1700+ end - beg - 1,
1701+ start, end - beg - 1 - start,
1702+ &(patterns[i].regs));
1703+ len = patterns[i].regs.end[0] - start;
1704+ }
1705+ }
1706+ }
1707+ } /* for Regex patterns. */
1708+ } /* for (beg = end ..) */
1709+
1710+ failure:
1711+#ifdef MBS_SUPPORT
1712+ if (MB_CUR_MAX > 1 && mb_properties)
1713+ free (mb_properties);
1714+#endif /* MBS_SUPPORT */
1715+ return (size_t) -1;
1716+
1717+ success_in_beg_and_end:
1718+ len = end - beg;
1719+ start = beg - buf;
1720+ /* FALLTHROUGH */
1721+
1722+ success_in_start_and_len:
1723+#ifdef MBS_SUPPORT
1724+ if (MB_CUR_MAX > 1 && mb_properties)
1725+ free (mb_properties);
1726+#endif /* MBS_SUPPORT */
1727+ *match_size = len;
1728+ return start;
1729+}
1730+/* Add each newline-separated string of PATTERN (SIZE bytes) to the global kwset, then prepare it for searching. */
1731+static void
1732+Fcompile (char const *pattern, size_t size)
1733+{
1734+ char const *beg, *lim, *err;
1735+
1736+ kwsinit ();
1737+ beg = pattern;
1738+ do
1739+ {
1740+ for (lim = beg; lim < pattern + size && *lim != '\n'; ++lim)
1741+ ;
1742+ if ((err = kwsincr (kwset, beg, lim - beg)) != 0)
1743+ error (2, 0, "%s", err); /* never use a library message as the format */
1744+ if (lim < pattern + size)
1745+ ++lim; /* step over the newline separator */
1746+ beg = lim;
1747+ }
1748+ while (beg < pattern + size);
1749+
1750+ if ((err = kwsprep (kwset)) != 0)
1751+ error (2, 0, "%s", err);
1752+}
1753+/* Search buf[0..size) for the kwset's fixed strings, honoring match_lines/match_words; return the offset and set *match_size, or -1. */
1754+static size_t
1755+Fexecute (char const *buf, size_t size, size_t *match_size, int exact)
1756+{
1757+ register char const *beg, *try, *end;
1758+ register size_t len;
1759+ char eol = eolbyte;
1760+ struct kwsmatch kwsmatch;
1761+#ifdef MBS_SUPPORT
1762+ char *mb_properties; /* only assigned, read and freed when MB_CUR_MAX > 1 */
1763+ if (MB_CUR_MAX > 1)
1764+ mb_properties = check_multibyte_string (buf, size);
1765+#endif /* MBS_SUPPORT */
1766+
1767+ for (beg = buf; beg <= buf + size; ++beg)
1768+ {
1769+ size_t offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch);
1770+ if (offset == (size_t) -1)
1771+ goto failure;
1772+#ifdef MBS_SUPPORT
1773+ if (MB_CUR_MAX > 1 && mb_properties[offset+beg-buf] == 0)
1774+ continue; /* It is a part of multibyte character. */
1775+#endif /* MBS_SUPPORT */
1776+ beg += offset;
1777+ len = kwsmatch.size[0];
1778+ if (exact && !match_words)
1779+ goto success_in_beg_and_len;
1780+ if (match_lines)
1781+ {
1782+ if (beg > buf && beg[-1] != eol)
1783+ continue;
1784+ if (beg + len < buf + size && beg[len] != eol)
1785+ continue;
1786+ goto success;
1787+ }
1788+ else if (match_words)
1789+ for (try = beg; len; )
1790+ {
1791+ if (try > buf && WCHAR((unsigned char) try[-1]))
1792+ break;
1793+ if (try + len < buf + size && WCHAR((unsigned char) try[len]))
1794+ {
1795+ offset = kwsexec (kwset, beg, --len, &kwsmatch); /* retry within a span one byte shorter */
1796+ if (offset == (size_t) -1)
1797+ {
1798+#ifdef MBS_SUPPORT
1799+ if (MB_CUR_MAX > 1)
1800+ free (mb_properties);
1801+#endif /* MBS_SUPPORT */
1802+ return offset; /* (size_t) -1; this exit bypasses the failure label, hence the free above */
1803+ }
1804+ try = beg + offset;
1805+ len = kwsmatch.size[0];
1806+ }
1807+ else
1808+ goto success;
1809+ }
1810+ else
1811+ goto success;
1812+ }
1813+
1814+ failure:
1815+#ifdef MBS_SUPPORT
1816+ if (MB_CUR_MAX > 1)
1817+ free (mb_properties);
1818+#endif /* MBS_SUPPORT */
1819+ return -1;
1820+
1821+ success:
1822+ end = memchr (beg + len, eol, (buf + size) - (beg + len)); /* NOTE(review): result is not checked; assumes an eol byte follows */
1823+ end++;
1824+ while (buf < beg && beg[-1] != eol)
1825+ --beg;
1826+ len = end - beg; /* whole line, including its eol byte */
1827+ /* FALLTHROUGH */
1828+
1829+ success_in_beg_and_len:
1830+ *match_size = len;
1831+#ifdef MBS_SUPPORT
1832+ if (MB_CUR_MAX > 1)
1833+ free (mb_properties);
1834+#endif /* MBS_SUPPORT */
1835+ return beg - buf;
1836+}
1837+
1838+#if HAVE_LIBPCRE
1839+/* Compiled internal form of a Perl regular expression. */
1840+static pcre *cre;
1841+
1842+/* Additional information about the pattern. */
1843+static pcre_extra *extra;
1844+#endif
1845+/* Compile PATTERN (SIZE bytes) with PCRE into the globals cre and extra; without HAVE_LIBPCRE, report that -P is unsupported. */
1846+static void
1847+Pcompile (char const *pattern, size_t size)
1848+{
1849+#if !HAVE_LIBPCRE
1850+ error (2, 0, _("The -P option is not supported"));
1851+#else
1852+ int e;
1853+ char const *ep;
1854+ char *re = xmalloc (4 * size + 7); /* worst case: 4 bytes per input byte, 3-byte prefix, 3-byte suffix, NUL */
1855+ int flags = PCRE_MULTILINE | (match_icase ? PCRE_CASELESS : 0);
1856+ char const *patlim = pattern + size;
1857+ char *n = re;
1858+ char const *p;
1859+ char const *pnul;
1860+
1861+ /* FIXME: Remove this restriction. */
1862+ if (eolbyte != '\n')
1863+ error (2, 0, _("The -P and -z options cannot be combined"));
1864+
1865+ *n = '\0';
1866+ if (match_lines)
1867+ strcpy (n, "^(");
1868+ if (match_words)
1869+ strcpy (n, "\\b("); /* NOTE(review): overwrites "^(" when both match_lines and match_words are set */
1870+ n += strlen (n);
1871+
1872+ /* The PCRE interface doesn't allow NUL bytes in the pattern, so
1873+ replace each NUL byte in the pattern with the four characters
1874+ "\000", removing a preceding backslash if there are an odd
1875+ number of backslashes before the NUL.
1876+
1877+ FIXME: This method does not work with some multibyte character
1878+ encodings, notably Shift-JIS, where a multibyte character can end
1879+ in a backslash byte. */
1880+ for (p = pattern; (pnul = memchr (p, '\0', patlim - p)); p = pnul + 1)
1881+ {
1882+ memcpy (n, p, pnul - p);
1883+ n += pnul - p;
1884+ for (p = pnul; pattern < p && p[-1] == '\\'; p--)
1885+ continue;
1886+ n -= (pnul - p) & 1;
1887+ strcpy (n, "\\000");
1888+ n += 4;
1889+ }
1890+
1891+ memcpy (n, p, patlim - p);
1892+ n += patlim - p;
1893+ *n = '\0';
1894+ if (match_words)
1895+ strcpy (n, ")\\b");
1896+ if (match_lines)
1897+ strcpy (n, ")$"); /* NOTE(review): overwrites ")\\b" when both options are set */
1898+
1899+ cre = pcre_compile (re, flags, &ep, &e, pcre_maketables ());
1900+ if (!cre)
1901+ error (2, 0, "%s", ep); /* PCRE's message is data, not a format string */
1902+
1903+ extra = pcre_study (cre, 0, &ep);
1904+ if (ep)
1905+ error (2, 0, "%s", ep);
1906+
1907+ free (re);
1908+#endif
1909+}
1910+/* Run the compiled PCRE pattern over buf[0..size); return the match offset and set *match_size, or -1 if nothing matches. */
1911+static size_t
1912+Pexecute (char const *buf, size_t size, size_t *match_size, int exact)
1913+{
1914+#if !HAVE_LIBPCRE
1915+ abort ();
1916+ return -1;
1917+#else
1918+ /* This array must have at least two elements; everything after that
1919+ is just for performance improvement in pcre_exec. */
1920+ int sub[300];
1921+
1922+ int e = pcre_exec (cre, extra, buf, size, 0, 0,
1923+ sub, sizeof sub / sizeof *sub);
1924+
1925+ if (e <= 0)
1926+ {
1927+ switch (e)
1928+ {
1929+ case PCRE_ERROR_NOMATCH:
1930+ return -1;
1931+
1932+ case PCRE_ERROR_NOMEMORY:
1933+ error (2, 0, _("Memory exhausted")); /* NOTE(review): no break; presumably error () does not return here - confirm */
1934+
1935+ default:
1936+ abort ();
1937+ }
1938+ }
1939+ else
1940+ {
1941+ /* Narrow down to the line we've found. */
1942+ char const *beg = buf + sub[0];
1943+ char const *end = buf + sub[1];
1944+ char const *buflim = buf + size;
1945+ char eol = eolbyte;
1946+ if (!exact) /* with exact set, the span reported by pcre_exec is returned as is */
1947+ {
1948+ end = memchr (end, eol, buflim - end); /* NOTE(review): result is not checked; assumes an eol byte follows */
1949+ end++;
1950+ while (buf < beg && beg[-1] != eol)
1951+ --beg;
1952+ }
1953+
1954+ *match_size = end - beg;
1955+ return beg - buf;
1956+ }
1957+#endif
1958+}
1959+/* Matcher table: each entry pairs a matcher name with its compile and execute functions. */
1960+struct matcher const matchers[] = {
1961+ { "default", Gcompile, EGexecute },
1962+ { "grep", Gcompile, EGexecute },
1963+ { "egrep", Ecompile, EGexecute },
1964+ { "awk", Ecompile, EGexecute },
1965+ { "fgrep", Fcompile, Fexecute },
1966+ { "perl", Pcompile, Pexecute },
1967+ { "", 0, 0 }, /* presumably the end-of-table sentinel - confirm against the lookup code */
1968+};
1969diff -urN grep-2.5.1a.orig/tests/fmbtest.sh grep-2.5.1a/tests/fmbtest.sh
1970--- grep-2.5.1a.orig/tests/fmbtest.sh 1970-01-01 05:00:00.000000000 +0500
1971+++ grep-2.5.1a/tests/fmbtest.sh 2005-10-23 09:51:12.000000000 +0600
1972@@ -0,0 +1,111 @@
1973+#!/bin/sh
1974+
1975+: ${srcdir=.}
1976+
1977+# If cs_CZ.UTF-8 locale doesn't work, skip this test silently
1978+LC_ALL=cs_CZ.UTF-8 locale -k LC_CTYPE 2>/dev/null | ${GREP} -q charmap.*UTF-8 \
1979+ || exit 77
1980+
1981+failures=0
1982+
1983+cat > csinput <<EOF
1984+01 Ŝluťoučká číše
1985+ČíŠE 02
1986+03 Z číší Čiší cosi
1987+04 Čí
1988+Še 05
1989+06 ČČČČČČČíšČÍŠčíš
1990+07 ČČČ ČČČČíšČÍŠčíšEEEE
1991+čAs 08
1992+09Čapka
1993+10ČaSy se měnÍ
1994+ČÍšE11
1995+Čas12
1996+𝇕ČÍšE𝇓13
1997+ŜČÍšE𝇓14
1998+𝇕ČÍšEŜ15
1999+ŜČÍšEŜ16
2000+ČÍšE𝇓17
2001+ČÍšEŜ18
2002+19𝇕ČÍše
2003+20ŜČÍše
2004+EOF
2005+cat > cspatfile <<EOF
2006+ČÍšE
2007+Čas
2008+EOF
2009+
2010+for mode in F G E; do
2011+
2012+test1="$(echo `LC_ALL=cs_CZ.UTF-8 ${GREP} -${mode} -f cspatfile csinput \
2013+ | LC_ALL=C sed 's/^.*\([0-9][0-9]\).*$/\1/'`)"
2014+if test "$test1" != "11 12 13 14 15 16 17 18"; then
2015+ echo "Test #1 ${mode} failed: $test1"
2016+ failures=1
2017+fi
2018+
2019+test2="$(echo `LC_ALL=cs_CZ.UTF-8 ${GREP} -${mode}i -f cspatfile csinput \
2020+ | LC_ALL=C sed 's/^.*\([0-9][0-9]\).*$/\1/'`)"
2021+if test "$test2" != "01 02 07 08 10 11 12 13 14 15 16 17 18 19 20"; then
2022+ echo "Test #2 ${mode} failed: $test2"
2023+ failures=1
2024+fi
2025+
2026+test3="$(echo `LC_ALL=cs_CZ.UTF-8 ${GREP} -${mode}i -e 'ČÍšE' -e 'Čas' csinput \
2027+ | LC_ALL=C sed 's/^.*\([0-9][0-9]\).*$/\1/'`)"
2028+if test "$test3" != "01 02 07 08 10 11 12 13 14 15 16 17 18 19 20"; then
2029+ echo "Test #3 ${mode} failed: $test3"
2030+ failures=1
2031+fi
2032+
2033+test4="$(echo `LC_ALL=cs_CZ.UTF-8 ${GREP} -${mode}iw -f cspatfile csinput \
2034+ | LC_ALL=C sed 's/^.*\([0-9][0-9]\).*$/\1/'`)"
2035+if test "$test4" != "01 02 08 13 17 19"; then
2036+ echo "Test #4 ${mode} failed: $test4"
2037+ failures=1
2038+fi
2039+
2040+done
2041+
2042+# Test that -F --color=always prefers longer matches.
2043+test5="`echo 'Cosi tu ČišÍ...' \
2044+ | LC_ALL=cs_CZ.UTF-8 ${GREP} --color=always -Fi -e 'čiš' -e 'čiší'`"
2045+if echo "$test5" | LC_ALL=C ${GREP} -q 'Cosi tu .*\[.*mČišÍ.*\[.*m\(.\[K\)\?\.\.\.'; then
2046+ :
2047+else
2048+ echo "Test #5 F failed: $test5"
2049+ failures=1
2050+fi
2051+
2052+for mode in G E; do
2053+
2054+# Test that -{G,E} --color=always prefers earlier pattern matches.
2055+test6="`echo 'Cosi tu ČišÍ...' \
2056+ | LC_ALL=cs_CZ.UTF-8 ${GREP} --color=always -${mode}i -e 'čiš' -e 'čiší'`"
2057+if echo "$test6" | LC_ALL=C ${GREP} -q 'Cosi tu .*\[.*mČiš.*\[.*m\(.\[K\)\?Í\.\.\.'; then
2058+ :
2059+else
2060+ echo "Test #6 ${mode} failed: $test6"
2061+ failures=1
2062+fi
2063+
2064+# Test that -{G,E} --color=always prefers earlier pattern matches.
2065+test7="`echo 'Cosi tu ČišÍ...' \
2066+ | LC_ALL=cs_CZ.UTF-8 ${GREP} --color=always -${mode}i -e 'čiší' -e 'čiš'`"
2067+if echo "$test7" | LC_ALL=C ${GREP} -q 'Cosi tu .*\[.*mČišÍ.*\[.*m\(.\[K\)\?\.\.\.'; then
2068+ :
2069+else
2070+ echo "Test #7 ${mode} failed: $test7"
2071+ failures=1
2072+fi
2073+
2074+test8="$(echo `LC_ALL=cs_CZ.UTF-8 ${GREP} -${mode}i -e 'Č.šE' -e 'Č[a-f]s' csinput \
2075+ | LC_ALL=C sed 's/^.*\([0-9][0-9]\).*$/\1/'`)"
2076+if test "$test8" != "01 02 07 08 10 11 12 13 14 15 16 17 18 19 20"; then
2077+ echo "Test #8 ${mode} failed: $test8"
2078+ failures=1
2079+fi
2080+
2081+done
2082+
2083+exit $failures
2084diff -urN grep-2.5.1a.orig/tests/Makefile.am grep-2.5.1a/tests/Makefile.am
2085--- grep-2.5.1a.orig/tests/Makefile.am 2001-03-07 09:11:27.000000000 +0500
2086+++ grep-2.5.1a/tests/Makefile.am 2005-10-23 09:51:12.000000000 +0600
2087@@ -3,7 +3,8 @@
2088 AWK=@AWK@
2089
2090 TESTS = warning.sh khadafy.sh spencer1.sh bre.sh ere.sh \
2091- status.sh empty.sh options.sh backref.sh file.sh
2092+ status.sh empty.sh options.sh backref.sh file.sh \
2093+ fmbtest.sh
2094 EXTRA_DIST = $(TESTS) \
2095 khadafy.lines khadafy.regexp \
2096 spencer1.awk spencer1.tests \
2097diff -urN grep-2.5.1a.orig/tests/Makefile.in grep-2.5.1a/tests/Makefile.in
2098--- grep-2.5.1a.orig/tests/Makefile.in 2002-03-26 21:09:36.000000000 +0500
2099+++ grep-2.5.1a/tests/Makefile.in 2005-10-23 09:51:13.000000000 +0600
2100@@ -97,7 +97,8 @@
2101 AWK = @AWK@
2102
2103 TESTS = warning.sh khadafy.sh spencer1.sh bre.sh ere.sh \
2104- status.sh empty.sh options.sh backref.sh file.sh
2105+ status.sh empty.sh options.sh backref.sh file.sh \
2106+ fmbtest.sh
2107
2108 EXTRA_DIST = $(TESTS) \
2109 khadafy.lines khadafy.regexp \
Note: See TracBrowser for help on using the repository browser.