Context Navigation

source: scripts/patches/grep-2.5.1a-redhat_fixes-2.patch@ 5ff8cf2

Visit:

clfs-1.2 clfs-2.1 clfs-3.0.0-systemd clfs-3.0.0-sysvinit systemd sysvinit

Last change on this file since 5ff8cf2 was c0cf39e, checked in by Jim Gifford <clfs@…>, 19 years ago

r2506@server (orig r1245): ryan | 2006-03-10 02:07:10 -0800

r1295@rei: lfs | 2006-03-09 18:29:16 +1100
Add grep-2.5.1a-redhat_fixes-2.patch
------------------------------------
Submitted by: Alexander E. Patrakov
Date: 2005-08-13
Initial Package Version: 2.5.1a
Upstream Status: Partially accepted, partially rejected, but required for LSB >= 2.0 certification
Origin: RedHat
Description: Various fixes from RedHat. Individual patches:

grep-2.5.1-fgrep.patch
grep-2.5.1-bracket.patch
grep-2.5-i18n.patch
grep-2.5.1-oi.patch
grep-2.5.1-manpage.patch
grep-2.5.1-color.patch
grep-2.5.1-icolor.patch
grep-2.5.1-egf-speedup.patch
grep-2.5.1-dfa-optional.patch
grep-2.5.1-tests.patch
grep-2.5.1-w.patch

Testcases:

-fgrep: ???, but required for other patches
-bracket: echo "[" | LANG=en_US.UTF-8 grep "[[:space:]]"
-i18n: many fixes for multibyte locale support, required for LSB.
-oi: echo xxYYzz | LANG=C grep -i -o yy
-manpage: typo
-color: restore the background color correctly
-icolor: ??? echo 'spam foo SPAM FOO' | grep -i --color spam

(but that's also fixed by -oi. Is this patch just a cleanup?)

-egf-speedup: without this, grep is as slow as a snail in UTF-8 locales.
-dfa-optional: disables dfa in multibyte locales by default.
-w: (echo 'foo';echo 'fo') > /tmp/testfile && grep -F -w fo /tmp/testfile

Property mode set to 100644

File size: 54.8 KB

Rev	Line
[c0cf39e]	1	Submitted by: Alexander E. Patrakov
	2	Date: 2005-08-13
	3	Initial Package Version: 2.5.1a
	4	Upstream Status: Partially accepted, partially rejected, but required for LSB >= 2.0 certification
	5	Origin: RedHat
	6	Description: Various fixes from RedHat. Individual patches:
	7
	8	grep-2.5.1-fgrep.patch
	9	grep-2.5.1-bracket.patch
	10	grep-2.5-i18n.patch
	11	grep-2.5.1-oi.patch
	12	grep-2.5.1-manpage.patch
	13	grep-2.5.1-color.patch
	14	grep-2.5.1-icolor.patch
	15	grep-2.5.1-egf-speedup.patch
	16	grep-2.5.1-dfa-optional.patch
	17	grep-2.5.1-tests.patch
	18	grep-2.5.1-w.patch
	19
	20	Testcases:
	21
	22	-fgrep: ???, but required for other patches
	23	-bracket: echo "[" | LANG=en_US.UTF-8 grep "[[:space:]]"
	24	-i18n: many fixes for multibyte locale support, required for LSB.
	25	-oi: echo xxYYzz | LANG=C grep -i -o yy
	26	-manpage: typo
	27	-color: restore the background color correctly
	28	-icolor: ??? echo 'spam foo SPAM FOO' | grep -i --color spam
	29	(but that's also fixed by -oi. Is this patch just a cleanup?)
	30	-egf-speedup: without this, grep is as slow as a snail in UTF-8 locales.
	31	-dfa-optional: disables dfa in multibyte locales by default.
	32	-w: (echo 'foo';echo 'fo') > /tmp/testfile && grep -F -w fo /tmp/testfile
	33
	34	diff -urN grep-2.5.1a.orig/doc/grep.1 grep-2.5.1a/doc/grep.1
	35	--- grep-2.5.1a.orig/doc/grep.1 2004-11-12 16:26:37.000000000 +0500
	36	+++ grep-2.5.1a/doc/grep.1 2005-10-23 09:49:43.000000000 +0600
	37	@@ -191,6 +191,7 @@
	38	.I PATTERN
	39	as a list of fixed strings, separated by newlines,
	40	any of which is to be matched.
	41	+.TP
	42	.BR \-P ", " \-\^\-perl-regexp
	43	Interpret
	44	.I PATTERN
	45	@@ -302,7 +303,7 @@
	46	This is especially useful for tools like zgrep, e.g.
	47	.B "gzip -cd foo.gz \|grep --label=foo something"
	48	.TP
	49	-.BR \-\^\-line-buffering
	50	+.BR \-\^\-line-buffered
	51	Use line buffering, it can be a performance penality.
	52	.TP
	53	.BR \-q ", " \-\^\-quiet ", " \-\^\-silent
	54	diff -urN grep-2.5.1a.orig/lib/posix/regex.h grep-2.5.1a/lib/posix/regex.h
	55	--- grep-2.5.1a.orig/lib/posix/regex.h 2001-04-02 23:56:50.000000000 +0600
	56	+++ grep-2.5.1a/lib/posix/regex.h 2005-10-23 09:49:31.000000000 +0600
	57	@@ -109,6 +109,10 @@
	58	If not set, \{, \}, {, and } are literals. */
	59	#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
	60
	61	+/* If this bit is set, then ignore case when matching.
	62	+ If not set, then case is significant. */
	63	+#define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1)
	64	+
	65	/* If this bit is set, +, ? and \| aren't recognized as operators.
	66	If not set, they are. */
	67	#define RE_LIMITED_OPS (RE_INTERVALS << 1)
	68	diff -urN grep-2.5.1a.orig/src/dfa.c grep-2.5.1a/src/dfa.c
	69	--- grep-2.5.1a.orig/src/dfa.c 2001-09-26 22:57:55.000000000 +0600
	70	+++ grep-2.5.1a/src/dfa.c 2005-10-23 09:49:17.000000000 +0600
	71	@@ -414,7 +414,7 @@
	72
	73	/* This function fetch a wide character, and update cur_mb_len,
	74	used only if the current locale is a multibyte environment. */
	75	-static wchar_t
	76	+static wint_t
	77	fetch_wc (char const *eoferr)
	78	{
	79	wchar_t wc;
	80	@@ -423,7 +423,7 @@
	81	if (eoferr != 0)
	82	dfaerror (eoferr);
	83	else
	84	- return -1;
	85	+ return WEOF;
	86	}
	87
	88	cur_mb_len = mbrtowc(&wc, lexptr, lexleft, &mbs);
	89	@@ -459,7 +459,7 @@
	90	static void
	91	parse_bracket_exp_mb ()
	92	{
	93	- wchar_t wc, wc1, wc2;
	94	+ wint_t wc, wc1, wc2;
	95
	96	/* Work area to build a mb_char_classes. */
	97	struct mb_char_classes *work_mbc;
	98	@@ -496,7 +496,7 @@
	99	work_mbc->invert = 0;
	100	do
	101	{
	102	- wc1 = -1; /* mark wc1 is not initialized". */
	103	+ wc1 = WEOF; /* mark wc1 is not initialized". */
	104
	105	/* Note that if we're looking at some other [:...:] construct,
	106	we just treat it as a bunch of ordinary characters. We can do
	107	@@ -586,7 +586,7 @@
	108	work_mbc->coll_elems[work_mbc->ncoll_elems++] = elem;
	109	}
	110	}
	111	- wc = -1;
	112	+ wc1 = wc = WEOF;
	113	}
	114	else
	115	/* We treat '[' as a normal character here. */
	116	@@ -600,7 +600,7 @@
	117	wc = fetch_wc(("Unbalanced ["));
	118	}
	119
	120	- if (wc1 == -1)
	121	+ if (wc1 == WEOF)
	122	wc1 = fetch_wc(_("Unbalanced ["));
	123
	124	if (wc1 == L'-')
	125	@@ -630,17 +630,17 @@
	126	}
	127	REALLOC_IF_NECESSARY(work_mbc->range_sts, wchar_t,
	128	range_sts_al, work_mbc->nranges + 1);
	129	- work_mbc->range_sts[work_mbc->nranges] = wc;
	130	+ work_mbc->range_sts[work_mbc->nranges] = (wchar_t)wc;
	131	REALLOC_IF_NECESSARY(work_mbc->range_ends, wchar_t,
	132	range_ends_al, work_mbc->nranges + 1);
	133	- work_mbc->range_ends[work_mbc->nranges++] = wc2;
	134	+ work_mbc->range_ends[work_mbc->nranges++] = (wchar_t)wc2;
	135	}
	136	- else if (wc != -1)
	137	+ else if (wc != WEOF)
	138	/* build normal characters. */
	139	{
	140	REALLOC_IF_NECESSARY(work_mbc->chars, wchar_t, chars_al,
	141	work_mbc->nchars + 1);
	142	- work_mbc->chars[work_mbc->nchars++] = wc;
	143	+ work_mbc->chars[work_mbc->nchars++] = (wchar_t)wc;
	144	}
	145	}
	146	while ((wc = wc1) != L']');
	147	@@ -2552,6 +2552,8 @@
	148	}
	149
	150	/* match with a character? */
	151	+ if (case_fold)
	152	+ wc = towlower (wc);
	153	for (i = 0; i<work_mbc->nchars; i++)
	154	{
	155	if (wc == work_mbc->chars[i])
	156	diff -urN grep-2.5.1a.orig/src/grep.c grep-2.5.1a/src/grep.c
	157	--- grep-2.5.1a.orig/src/grep.c 2004-11-12 16:25:35.000000000 +0500
	158	+++ grep-2.5.1a/src/grep.c 2005-10-23 09:50:06.000000000 +0600
	159	@@ -30,6 +30,12 @@
	160	# include <sys/time.h>
	161	# include <sys/resource.h>
	162	#endif
	163	+#if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC
	164	+/* We can handle multibyte string. */
	165	+# define MBS_SUPPORT
	166	+# include <wchar.h>
	167	+# include <wctype.h>
	168	+#endif
	169	#include <stdio.h>
	170	#include "system.h"
	171	#include "getopt.h"
	172	@@ -558,33 +564,6 @@
	173	{
	174	size_t match_size;
	175	size_t match_offset;
	176	- if(match_icase)
	177	- {
	178	- /* Yuck, this is tricky */
	179	- char buf = (char) xmalloc (lim - beg);
	180	- char *ibeg = buf;
	181	- char *ilim = ibeg + (lim - beg);
	182	- int i;
	183	- for (i = 0; i < lim - beg; i++)
	184	- ibeg[i] = tolower (beg[i]);
	185	- while ((match_offset = (*execute) (ibeg, ilim-ibeg, &match_size, 1))
	186	- != (size_t) -1)
	187	- {
	188	- char const *b = beg + match_offset;
	189	- if (b == lim)
	190	- break;
	191	- fwrite (beg, sizeof (char), match_offset, stdout);
	192	- printf ("\33[%sm", grep_color);
	193	- fwrite (b, sizeof (char), match_size, stdout);
	194	- fputs ("\33[00m", stdout);
	195	- beg = b + match_size;
	196	- ibeg = ibeg + match_offset + match_size;
	197	- }
	198	- fwrite (beg, 1, lim - beg, stdout);
	199	- free (buf);
	200	- lastout = lim;
	201	- return;
	202	- }
	203	while (lim-beg && (match_offset = (*execute) (beg, lim - beg, &match_size, 1))
	204	!= (size_t) -1)
	205	{
	206	@@ -601,6 +580,7 @@
	207	fputs ("\33[00m", stdout);
	208	beg = b + match_size;
	209	}
	210	+ fputs ("\33[K", stdout);
	211	}
	212	fwrite (beg, 1, lim - beg, stdout);
	213	if (ferror (stdout))
	214	@@ -1697,6 +1677,37 @@
	215	if (!install_matcher (matcher) && !install_matcher ("default"))
	216	abort ();
	217
	218	+#ifdef MBS_SUPPORT
	219	+ if (MB_CUR_MAX != 1 && match_icase)
	220	+ {
	221	+ wchar_t wc;
	222	+ mbstate_t cur_state, prev_state;
	223	+ int i, len = strlen(keys);
	224	+
	225	+ memset(&cur_state, 0, sizeof(mbstate_t));
	226	+ for (i = 0; i <= len ;)
	227	+ {
	228	+ size_t mbclen;
	229	+ mbclen = mbrtowc(&wc, keys + i, len - i, &cur_state);
	230	+ if (mbclen == (size_t) -1 \|\| mbclen == (size_t) -2 \|\| mbclen == 0)
	231	+ {
	232	+ /* An invalid sequence, or a truncated multibyte character.
	233	+ We treat it as a singlebyte character. */
	234	+ mbclen = 1;
	235	+ }
	236	+ else
	237	+ {
	238	+ if (iswupper((wint_t)wc))
	239	+ {
	240	+ wc = towlower((wint_t)wc);
	241	+ wcrtomb(keys + i, wc, &cur_state);
	242	+ }
	243	+ }
	244	+ i += mbclen;
	245	+ }
	246	+ }
	247	+#endif /* MBS_SUPPORT */
	248	+
	249	(*compile)(keys, keycc);
	250
	251	if ((argc - optind > 1 && !no_filenames) \|\| with_filenames)
	252	diff -urN grep-2.5.1a.orig/src/search.c grep-2.5.1a/src/search.c
	253	--- grep-2.5.1a.orig/src/search.c 2001-04-19 09:42:14.000000000 +0600
	254	+++ grep-2.5.1a/src/search.c 2005-10-23 09:51:25.000000000 +0600
	255	@@ -18,9 +18,13 @@
	256
	257	/* Written August 1992 by Mike Haertel. */
	258
	259	+#ifndef _GNU_SOURCE
	260	+# define _GNU_SOURCE 1
	261	+#endif
	262	#ifdef HAVE_CONFIG_H
	263	# include <config.h>
	264	#endif
	265	+#include <assert.h>
	266	#include <sys/types.h>
	267	#if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC
	268	/* We can handle multibyte string. */
	269	@@ -31,7 +35,7 @@
	270
	271	#include "system.h"
	272	#include "grep.h"
	273	-#include "regex.h"
	274	+#include <regex.h>
	275	#include "dfa.h"
	276	#include "kwset.h"
	277	#include "error.h"
	278	@@ -39,6 +43,9 @@
	279	#ifdef HAVE_LIBPCRE
	280	# include <pcre.h>
	281	#endif
	282	+#ifdef HAVE_LANGINFO_CODESET
	283	+# include <langinfo.h>
	284	+#endif
	285
	286	#define NCHAR (UCHAR_MAX + 1)
	287
	288	@@ -70,9 +77,10 @@
	289	call the regexp matcher at all. */
	290	static int kwset_exact_matches;
	291
	292	-#if defined(MBS_SUPPORT)
	293	-static char* check_multibyte_string PARAMS ((char const *buf, size_t size));
	294	-#endif
	295	+/* UTF-8 encoding allows some optimizations that we can't otherwise
	296	+ assume in a multibyte encoding. */
	297	+static int using_utf8;
	298	+
	299	static void kwsinit PARAMS ((void));
	300	static void kwsmusts PARAMS ((void));
	301	static void Gcompile PARAMS ((char const *, size_t));
	302	@@ -84,6 +92,15 @@
	303	static size_t Pexecute PARAMS ((char const , size_t, size_t , int));
	304
	305	void
	306	+check_utf8 (void)
	307	+{
	308	+#ifdef HAVE_LANGINFO_CODESET
	309	+ if (strcmp (nl_langinfo (CODESET), "UTF-8") == 0)
	310	+ using_utf8 = 1;
	311	+#endif
	312	+}
	313	+
	314	+void
	315	dfaerror (char const *mesg)
	316	{
	317	error (2, 0, mesg);
	318	@@ -141,38 +158,6 @@
	319	}
	320	}
	321
	322	-#ifdef MBS_SUPPORT
	323	-/* This function allocate the array which correspond to "buf".
	324	- Then this check multibyte string and mark on the positions which
	325	- are not singlebyte character nor the first byte of a multibyte
	326	- character. Caller must free the array. */
	327	-static char*
	328	-check_multibyte_string(char const *buf, size_t size)
	329	-{
	330	- char *mb_properties = malloc(size);
	331	- mbstate_t cur_state;
	332	- int i;
	333	- memset(&cur_state, 0, sizeof(mbstate_t));
	334	- memset(mb_properties, 0, sizeof(char)*size);
	335	- for (i = 0; i < size ;)
	336	- {
	337	- size_t mbclen;
	338	- mbclen = mbrlen(buf + i, size - i, &cur_state);
	339	-
	340	- if (mbclen == (size_t) -1 \|\| mbclen == (size_t) -2 \|\| mbclen == 0)
	341	- {
	342	- /* An invalid sequence, or a truncated multibyte character.
	343	- We treat it as a singlebyte character. */
	344	- mbclen = 1;
	345	- }
	346	- mb_properties[i] = mbclen;
	347	- i += mbclen;
	348	- }
	349	-
	350	- return mb_properties;
	351	-}
	352	-#endif
	353	-
	354	static void
	355	Gcompile (char const *pattern, size_t size)
	356	{
	357	@@ -181,7 +166,8 @@
	358	size_t total = size;
	359	char const *motif = pattern;
	360
	361	- re_set_syntax (RE_SYNTAX_GREP \| RE_HAT_LISTS_NOT_NEWLINE);
	362	+ check_utf8 ();
	363	+ re_set_syntax (RE_SYNTAX_GREP \| RE_HAT_LISTS_NOT_NEWLINE \| (match_icase ? RE_ICASE : 0));
	364	dfasyntax (RE_SYNTAX_GREP \| RE_HAT_LISTS_NOT_NEWLINE, match_icase, eolbyte);
	365
	366	/* For GNU regex compiler we have to pass the patterns separately to detect
	367	@@ -233,7 +219,7 @@
	368	static char const line_end[] = "\\)$";
	369	static char const word_beg[] = "\\(^\\\|[^[:alnum:]_]\\)\\(";
	370	static char const word_end[] = "\\)\\([^[:alnum:]_]\\\|$\\)";
	371	- char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end);
	372	+ char *n = xmalloc (sizeof word_beg - 1 + size + sizeof word_end);
	373	size_t i;
	374	strcpy (n, match_lines ? line_beg : word_beg);
	375	i = strlen (n);
	376	@@ -257,14 +243,15 @@
	377	size_t total = size;
	378	char const *motif = pattern;
	379
	380	+ check_utf8 ();
	381	if (strcmp (matcher, "awk") == 0)
	382	{
	383	- re_set_syntax (RE_SYNTAX_AWK);
	384	+ re_set_syntax (RE_SYNTAX_AWK \| (match_icase ? RE_ICASE : 0));
	385	dfasyntax (RE_SYNTAX_AWK, match_icase, eolbyte);
	386	}
	387	else
	388	{
	389	- re_set_syntax (RE_SYNTAX_POSIX_EGREP);
	390	+ re_set_syntax (RE_SYNTAX_POSIX_EGREP \| (match_icase ? RE_ICASE : 0));
	391	dfasyntax (RE_SYNTAX_POSIX_EGREP, match_icase, eolbyte);
	392	}
	393
	394	@@ -316,7 +303,7 @@
	395	static char const line_end[] = ")$";
	396	static char const word_beg[] = "(^\|[^[:alnum:]_])(";
	397	static char const word_end[] = ")([^[:alnum:]_]\|$)";
	398	- char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end);
	399	+ char *n = xmalloc (sizeof word_beg - 1 + size + sizeof word_end);
	400	size_t i;
	401	strcpy (n, match_lines ? line_beg : word_beg);
	402	i = strlen(n);
	403	@@ -339,15 +326,35 @@
	404	char eol = eolbyte;
	405	int backref, start, len;
	406	struct kwsmatch kwsm;
	407	- size_t i;
	408	+ size_t i, ret_val;
	409	+ static int use_dfa;
	410	+ static int use_dfa_checked = 0;
	411	#ifdef MBS_SUPPORT
	412	- char *mb_properties = NULL;
	413	+ const char *last_char = NULL;
	414	+ int mb_cur_max = MB_CUR_MAX;
	415	+ mbstate_t mbs;
	416	+ memset (&mbs, '\0', sizeof (mbstate_t));
	417	#endif /* MBS_SUPPORT */
	418
	419	+ if (!use_dfa_checked)
	420	+ {
	421	+ char *grep_use_dfa = getenv ("GREP_USE_DFA");
	422	+ if (!grep_use_dfa)
	423	+ {
	424	#ifdef MBS_SUPPORT
	425	- if (MB_CUR_MAX > 1 && kwset)
	426	- mb_properties = check_multibyte_string(buf, size);
	427	+ /* Turn off DFA when processing multibyte input. */
	428	+ use_dfa = (MB_CUR_MAX == 1);
	429	+#else
	430	+ use_dfa = 1;
	431	#endif /* MBS_SUPPORT */
	432	+ }
	433	+ else
	434	+ {
	435	+ use_dfa = atoi (grep_use_dfa);
	436	+ }
	437	+
	438	+ use_dfa_checked = 1;
	439	+ }
	440
	441	buflim = buf + size;
	442
	443	@@ -358,47 +365,124 @@
	444	if (kwset)
	445	{
	446	/* Find a possible match using the KWset matcher. */
	447	- size_t offset = kwsexec (kwset, beg, buflim - beg, &kwsm);
	448	+#ifdef MBS_SUPPORT
	449	+ size_t bytes_left = 0;
	450	+#endif /* MBS_SUPPORT */
	451	+ size_t offset;
	452	+#ifdef MBS_SUPPORT
	453	+ /* kwsexec doesn't work with match_icase and multibyte input. */
	454	+ if (match_icase && mb_cur_max > 1)
	455	+ /* Avoid kwset */
	456	+ offset = 0;
	457	+ else
	458	+#endif /* MBS_SUPPORT */
	459	+ offset = kwsexec (kwset, beg, buflim - beg, &kwsm);
	460	if (offset == (size_t) -1)
	461	- {
	462	+ goto failure;
	463	#ifdef MBS_SUPPORT
	464	- if (MB_CUR_MAX > 1)
	465	- free(mb_properties);
	466	-#endif
	467	- return (size_t)-1;
	468	+ if (mb_cur_max > 1 && !using_utf8)
	469	+ {
	470	+ bytes_left = offset;
	471	+ while (bytes_left)
	472	+ {
	473	+ size_t mlen = mbrlen (beg, bytes_left, &mbs);
	474	+
	475	+ last_char = beg;
	476	+ if (mlen == (size_t) -1 \|\| mlen == 0)
	477	+ {
	478	+ /* Incomplete character: treat as single-byte. */
	479	+ memset (&mbs, '\0', sizeof (mbstate_t));
	480	+ beg++;
	481	+ bytes_left--;
	482	+ continue;
	483	+ }
	484	+
	485	+ if (mlen == (size_t) -2)
	486	+ /* Offset points inside multibyte character:
	487	+ * no good. */
	488	+ break;
	489	+
	490	+ beg += mlen;
	491	+ bytes_left -= mlen;
	492	+ }
	493	}
	494	+ else
	495	+#endif /* MBS_SUPPORT */
	496	beg += offset;
	497	/* Narrow down to the line containing the candidate, and
	498	run it through DFA. */
	499	end = memchr(beg, eol, buflim - beg);
	500	end++;
	501	#ifdef MBS_SUPPORT
	502	- if (MB_CUR_MAX > 1 && mb_properties[beg - buf] == 0)
	503	+ if (mb_cur_max > 1 && bytes_left)
	504	continue;
	505	-#endif
	506	+#endif /* MBS_SUPPORT */
	507	while (beg > buf && beg[-1] != eol)
	508	--beg;
	509	- if (kwsm.index < kwset_exact_matches)
	510	- goto success;
	511	- if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1)
	512	+ if (
	513	+#ifdef MBS_SUPPORT
	514	+ !(match_icase && mb_cur_max > 1) &&
	515	+#endif /* MBS_SUPPORT */
	516	+ (kwsm.index < kwset_exact_matches))
	517	+ goto success_in_beg_and_end;
	518	+ if (use_dfa &&
	519	+ dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1)
	520	continue;
	521	}
	522	else
	523	{
	524	/* No good fixed strings; start with DFA. */
	525	- size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref);
	526	+#ifdef MBS_SUPPORT
	527	+ size_t bytes_left = 0;
	528	+#endif /* MBS_SUPPORT */
	529	+ size_t offset = 0;
	530	+ if (use_dfa)
	531	+ offset = dfaexec (&dfa, beg, buflim - beg, &backref);
	532	if (offset == (size_t) -1)
	533	break;
	534	/* Narrow down to the line we've found. */
	535	+#ifdef MBS_SUPPORT
	536	+ if (mb_cur_max > 1 && !using_utf8)
	537	+ {
	538	+ bytes_left = offset;
	539	+ while (bytes_left)
	540	+ {
	541	+ size_t mlen = mbrlen (beg, bytes_left, &mbs);
	542	+
	543	+ last_char = beg;
	544	+ if (mlen == (size_t) -1 \|\| mlen == 0)
	545	+ {
	546	+ /* Incomplete character: treat as single-byte. */
	547	+ memset (&mbs, '\0', sizeof (mbstate_t));
	548	+ beg++;
	549	+ bytes_left--;
	550	+ continue;
	551	+ }
	552	+
	553	+ if (mlen == (size_t) -2)
	554	+ /* Offset points inside multibyte character:
	555	+ * no good. */
	556	+ break;
	557	+
	558	+ beg += mlen;
	559	+ bytes_left -= mlen;
	560	+ }
	561	+ }
	562	+ else
	563	+#endif /* MBS_SUPPORT */
	564	beg += offset;
	565	end = memchr (beg, eol, buflim - beg);
	566	end++;
	567	+#ifdef MBS_SUPPORT
	568	+ if (mb_cur_max > 1 && bytes_left)
	569	+ continue;
	570	+#endif /* MBS_SUPPORT */
	571	while (beg > buf && beg[-1] != eol)
	572	--beg;
	573	}
	574	/* Successful, no backreferences encountered! */
	575	- if (!backref)
	576	- goto success;
	577	+ if (use_dfa && !backref)
	578	+ goto success_in_beg_and_end;
	579	}
	580	else
	581	end = beg + size;
	582	@@ -413,14 +497,11 @@
	583	end - beg - 1, &(patterns[i].regs))))
	584	{
	585	len = patterns[i].regs.end[0] - start;
	586	- if (exact)
	587	- {
	588	- *match_size = len;
	589	- return start;
	590	- }
	591	+ if (exact && !match_words)
	592	+ goto success_in_start_and_len;
	593	if ((!match_lines && !match_words)
	594	\|\| (match_lines && len == end - beg - 1))
	595	- goto success;
	596	+ goto success_in_beg_and_end;
	597	/* If -w, check if the match aligns with word boundaries.
	598	We do this iteratively because:
	599	(a) the line may contain more than one occurence of the
	600	@@ -431,10 +512,84 @@
	601	if (match_words)
	602	while (start >= 0)
	603	{
	604	- if ((start == 0 \|\| !WCHAR ((unsigned char) beg[start - 1]))
	605	- && (len == end - beg - 1
	606	- \|\| !WCHAR ((unsigned char) beg[start + len])))
	607	- goto success;
	608	+ int lword_match = 0;
	609	+ if (start == 0)
	610	+ lword_match = 1;
	611	+ else
	612	+ {
	613	+ assert (start > 0);
	614	+#ifdef MBS_SUPPORT
	615	+ if (mb_cur_max > 1)
	616	+ {
	617	+ const char *s;
	618	+ int mr;
	619	+ wchar_t pwc;
	620	+
	621	+ if (using_utf8)
	622	+ {
	623	+ s = beg + start - 1;
	624	+ while (s > buf
	625	+ && (unsigned char) *s >= 0x80
	626	+ && (unsigned char) *s <= 0xbf)
	627	+ --s;
	628	+ }
	629	+ else
	630	+ s = last_char;
	631	+ mr = mbtowc (&pwc, s, beg + start - s);
	632	+ if (mr <= 0)
	633	+ {
	634	+ memset (&mbs, '\0', sizeof (mbstate_t));
	635	+ lword_match = 1;
	636	+ }
	637	+ else if (!(iswalnum (pwc) \|\| pwc == L'_')
	638	+ && mr == (int) (beg + start - s))
	639	+ lword_match = 1;
	640	+ }
	641	+ else
	642	+#endif /* MBS_SUPPORT */
	643	+ if (!WCHAR ((unsigned char) beg[start - 1]))
	644	+ lword_match = 1;
	645	+ }
	646	+
	647	+ if (lword_match)
	648	+ {
	649	+ int rword_match = 0;
	650	+ if (start + len == end - beg - 1)
	651	+ rword_match = 1;
	652	+ else
	653	+ {
	654	+#ifdef MBS_SUPPORT
	655	+ if (mb_cur_max > 1)
	656	+ {
	657	+ wchar_t nwc;
	658	+ int mr;
	659	+
	660	+ mr = mbtowc (&nwc, beg + start + len,
	661	+ end - beg - start - len - 1);
	662	+ if (mr <= 0)
	663	+ {
	664	+ memset (&mbs, '\0', sizeof (mbstate_t));
	665	+ rword_match = 1;
	666	+ }
	667	+ else if (!iswalnum (nwc) && nwc != L'_')
	668	+ rword_match = 1;
	669	+ }
	670	+ else
	671	+#endif /* MBS_SUPPORT */
	672	+ if (!WCHAR ((unsigned char) beg[start + len]))
	673	+ rword_match = 1;
	674	+ }
	675	+
	676	+ if (rword_match)
	677	+ {
	678	+ if (!exact)
	679	+ /* Returns the whole line. */
	680	+ goto success_in_beg_and_end;
	681	+ else
	682	+ /* Returns just this word match. */
	683	+ goto success_in_start_and_len;
	684	+ }
	685	+ }
	686	if (len > 0)
	687	{
	688	/* Try a shorter length anchored at the same place. */
	689	@@ -461,26 +616,154 @@
	690	}
	691	} /* for Regex patterns. */
	692	} /* for (beg = end ..) */
	693	-#ifdef MBS_SUPPORT
	694	- if (MB_CUR_MAX > 1 && mb_properties)
	695	- free (mb_properties);
	696	-#endif /* MBS_SUPPORT */
	697	+
	698	+ failure:
	699	return (size_t) -1;
	700
	701	- success:
	702	-#ifdef MBS_SUPPORT
	703	- if (MB_CUR_MAX > 1 && mb_properties)
	704	- free (mb_properties);
	705	-#endif /* MBS_SUPPORT */
	706	- *match_size = end - beg;
	707	- return beg - buf;
	708	+ success_in_beg_and_end:
	709	+ len = end - beg;
	710	+ start = beg - buf;
	711	+ /* FALLTHROUGH */
	712	+
	713	+ success_in_start_and_len:
	714	+ *match_size = len;
	715	+ return start;
	716	}
	717
	718	+#ifdef MBS_SUPPORT
	719	+static int f_i_multibyte; /* whether we're using the new -Fi MB method */
	720	+static struct
	721	+{
	722	+ wchar_t **patterns;
	723	+ size_t count, maxlen;
	724	+ unsigned char *match;
	725	+} Fimb;
	726	+#endif
	727	+
	728	static void
	729	Fcompile (char const *pattern, size_t size)
	730	{
	731	+ int mb_cur_max = MB_CUR_MAX;
	732	char const beg, lim, *err;
	733
	734	+ check_utf8 ();
	735	+#ifdef MBS_SUPPORT
	736	+ /* Support -F -i for UTF-8 input. */
	737	+ if (match_icase && mb_cur_max > 1)
	738	+ {
	739	+ mbstate_t mbs;
	740	+ wchar_t wcpattern = xmalloc ((size + 1) sizeof (wchar_t));
	741	+ const char *patternend = pattern;
	742	+ size_t wcsize;
	743	+ kwset_t fimb_kwset = NULL;
	744	+ char *starts = NULL;
	745	+ wchar_t wcbeg, wclim;
	746	+ size_t allocated = 0;
	747	+
	748	+ memset (&mbs, '\0', sizeof (mbs));
	749	+# ifdef __GNU_LIBRARY__
	750	+ wcsize = mbsnrtowcs (wcpattern, &patternend, size, size, &mbs);
	751	+ if (patternend != pattern + size)
	752	+ wcsize = (size_t) -1;
	753	+# else
	754	+ {
	755	+ char *patterncopy = xmalloc (size + 1);
	756	+
	757	+ memcpy (patterncopy, pattern, size);
	758	+ patterncopy[size] = '\0';
	759	+ patternend = patterncopy;
	760	+ wcsize = mbsrtowcs (wcpattern, &patternend, size, &mbs);
	761	+ if (patternend != patterncopy + size)
	762	+ wcsize = (size_t) -1;
	763	+ free (patterncopy);
	764	+ }
	765	+# endif
	766	+ if (wcsize + 2 <= 2)
	767	+ {
	768	+fimb_fail:
	769	+ free (wcpattern);
	770	+ free (starts);
	771	+ if (fimb_kwset)
	772	+ kwsfree (fimb_kwset);
	773	+ free (Fimb.patterns);
	774	+ Fimb.patterns = NULL;
	775	+ }
	776	+ else
	777	+ {
	778	+ if (!(fimb_kwset = kwsalloc (NULL)))
	779	+ error (2, 0, _("memory exhausted"));
	780	+
	781	+ starts = xmalloc (mb_cur_max * 3);
	782	+ wcbeg = wcpattern;
	783	+ do
	784	+ {
	785	+ int i;
	786	+ size_t wclen;
	787	+
	788	+ if (Fimb.count >= allocated)
	789	+ {
	790	+ if (allocated == 0)
	791	+ allocated = 128;
	792	+ else
	793	+ allocated *= 2;
	794	+ Fimb.patterns = xrealloc (Fimb.patterns,
	795	+ sizeof (wchar_t ) allocated);
	796	+ }
	797	+ Fimb.patterns[Fimb.count++] = wcbeg;
	798	+ for (wclim = wcbeg;
	799	+ wclim < wcpattern + wcsize && *wclim != L'\n'; ++wclim)
	800	+ wclim = towlower (wclim);
	801	+ *wclim = L'\0';
	802	+ wclen = wclim - wcbeg;
	803	+ if (wclen > Fimb.maxlen)
	804	+ Fimb.maxlen = wclen;
	805	+ if (wclen > 3)
	806	+ wclen = 3;
	807	+ if (wclen == 0)
	808	+ {
	809	+ if ((err = kwsincr (fimb_kwset, "", 0)) != 0)
	810	+ error (2, 0, err);
	811	+ }
	812	+ else
	813	+ for (i = 0; i < (1 << wclen); i++)
	814	+ {
	815	+ char *p = starts;
	816	+ int j, k;
	817	+
	818	+ for (j = 0; j < wclen; ++j)
	819	+ {
	820	+ wchar_t wc = wcbeg[j];
	821	+ if (i & (1 << j))
	822	+ {
	823	+ wc = towupper (wc);
	824	+ if (wc == wcbeg[j])
	825	+ continue;
	826	+ }
	827	+ k = wctomb (p, wc);
	828	+ if (k <= 0)
	829	+ goto fimb_fail;
	830	+ p += k;
	831	+ }
	832	+ if ((err = kwsincr (fimb_kwset, starts, p - starts)) != 0)
	833	+ error (2, 0, err);
	834	+ }
	835	+ if (wclim < wcpattern + wcsize)
	836	+ ++wclim;
	837	+ wcbeg = wclim;
	838	+ }
	839	+ while (wcbeg < wcpattern + wcsize);
	840	+ f_i_multibyte = 1;
	841	+ kwset = fimb_kwset;
	842	+ free (starts);
	843	+ Fimb.match = xmalloc (Fimb.count);
	844	+ if ((err = kwsprep (kwset)) != 0)
	845	+ error (2, 0, err);
	846	+ return;
	847	+ }
	848	+ }
	849	+#endif /* MBS_SUPPORT */
	850	+
	851	+
	852	kwsinit ();
	853	beg = pattern;
	854	do
	855	@@ -499,6 +782,76 @@
	856	error (2, 0, err);
	857	}
	858
	859	+#ifdef MBS_SUPPORT
	860	+static int
	861	+Fimbexec (const char buf, size_t size, size_t plen, int exact)
	862	+{
	863	+ size_t len, letter, i;
	864	+ int ret = -1;
	865	+ mbstate_t mbs;
	866	+ wchar_t wc;
	867	+ int patterns_left;
	868	+
	869	+ assert (match_icase && f_i_multibyte == 1);
	870	+ assert (MB_CUR_MAX > 1);
	871	+
	872	+ memset (&mbs, '\0', sizeof (mbs));
	873	+ memset (Fimb.match, '\1', Fimb.count);
	874	+ letter = len = 0;
	875	+ patterns_left = 1;
	876	+ while (patterns_left && len <= size)
	877	+ {
	878	+ size_t c;
	879	+
	880	+ patterns_left = 0;
	881	+ if (len < size)
	882	+ {
	883	+ c = mbrtowc (&wc, buf + len, size - len, &mbs);
	884	+ if (c + 2 <= 2)
	885	+ return ret;
	886	+
	887	+ wc = towlower (wc);
	888	+ }
	889	+ else
	890	+ {
	891	+ c = 1;
	892	+ wc = L'\0';
	893	+ }
	894	+
	895	+ for (i = 0; i < Fimb.count; i++)
	896	+ {
	897	+ if (Fimb.match[i])
	898	+ {
	899	+ if (Fimb.patterns[i][letter] == L'\0')
	900	+ {
	901	+ /* Found a match. */
	902	+ *plen = len;
	903	+ if (!exact && !match_words)
	904	+ return 0;
	905	+ else
	906	+ {
	907	+ /* For -w or exact look for longest match. */
	908	+ ret = 0;
	909	+ Fimb.match[i] = '\0';
	910	+ continue;
	911	+ }
	912	+ }
	913	+
	914	+ if (Fimb.patterns[i][letter] == wc)
	915	+ patterns_left = 1;
	916	+ else
	917	+ Fimb.match[i] = '\0';
	918	+ }
	919	+ }
	920	+
	921	+ len += c;
	922	+ letter++;
	923	+ }
	924	+
	925	+ return ret;
	926	+}
	927	+#endif /* MBS_SUPPORT */
	928	+
	929	static size_t
	930	Fexecute (char const buf, size_t size, size_t match_size, int exact)
	931	{
	932	@@ -506,88 +859,268 @@
	933	register size_t len;
	934	char eol = eolbyte;
	935	struct kwsmatch kwsmatch;
	936	+ size_t ret_val;
	937	#ifdef MBS_SUPPORT
	938	- char *mb_properties;
	939	- if (MB_CUR_MAX > 1)
	940	- mb_properties = check_multibyte_string (buf, size);
	941	+ int mb_cur_max = MB_CUR_MAX;
	942	+ mbstate_t mbs;
	943	+ memset (&mbs, '\0', sizeof (mbstate_t));
	944	+ const char *last_char = NULL;
	945	#endif /* MBS_SUPPORT */
	946
	947	for (beg = buf; beg <= buf + size; ++beg)
	948	{
	949	- size_t offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch);
	950	+ size_t offset;
	951	+ offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch);
	952	+
	953	if (offset == (size_t) -1)
	954	- {
	955	+ goto failure;
	956	#ifdef MBS_SUPPORT
	957	- if (MB_CUR_MAX > 1)
	958	- free(mb_properties);
	959	-#endif /* MBS_SUPPORT */
	960	- return offset;
	961	+ if (mb_cur_max > 1 && !using_utf8)
	962	+ {
	963	+ size_t bytes_left = offset;
	964	+ while (bytes_left)
	965	+ {
	966	+ size_t mlen = mbrlen (beg, bytes_left, &mbs);
	967	+
	968	+ last_char = beg;
	969	+ if (mlen == (size_t) -1 \|\| mlen == 0)
	970	+ {
	971	+ /* Incomplete character: treat as single-byte. */
	972	+ memset (&mbs, '\0', sizeof (mbstate_t));
	973	+ beg++;
	974	+ bytes_left--;
	975	+ continue;
	976	+ }
	977	+
	978	+ if (mlen == (size_t) -2)
	979	+ /* Offset points inside multibyte character: no good. */
	980	+ break;
	981	+
	982	+ beg += mlen;
	983	+ bytes_left -= mlen;
	984	+ }
	985	+
	986	+ if (bytes_left)
	987	+ continue;
	988	}
	989	-#ifdef MBS_SUPPORT
	990	- if (MB_CUR_MAX > 1 && mb_properties[offset+beg-buf] == 0)
	991	- continue; /* It is a part of multibyte character. */
	992	+ else
	993	#endif /* MBS_SUPPORT */
	994	beg += offset;
	995	- len = kwsmatch.size[0];
	996	- if (exact)
	997	- {
	998	- *match_size = len;
	999	#ifdef MBS_SUPPORT
	1000	- if (MB_CUR_MAX > 1)
	1001	- free (mb_properties);
	1002	+ /* For f_i_multibyte, the string at beg now matches first 3 chars of
	1003	+ one of the search strings (less if there are shorter search strings).
	1004	+ See if this is a real match. */
	1005	+ if (f_i_multibyte
	1006	+ && Fimbexec (beg, buf + size - beg, &kwsmatch.size[0], exact))
	1007	+ goto next_char;
	1008	#endif /* MBS_SUPPORT */
	1009	- return beg - buf;
	1010	- }
	1011	+ len = kwsmatch.size[0];
	1012	+ if (exact && !match_words)
	1013	+ goto success_in_beg_and_len;
	1014	if (match_lines)
	1015	{
	1016	if (beg > buf && beg[-1] != eol)
	1017	- continue;
	1018	+ goto next_char;
	1019	if (beg + len < buf + size && beg[len] != eol)
	1020	- continue;
	1021	+ goto next_char;
	1022	goto success;
	1023	}
	1024	else if (match_words)
	1025	- for (try = beg; len; )
	1026	- {
	1027	- if (try > buf && WCHAR((unsigned char) try[-1]))
	1028	- break;
	1029	- if (try + len < buf + size && WCHAR((unsigned char) try[len]))
	1030	- {
	1031	- offset = kwsexec (kwset, beg, --len, &kwsmatch);
	1032	- if (offset == (size_t) -1)
	1033	- {
	1034	+ {
	1035	+ while (len)
	1036	+ {
	1037	+ int word_match = 0;
	1038	+ if (beg > buf)
	1039	+ {
	1040	#ifdef MBS_SUPPORT
	1041	- if (MB_CUR_MAX > 1)
	1042	- free (mb_properties);
	1043	+ if (mb_cur_max > 1)
	1044	+ {
	1045	+ const char *s;
	1046	+ int mr;
	1047	+ wchar_t pwc;
	1048	+
	1049	+ if (using_utf8)
	1050	+ {
	1051	+ s = beg - 1;
	1052	+ while (s > buf
	1053	+ && (unsigned char) *s >= 0x80
	1054	+ && (unsigned char) *s <= 0xbf)
	1055	+ --s;
	1056	+ }
	1057	+ else
	1058	+ s = last_char;
	1059	+ mr = mbtowc (&pwc, s, beg - s);
	1060	+ if (mr <= 0)
	1061	+ memset (&mbs, '\0', sizeof (mbstate_t));
	1062	+ else if ((iswalnum (pwc) \|\| pwc == L'_')
	1063	+ && mr == (int) (beg - s))
	1064	+ goto next_char;
	1065	+ }
	1066	+ else
	1067	#endif /* MBS_SUPPORT */
	1068	- return offset;
	1069	- }
	1070	- try = beg + offset;
	1071	- len = kwsmatch.size[0];
	1072	- }
	1073	- else
	1074	- goto success;
	1075	- }
	1076	+ if (WCHAR ((unsigned char) beg[-1]))
	1077	+ goto next_char;
	1078	+ }
	1079	+#ifdef MBS_SUPPORT
	1080	+ if (mb_cur_max > 1)
	1081	+ {
	1082	+ wchar_t nwc;
	1083	+ int mr;
	1084	+
	1085	+ mr = mbtowc (&nwc, beg + len, buf + size - beg - len);
	1086	+ if (mr <= 0)
	1087	+ {
	1088	+ memset (&mbs, '\0', sizeof (mbstate_t));
	1089	+ word_match = 1;
	1090	+ }
	1091	+ else if (!iswalnum (nwc) && nwc != L'_')
	1092	+ word_match = 1;
	1093	+ }
	1094	+ else
	1095	+#endif /* MBS_SUPPORT */
	1096	+ if (beg + len >= buf + size \|\| !WCHAR ((unsigned char) beg[len]))
	1097	+ word_match = 1;
	1098	+ if (word_match)
	1099	+ {
	1100	+ if (!exact)
	1101	+ /* Returns the whole line now we know there's a word match. */
	1102	+ goto success;
	1103	+ else
	1104	+ /* Returns just this word match. */
	1105	+ goto success_in_beg_and_len;
	1106	+ }
	1107	+ if (len > 0)
	1108	+ {
	1109	+ /* Try a shorter length anchored at the same place. */
	1110	+ --len;
	1111	+ offset = kwsexec (kwset, beg, len, &kwsmatch);
	1112	+
	1113	+ if (offset == -1)
	1114	+ goto next_char; /* Try a different anchor. */
	1115	+#ifdef MBS_SUPPORT
	1116	+ if (mb_cur_max > 1 && !using_utf8)
	1117	+ {
	1118	+ size_t bytes_left = offset;
	1119	+ while (bytes_left)
	1120	+ {
	1121	+ size_t mlen = mbrlen (beg, bytes_left, &mbs);
	1122	+
	1123	+ last_char = beg;
	1124	+ if (mlen == (size_t) -1 \|\| mlen == 0)
	1125	+ {
	1126	+ /* Incomplete character: treat as single-byte. */
	1127	+ memset (&mbs, '\0', sizeof (mbstate_t));
	1128	+ beg++;
	1129	+ bytes_left--;
	1130	+ continue;
	1131	+ }
	1132	+
	1133	+ if (mlen == (size_t) -2)
	1134	+ {
	1135	+ /* Offset points inside multibyte character:
	1136	+ * no good. */
	1137	+ break;
	1138	+ }
	1139	+
	1140	+ beg += mlen;
	1141	+ bytes_left -= mlen;
	1142	+ }
	1143	+
	1144	+ if (bytes_left)
	1145	+ {
	1146	+ memset (&mbs, '\0', sizeof (mbstate_t));
	1147	+ goto next_char; /* Try a different anchor. */
	1148	+ }
	1149	+ }
	1150	+ else
	1151	+#endif /* MBS_SUPPORT */
	1152	+ beg += offset;
	1153	+#ifdef MBS_SUPPORT
	1154	+ /* The string at beg now matches first 3 chars of one of
	1155	+ the search strings (less if there are shorter search
	1156	+ strings). See if this is a real match. */
	1157	+ if (f_i_multibyte
	1158	+ && Fimbexec (beg, len - offset, &kwsmatch.size[0],
	1159	+ exact))
	1160	+ goto next_char;
	1161	+#endif /* MBS_SUPPORT */
	1162	+ len = kwsmatch.size[0];
	1163	+ }
	1164	+ }
	1165	+ }
	1166	else
	1167	goto success;
	1168	- }
	1169	-
	1170	+next_char:;
	1171	#ifdef MBS_SUPPORT
	1172	- if (MB_CUR_MAX > 1)
	1173	- free (mb_properties);
	1174	+ /* Advance to next character. For MB_CUR_MAX == 1 case this is handled
	1175	+ by ++beg above. */
	1176	+ if (mb_cur_max > 1)
	1177	+ {
	1178	+ if (using_utf8)
	1179	+ {
	1180	+ unsigned char c = *beg;
	1181	+ if (c >= 0xc2)
	1182	+ {
	1183	+ if (c < 0xe0)
	1184	+ ++beg;
	1185	+ else if (c < 0xf0)
	1186	+ beg += 2;
	1187	+ else if (c < 0xf8)
	1188	+ beg += 3;
	1189	+ else if (c < 0xfc)
	1190	+ beg += 4;
	1191	+ else if (c < 0xfe)
	1192	+ beg += 5;
	1193	+ }
	1194	+ }
	1195	+ else
	1196	+ {
	1197	+ size_t l = mbrlen (beg, buf + size - beg, &mbs);
	1198	+
	1199	+ last_char = beg;
	1200	+ if (l + 2 >= 2)
	1201	+ beg += l - 1;
	1202	+ else
	1203	+ memset (&mbs, '\0', sizeof (mbstate_t));
	1204	+ }
	1205	+ }
	1206	#endif /* MBS_SUPPORT */
	1207	+ }
	1208	+
	1209	+ failure:
	1210	return -1;
	1211
	1212	success:
	1213	+#ifdef MBS_SUPPORT
	1214	+ if (mb_cur_max > 1 && !using_utf8)
	1215	+ {
	1216	+ end = beg + len;
	1217	+ while (end < buf + size)
	1218	+ {
	1219	+ size_t mlen = mbrlen (end, buf + size - end, &mbs);
	1220	+ if (mlen == (size_t) -1 \|\| mlen == (size_t) -2 \|\| mlen == 0)
	1221	+ {
	1222	+ memset (&mbs, '\0', sizeof (mbstate_t));
	1223	+ mlen = 1;
	1224	+ }
	1225	+ if (mlen == 1 && *end == eol)
	1226	+ break;
	1227	+
	1228	+ end += mlen;
	1229	+ }
	1230	+ }
	1231	+ else
	1232	+#endif /* MBS_SUPPORT */
	1233	end = memchr (beg + len, eol, (buf + size) - (beg + len));
	1234	+
	1235	end++;
	1236	while (buf < beg && beg[-1] != eol)
	1237	--beg;
	1238	- *match_size = end - beg;
	1239	-#ifdef MBS_SUPPORT
	1240	- if (MB_CUR_MAX > 1)
	1241	- free (mb_properties);
	1242	-#endif /* MBS_SUPPORT */
	1243	+ len = end - beg;
	1244	+ /* FALLTHROUGH */
	1245	+
	1246	+ success_in_beg_and_len:
	1247	+ *match_size = len;
	1248	return beg - buf;
	1249	}
	1250
	1251	diff -urN grep-2.5.1a.orig/src/search.c.orig grep-2.5.1a/src/search.c.orig
	1252	--- grep-2.5.1a.orig/src/search.c.orig 1970-01-01 05:00:00.000000000 +0500
	1253	+++ grep-2.5.1a/src/search.c.orig 2005-10-23 09:48:39.000000000 +0600
	1254	@@ -0,0 +1,714 @@
	1255	+/* search.c - searching subroutines using dfa, kwset and regex for grep.
	1256	+ Copyright 1992, 1998, 2000 Free Software Foundation, Inc.
	1257	+
	1258	+ This program is free software; you can redistribute it and/or modify
	1259	+ it under the terms of the GNU General Public License as published by
	1260	+ the Free Software Foundation; either version 2, or (at your option)
	1261	+ any later version.
	1262	+
	1263	+ This program is distributed in the hope that it will be useful,
	1264	+ but WITHOUT ANY WARRANTY; without even the implied warranty of
	1265	+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	1266	+ GNU General Public License for more details.
	1267	+
	1268	+ You should have received a copy of the GNU General Public License
	1269	+ along with this program; if not, write to the Free Software
	1270	+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
	1271	+ 02111-1307, USA. */
	1272	+
	1273	+/* Written August 1992 by Mike Haertel. */
	1274	+
	1275	+#ifdef HAVE_CONFIG_H
	1276	+# include <config.h>
	1277	+#endif
	1278	+#include <sys/types.h>
	1279	+#if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC
	1280	+/* We can handle multibyte string. */
	1281	+# define MBS_SUPPORT
	1282	+# include <wchar.h>
	1283	+# include <wctype.h>
	1284	+#endif
	1285	+
	1286	+#include "system.h"
	1287	+#include "grep.h"
	1288	+#include "regex.h"
	1289	+#include "dfa.h"
	1290	+#include "kwset.h"
	1291	+#include "error.h"
	1292	+#include "xalloc.h"
	1293	+#ifdef HAVE_LIBPCRE
	1294	+# include <pcre.h>
	1295	+#endif
	1296	+
	1297	+#define NCHAR (UCHAR_MAX + 1)
	1298	+
	1299	+/* For -w, we also consider _ to be word constituent. */
	1300	+#define WCHAR(C) (ISALNUM(C) || (C) == '_')
	1301	+
	1302	+/* DFA compiled regexp. */
	1303	+static struct dfa dfa;
	1304	+
	1305	+/* The Regex compiled patterns. */
	1306	+static struct patterns
	1307	+{
	1308	+ /* Regex compiled regexp. */
	1309	+ struct re_pattern_buffer regexbuf;
	1310	+ struct re_registers regs; /* This is here on account of a BRAIN-DEAD
	1311	+ Q@#%!# library interface in regex.c. */
	1312	+} patterns0;
	1313	+
	1314	+struct patterns *patterns;
	1315	+size_t pcount;
	1316	+
	1317	+/* KWset compiled pattern. For Ecompile and Gcompile, we compile
	1318	+ a list of strings, at least one of which is known to occur in
	1319	+ any string matching the regexp. */
	1320	+static kwset_t kwset;
	1321	+
	1322	+/* Number of compiled fixed strings known to exactly match the regexp.
	1323	+ If kwsexec returns < kwset_exact_matches, then we don't need to
	1324	+ call the regexp matcher at all. */
	1325	+static int kwset_exact_matches;
	1326	+
	1327	+#if defined(MBS_SUPPORT)
	1328	+static char* check_multibyte_string PARAMS ((char const *buf, size_t size));
	1329	+#endif
	1330	+static void kwsinit PARAMS ((void));
	1331	+static void kwsmusts PARAMS ((void));
	1332	+static void Gcompile PARAMS ((char const *, size_t));
	1333	+static void Ecompile PARAMS ((char const *, size_t));
	1334	+static size_t EGexecute PARAMS ((char const *, size_t, size_t *, int ));
	1335	+static void Fcompile PARAMS ((char const *, size_t));
	1336	+static size_t Fexecute PARAMS ((char const *, size_t, size_t *, int));
	1337	+static void Pcompile PARAMS ((char const *, size_t ));
	1338	+static size_t Pexecute PARAMS ((char const *, size_t, size_t *, int));
	1339	+
	1340	+void
	1341	+dfaerror (char const *mesg)
	1342	+{
	1343	+ error (2, 0, mesg);
	1344	+}
	1345	+
	1346	+static void
	1347	+kwsinit (void)
	1348	+{
	1349	+ static char trans[NCHAR];
	1350	+ int i;
	1351	+
	1352	+ if (match_icase)
	1353	+ for (i = 0; i < NCHAR; ++i)
	1354	+ trans[i] = TOLOWER (i);
	1355	+
	1356	+ if (!(kwset = kwsalloc (match_icase ? trans : (char *) 0)))
	1357	+ error (2, 0, _("memory exhausted"));
	1358	+}
	1359	+
	1360	+/* If the DFA turns out to have some set of fixed strings one of
	1361	+ which must occur in the match, then we build a kwset matcher
	1362	+ to find those strings, and thus quickly filter out impossible
	1363	+ matches. */
	1364	+static void
	1365	+kwsmusts (void)
	1366	+{
	1367	+ struct dfamust const *dm;
	1368	+ char const *err;
	1369	+
	1370	+ if (dfa.musts)
	1371	+ {
	1372	+ kwsinit ();
	1373	+ /* First, we compile in the substrings known to be exact
	1374	+ matches. The kwset matcher will return the index
	1375	+ of the matching string that it chooses. */
	1376	+ for (dm = dfa.musts; dm; dm = dm->next)
	1377	+ {
	1378	+ if (!dm->exact)
	1379	+ continue;
	1380	+ ++kwset_exact_matches;
	1381	+ if ((err = kwsincr (kwset, dm->must, strlen (dm->must))) != 0)
	1382	+ error (2, 0, err);
	1383	+ }
	1384	+ /* Now, we compile the substrings that will require
	1385	+ the use of the regexp matcher. */
	1386	+ for (dm = dfa.musts; dm; dm = dm->next)
	1387	+ {
	1388	+ if (dm->exact)
	1389	+ continue;
	1390	+ if ((err = kwsincr (kwset, dm->must, strlen (dm->must))) != 0)
	1391	+ error (2, 0, err);
	1392	+ }
	1393	+ if ((err = kwsprep (kwset)) != 0)
	1394	+ error (2, 0, err);
	1395	+ }
	1396	+}
	1397	+
	1398	+#ifdef MBS_SUPPORT
	1399	+/* Allocate an array with one entry per byte of "buf" and scan "buf" as a
	1400	+ multibyte string. The first byte of each character gets that character's
	1401	+ length; bytes that are neither a single-byte character nor the first byte
	1402	+ of a multibyte character stay 0. The caller must free the array. */
	1403	+static char*
	1404	+check_multibyte_string(char const *buf, size_t size)
	1405	+{
	1406	+ char *mb_properties = malloc(size);
	1407	+ mbstate_t cur_state;
	1408	+ int i;
	1409	+ memset(&cur_state, 0, sizeof(mbstate_t));
	1410	+ memset(mb_properties, 0, sizeof(char)*size);
	1411	+ for (i = 0; i < size ;)
	1412	+ {
	1413	+ size_t mbclen;
	1414	+ mbclen = mbrlen(buf + i, size - i, &cur_state);
	1415	+
	1416	+ if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
	1417	+ {
	1418	+ /* An invalid sequence, or a truncated multibyte character.
	1419	+ We treat it as a singlebyte character. */
	1420	+ mbclen = 1;
	1421	+ }
	1422	+ mb_properties[i] = mbclen;
	1423	+ i += mbclen;
	1424	+ }
	1425	+
	1426	+ return mb_properties;
	1427	+}
	1428	+#endif
	1429	+
	1430	+static void
	1431	+Gcompile (char const *pattern, size_t size)
	1432	+{
	1433	+ const char *err;
	1434	+ char const *sep;
	1435	+ size_t total = size;
	1436	+ char const *motif = pattern;
	1437	+
	1438	+ re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE);
	1439	+ dfasyntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase, eolbyte);
	1440	+
	1441	+ /* For GNU regex compiler we have to pass the patterns separately to detect
	1442	+ errors like "[\nallo\n]\n". The patterns here are "[", "allo" and "]", and
	1443	+ GNU regex should raise a syntax error for them. The same goes for
	1444	+ backreferences, which must stay local to each pattern. */
	1445	+ do
	1446	+ {
	1447	+ size_t len;
	1448	+ sep = memchr (motif, '\n', total);
	1449	+ if (sep)
	1450	+ {
	1451	+ len = sep - motif;
	1452	+ sep++;
	1453	+ total -= (len + 1);
	1454	+ }
	1455	+ else
	1456	+ {
	1457	+ len = total;
	1458	+ total = 0;
	1459	+ }
	1460	+
	1461	+ patterns = realloc (patterns, (pcount + 1) * sizeof (*patterns));
	1462	+ if (patterns == NULL)
	1463	+ error (2, errno, _("memory exhausted"));
	1464	+
	1465	+ patterns[pcount] = patterns0;
	1466	+
	1467	+ if ((err = re_compile_pattern (motif, len,
	1468	+ &(patterns[pcount].regexbuf))) != 0)
	1469	+ error (2, 0, err);
	1470	+ pcount++;
	1471	+
	1472	+ motif = sep;
	1473	+ } while (sep && total != 0);
	1474	+
	1475	+ /* In the match_words and match_lines cases, we use a different pattern
	1476	+ for the DFA matcher that will quickly throw out cases that won't work.
	1477	+ Then if DFA succeeds we do some hairy stuff using the regex matcher
	1478	+ to decide whether the match should really count. */
	1479	+ if (match_words || match_lines)
	1480	+ {
	1481	+ /* In the whole-word case, we use the pattern:
	1482	+ \(^\|[^[:alnum:]_]\)\(userpattern\)\([^[:alnum:]_]\|$\).
	1483	+ In the whole-line case, we use the pattern:
	1484	+ ^\(userpattern\)$. */
	1485	+
	1486	+ static char const line_beg[] = "^\\(";
	1487	+ static char const line_end[] = "\\)$";
	1488	+ static char const word_beg[] = "\\(^\\|[^[:alnum:]_]\\)\\(";
	1489	+ static char const word_end[] = "\\)\\([^[:alnum:]_]\\|$\\)";
	1490	+ char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end);
	1491	+ size_t i;
	1492	+ strcpy (n, match_lines ? line_beg : word_beg);
	1493	+ i = strlen (n);
	1494	+ memcpy (n + i, pattern, size);
	1495	+ i += size;
	1496	+ strcpy (n + i, match_lines ? line_end : word_end);
	1497	+ i += strlen (n + i);
	1498	+ pattern = n;
	1499	+ size = i;
	1500	+ }
	1501	+
	1502	+ dfacomp (pattern, size, &dfa, 1);
	1503	+ kwsmusts ();
	1504	+}
	1505	+
	1506	+static void
	1507	+Ecompile (char const *pattern, size_t size)
	1508	+{
	1509	+ const char *err;
	1510	+ const char *sep;
	1511	+ size_t total = size;
	1512	+ char const *motif = pattern;
	1513	+
	1514	+ if (strcmp (matcher, "awk") == 0)
	1515	+ {
	1516	+ re_set_syntax (RE_SYNTAX_AWK);
	1517	+ dfasyntax (RE_SYNTAX_AWK, match_icase, eolbyte);
	1518	+ }
	1519	+ else
	1520	+ {
	1521	+ re_set_syntax (RE_SYNTAX_POSIX_EGREP);
	1522	+ dfasyntax (RE_SYNTAX_POSIX_EGREP, match_icase, eolbyte);
	1523	+ }
	1524	+
	1525	+ /* For GNU regex compiler we have to pass the patterns separately to detect
	1526	+ errors like "[\nallo\n]\n". The patterns here are "[", "allo" and "]", and
	1527	+ GNU regex should raise a syntax error for them. The same goes for
	1528	+ backreferences, which must stay local to each pattern. */
	1529	+ do
	1530	+ {
	1531	+ size_t len;
	1532	+ sep = memchr (motif, '\n', total);
	1533	+ if (sep)
	1534	+ {
	1535	+ len = sep - motif;
	1536	+ sep++;
	1537	+ total -= (len + 1);
	1538	+ }
	1539	+ else
	1540	+ {
	1541	+ len = total;
	1542	+ total = 0;
	1543	+ }
	1544	+
	1545	+ patterns = realloc (patterns, (pcount + 1) * sizeof (*patterns));
	1546	+ if (patterns == NULL)
	1547	+ error (2, errno, _("memory exhausted"));
	1548	+ patterns[pcount] = patterns0;
	1549	+
	1550	+ if ((err = re_compile_pattern (motif, len,
	1551	+ &(patterns[pcount].regexbuf))) != 0)
	1552	+ error (2, 0, err);
	1553	+ pcount++;
	1554	+
	1555	+ motif = sep;
	1556	+ } while (sep && total != 0);
	1557	+
	1558	+ /* In the match_words and match_lines cases, we use a different pattern
	1559	+ for the DFA matcher that will quickly throw out cases that won't work.
	1560	+ Then if DFA succeeds we do some hairy stuff using the regex matcher
	1561	+ to decide whether the match should really count. */
	1562	+ if (match_words || match_lines)
	1563	+ {
	1564	+ /* In the whole-word case, we use the pattern:
	1565	+ (^|[^[:alnum:]_])(userpattern)([^[:alnum:]_]|$).
	1566	+ In the whole-line case, we use the pattern:
	1567	+ ^(userpattern)$. */
	1568	+
	1569	+ static char const line_beg[] = "^(";
	1570	+ static char const line_end[] = ")$";
	1571	+ static char const word_beg[] = "(^|[^[:alnum:]_])(";
	1572	+ static char const word_end[] = ")([^[:alnum:]_]|$)";
	1573	+ char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end);
	1574	+ size_t i;
	1575	+ strcpy (n, match_lines ? line_beg : word_beg);
	1576	+ i = strlen(n);
	1577	+ memcpy (n + i, pattern, size);
	1578	+ i += size;
	1579	+ strcpy (n + i, match_lines ? line_end : word_end);
	1580	+ i += strlen (n + i);
	1581	+ pattern = n;
	1582	+ size = i;
	1583	+ }
	1584	+
	1585	+ dfacomp (pattern, size, &dfa, 1);
	1586	+ kwsmusts ();
	1587	+}
	1588	+
	1589	+static size_t
	1590	+EGexecute (char const *buf, size_t size, size_t *match_size, int exact)
	1591	+{
	1592	+ register char const *buflim, *beg, *end;
	1593	+ char eol = eolbyte;
	1594	+ int backref, start, len;
	1595	+ struct kwsmatch kwsm;
	1596	+ size_t i;
	1597	+#ifdef MBS_SUPPORT
	1598	+ char *mb_properties = NULL;
	1599	+#endif /* MBS_SUPPORT */
	1600	+
	1601	+#ifdef MBS_SUPPORT
	1602	+ if (MB_CUR_MAX > 1 && kwset)
	1603	+ mb_properties = check_multibyte_string(buf, size);
	1604	+#endif /* MBS_SUPPORT */
	1605	+
	1606	+ buflim = buf + size;
	1607	+
	1608	+ for (beg = end = buf; end < buflim; beg = end)
	1609	+ {
	1610	+ if (!exact)
	1611	+ {
	1612	+ if (kwset)
	1613	+ {
	1614	+ /* Find a possible match using the KWset matcher. */
	1615	+ size_t offset = kwsexec (kwset, beg, buflim - beg, &kwsm);
	1616	+ if (offset == (size_t) -1)
	1617	+ goto failure;
	1618	+ beg += offset;
	1619	+ /* Narrow down to the line containing the candidate, and
	1620	+ run it through DFA. */
	1621	+ end = memchr(beg, eol, buflim - beg);
	1622	+ end++;
	1623	+#ifdef MBS_SUPPORT
	1624	+ if (MB_CUR_MAX > 1 && mb_properties[beg - buf] == 0)
	1625	+ continue;
	1626	+#endif
	1627	+ while (beg > buf && beg[-1] != eol)
	1628	+ --beg;
	1629	+ if (kwsm.index < kwset_exact_matches)
	1630	+ goto success_in_beg_and_end;
	1631	+ if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1)
	1632	+ continue;
	1633	+ }
	1634	+ else
	1635	+ {
	1636	+ /* No good fixed strings; start with DFA. */
	1637	+ size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref);
	1638	+ if (offset == (size_t) -1)
	1639	+ break;
	1640	+ /* Narrow down to the line we've found. */
	1641	+ beg += offset;
	1642	+ end = memchr (beg, eol, buflim - beg);
	1643	+ end++;
	1644	+ while (beg > buf && beg[-1] != eol)
	1645	+ --beg;
	1646	+ }
	1647	+ /* Successful, no backreferences encountered! */
	1648	+ if (!backref)
	1649	+ goto success_in_beg_and_end;
	1650	+ }
	1651	+ else
	1652	+ end = beg + size;
	1653	+
	1654	+ /* If we've made it to this point, this means DFA has seen
	1655	+ a probable match, and we need to run it through Regex. */
	1656	+ for (i = 0; i < pcount; i++)
	1657	+ {
	1658	+ patterns[i].regexbuf.not_eol = 0;
	1659	+ if (0 <= (start = re_search (&(patterns[i].regexbuf), beg,
	1660	+ end - beg - 1, 0,
	1661	+ end - beg - 1, &(patterns[i].regs))))
	1662	+ {
	1663	+ len = patterns[i].regs.end[0] - start;
	1664	+ if (exact && !match_words)
	1665	+ goto success_in_start_and_len;
	1666	+ if ((!match_lines && !match_words)
	1667	+ || (match_lines && len == end - beg - 1))
	1668	+ goto success_in_beg_and_end;
	1669	+ /* If -w, check if the match aligns with word boundaries.
	1670	+ We do this iteratively because:
	1671	+ (a) the line may contain more than one occurrence of the
	1672	+ pattern, and
	1673	+ (b) Several alternatives in the pattern might be valid at a
	1674	+ given point, and we may need to consider a shorter one to
	1675	+ find a word boundary. */
	1676	+ if (match_words)
	1677	+ while (start >= 0)
	1678	+ {
	1679	+ if ((start == 0 || !WCHAR ((unsigned char) beg[start - 1]))
	1680	+ && (len == end - beg - 1
	1681	+ || !WCHAR ((unsigned char) beg[start + len])))
	1682	+ goto success_in_beg_and_end;
	1683	+ if (len > 0)
	1684	+ {
	1685	+ /* Try a shorter length anchored at the same place. */
	1686	+ --len;
	1687	+ patterns[i].regexbuf.not_eol = 1;
	1688	+ len = re_match (&(patterns[i].regexbuf), beg,
	1689	+ start + len, start,
	1690	+ &(patterns[i].regs));
	1691	+ }
	1692	+ if (len <= 0)
	1693	+ {
	1694	+ /* Try looking further on. */
	1695	+ if (start == end - beg - 1)
	1696	+ break;
	1697	+ ++start;
	1698	+ patterns[i].regexbuf.not_eol = 0;
	1699	+ start = re_search (&(patterns[i].regexbuf), beg,
	1700	+ end - beg - 1,
	1701	+ start, end - beg - 1 - start,
	1702	+ &(patterns[i].regs));
	1703	+ len = patterns[i].regs.end[0] - start;
	1704	+ }
	1705	+ }
	1706	+ }
	1707	+ } /* for Regex patterns. */
	1708	+ } /* for (beg = end ..) */
	1709	+
	1710	+ failure:
	1711	+#ifdef MBS_SUPPORT
	1712	+ if (MB_CUR_MAX > 1 && mb_properties)
	1713	+ free (mb_properties);
	1714	+#endif /* MBS_SUPPORT */
	1715	+ return (size_t) -1;
	1716	+
	1717	+ success_in_beg_and_end:
	1718	+ len = end - beg;
	1719	+ start = beg - buf;
	1720	+ /* FALLTHROUGH */
	1721	+
	1722	+ success_in_start_and_len:
	1723	+#ifdef MBS_SUPPORT
	1724	+ if (MB_CUR_MAX > 1 && mb_properties)
	1725	+ free (mb_properties);
	1726	+#endif /* MBS_SUPPORT */
	1727	+ *match_size = len;
	1728	+ return start;
	1729	+}
	1730	+
	1731	+static void
	1732	+Fcompile (char const *pattern, size_t size)
	1733	+{
	1734	+ char const *beg, *lim, *err;
	1735	+
	1736	+ kwsinit ();
	1737	+ beg = pattern;
	1738	+ do
	1739	+ {
	1740	+ for (lim = beg; lim < pattern + size && *lim != '\n'; ++lim)
	1741	+ ;
	1742	+ if ((err = kwsincr (kwset, beg, lim - beg)) != 0)
	1743	+ error (2, 0, err);
	1744	+ if (lim < pattern + size)
	1745	+ ++lim;
	1746	+ beg = lim;
	1747	+ }
	1748	+ while (beg < pattern + size);
	1749	+
	1750	+ if ((err = kwsprep (kwset)) != 0)
	1751	+ error (2, 0, err);
	1752	+}
	1753	+
	1754	+static size_t
	1755	+Fexecute (char const *buf, size_t size, size_t *match_size, int exact)
	1756	+{
	1757	+ register char const *beg, *try, *end;
	1758	+ register size_t len;
	1759	+ char eol = eolbyte;
	1760	+ struct kwsmatch kwsmatch;
	1761	+#ifdef MBS_SUPPORT
	1762	+ char *mb_properties;
	1763	+ if (MB_CUR_MAX > 1)
	1764	+ mb_properties = check_multibyte_string (buf, size);
	1765	+#endif /* MBS_SUPPORT */
	1766	+
	1767	+ for (beg = buf; beg <= buf + size; ++beg)
	1768	+ {
	1769	+ size_t offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch);
	1770	+ if (offset == (size_t) -1)
	1771	+ goto failure;
	1772	+#ifdef MBS_SUPPORT
	1773	+ if (MB_CUR_MAX > 1 && mb_properties[offset+beg-buf] == 0)
	1774	+ continue; /* It is a part of multibyte character. */
	1775	+#endif /* MBS_SUPPORT */
	1776	+ beg += offset;
	1777	+ len = kwsmatch.size[0];
	1778	+ if (exact && !match_words)
	1779	+ goto success_in_beg_and_len;
	1780	+ if (match_lines)
	1781	+ {
	1782	+ if (beg > buf && beg[-1] != eol)
	1783	+ continue;
	1784	+ if (beg + len < buf + size && beg[len] != eol)
	1785	+ continue;
	1786	+ goto success;
	1787	+ }
	1788	+ else if (match_words)
	1789	+ for (try = beg; len; )
	1790	+ {
	1791	+ if (try > buf && WCHAR((unsigned char) try[-1]))
	1792	+ break;
	1793	+ if (try + len < buf + size && WCHAR((unsigned char) try[len]))
	1794	+ {
	1795	+ offset = kwsexec (kwset, beg, --len, &kwsmatch);
	1796	+ if (offset == (size_t) -1)
	1797	+ {
	1798	+#ifdef MBS_SUPPORT
	1799	+ if (MB_CUR_MAX > 1)
	1800	+ free (mb_properties);
	1801	+#endif /* MBS_SUPPORT */
	1802	+ return offset;
	1803	+ }
	1804	+ try = beg + offset;
	1805	+ len = kwsmatch.size[0];
	1806	+ }
	1807	+ else
	1808	+ goto success;
	1809	+ }
	1810	+ else
	1811	+ goto success;
	1812	+ }
	1813	+
	1814	+ failure:
	1815	+#ifdef MBS_SUPPORT
	1816	+ if (MB_CUR_MAX > 1)
	1817	+ free (mb_properties);
	1818	+#endif /* MBS_SUPPORT */
	1819	+ return -1;
	1820	+
	1821	+ success:
	1822	+ end = memchr (beg + len, eol, (buf + size) - (beg + len));
	1823	+ end++;
	1824	+ while (buf < beg && beg[-1] != eol)
	1825	+ --beg;
	1826	+ len = end - beg;
	1827	+ /* FALLTHROUGH */
	1828	+
	1829	+ success_in_beg_and_len:
	1830	+ *match_size = len;
	1831	+#ifdef MBS_SUPPORT
	1832	+ if (MB_CUR_MAX > 1)
	1833	+ free (mb_properties);
	1834	+#endif /* MBS_SUPPORT */
	1835	+ return beg - buf;
	1836	+}
	1837	+
	1838	+#if HAVE_LIBPCRE
	1839	+/* Compiled internal form of a Perl regular expression. */
	1840	+static pcre *cre;
	1841	+
	1842	+/* Additional information about the pattern. */
	1843	+static pcre_extra *extra;
	1844	+#endif
	1845	+
	1846	+static void
	1847	+Pcompile (char const *pattern, size_t size)
	1848	+{
	1849	+#if !HAVE_LIBPCRE
	1850	+ error (2, 0, _("The -P option is not supported"));
	1851	+#else
	1852	+ int e;
	1853	+ char const *ep;
	1854	+ char *re = xmalloc (4 * size + 7);
	1855	+ int flags = PCRE_MULTILINE | (match_icase ? PCRE_CASELESS : 0);
	1856	+ char const *patlim = pattern + size;
	1857	+ char *n = re;
	1858	+ char const *p;
	1859	+ char const *pnul;
	1860	+
	1861	+ /* FIXME: Remove this restriction. */
	1862	+ if (eolbyte != '\n')
	1863	+ error (2, 0, _("The -P and -z options cannot be combined"));
	1864	+
	1865	+ *n = '\0';
	1866	+ if (match_lines)
	1867	+ strcpy (n, "^(");
	1868	+ if (match_words)
	1869	+ strcpy (n, "\\b(");
	1870	+ n += strlen (n);
	1871	+
	1872	+ /* The PCRE interface doesn't allow NUL bytes in the pattern, so
	1873	+ replace each NUL byte in the pattern with the four characters
	1874	+ "\000", removing a preceding backslash if there are an odd
	1875	+ number of backslashes before the NUL.
	1876	+
	1877	+ FIXME: This method does not work with some multibyte character
	1878	+ encodings, notably Shift-JIS, where a multibyte character can end
	1879	+ in a backslash byte. */
	1880	+ for (p = pattern; (pnul = memchr (p, '\0', patlim - p)); p = pnul + 1)
	1881	+ {
	1882	+ memcpy (n, p, pnul - p);
	1883	+ n += pnul - p;
	1884	+ for (p = pnul; pattern < p && p[-1] == '\\'; p--)
	1885	+ continue;
	1886	+ n -= (pnul - p) & 1;
	1887	+ strcpy (n, "\\000");
	1888	+ n += 4;
	1889	+ }
	1890	+
	1891	+ memcpy (n, p, patlim - p);
	1892	+ n += patlim - p;
	1893	+ *n = '\0';
	1894	+ if (match_words)
	1895	+ strcpy (n, ")\\b");
	1896	+ if (match_lines)
	1897	+ strcpy (n, ")$");
	1898	+
	1899	+ cre = pcre_compile (re, flags, &ep, &e, pcre_maketables ());
	1900	+ if (!cre)
	1901	+ error (2, 0, ep);
	1902	+
	1903	+ extra = pcre_study (cre, 0, &ep);
	1904	+ if (ep)
	1905	+ error (2, 0, ep);
	1906	+
	1907	+ free (re);
	1908	+#endif
	1909	+}
	1910	+
	1911	+static size_t
	1912	+Pexecute (char const *buf, size_t size, size_t *match_size, int exact)
	1913	+{
	1914	+#if !HAVE_LIBPCRE
	1915	+ abort ();
	1916	+ return -1;
	1917	+#else
	1918	+ /* This array must have at least two elements; everything after that
	1919	+ is just for performance improvement in pcre_exec. */
	1920	+ int sub[300];
	1921	+
	1922	+ int e = pcre_exec (cre, extra, buf, size, 0, 0,
	1923	+ sub, sizeof sub / sizeof *sub);
	1924	+
	1925	+ if (e <= 0)
	1926	+ {
	1927	+ switch (e)
	1928	+ {
	1929	+ case PCRE_ERROR_NOMATCH:
	1930	+ return -1;
	1931	+
	1932	+ case PCRE_ERROR_NOMEMORY:
	1933	+ error (2, 0, _("Memory exhausted"));
	1934	+
	1935	+ default:
	1936	+ abort ();
	1937	+ }
	1938	+ }
	1939	+ else
	1940	+ {
	1941	+ /* Narrow down to the line we've found. */
	1942	+ char const *beg = buf + sub[0];
	1943	+ char const *end = buf + sub[1];
	1944	+ char const *buflim = buf + size;
	1945	+ char eol = eolbyte;
	1946	+ if (!exact)
	1947	+ {
	1948	+ end = memchr (end, eol, buflim - end);
	1949	+ end++;
	1950	+ while (buf < beg && beg[-1] != eol)
	1951	+ --beg;
	1952	+ }
	1953	+
	1954	+ *match_size = end - beg;
	1955	+ return beg - buf;
	1956	+ }
	1957	+#endif
	1958	+}
	1959	+
	1960	+struct matcher const matchers[] = {
	1961	+ { "default", Gcompile, EGexecute },
	1962	+ { "grep", Gcompile, EGexecute },
	1963	+ { "egrep", Ecompile, EGexecute },
	1964	+ { "awk", Ecompile, EGexecute },
	1965	+ { "fgrep", Fcompile, Fexecute },
	1966	+ { "perl", Pcompile, Pexecute },
	1967	+ { "", 0, 0 },
	1968	+};
	1969	diff -urN grep-2.5.1a.orig/tests/fmbtest.sh grep-2.5.1a/tests/fmbtest.sh
	1970	--- grep-2.5.1a.orig/tests/fmbtest.sh 1970-01-01 05:00:00.000000000 +0500
	1971	+++ grep-2.5.1a/tests/fmbtest.sh 2005-10-23 09:51:12.000000000 +0600
	1972	@@ -0,0 +1,111 @@
	1973	+#!/bin/sh
	1974	+
	1975	+: ${srcdir=.}
	1976	+
	1977	+# If cs_CZ.UTF-8 locale doesn't work, skip this test silently
	1978	+LC_ALL=cs_CZ.UTF-8 locale -k LC_CTYPE 2>/dev/null | ${GREP} -q charmap.*UTF-8 \
	1979	+ || exit 77
	1980	+
	1981	+failures=0
	1982	+
	1983	+cat > csinput <<EOF
	1984	+01 ÅœluÅ¥ouÄkÃ¡ ÄÃÅ¡e
	1985	+ÄÃÅ E 02
	1986	+03 Z ÄÃÅ¡Ã ÄiÅ¡Ã cosi
	1987	+04 ÄÃ
	1988	+Å e 05
	1989	+06 ÄÄÄÄÄÄÄÃÅ¡ÄÃÅ ÄÃÅ¡
	1990	+07 ÄÄÄ ÄÄÄÄÃÅ¡ÄÃÅ ÄÃÅ¡EEEE
	1991	+ÄAs 08
	1992	+09Äapka
	1993	+10ÄaSy se mÄnÃ
	1994	+ÄÃÅ¡E11
	1995	+Äas12
	1996	+ðÄÃÅ¡Eð13
	1997	+ÅœÄÃÅ¡Eð14
	1998	+ðÄÃÅ¡EÅœ15
	1999	+ÅœÄÃÅ¡EÅœ16
	2000	+ÄÃÅ¡Eð17
	2001	+ÄÃÅ¡EÅœ18
	2002	+19ðÄÃÅ¡e
	2003	+20ÅœÄÃÅ¡e
	2004	+EOF
	2005	+cat > cspatfile <<EOF
	2006	+ÄÃÅ¡E
	2007	+Äas
	2008	+EOF
	2009	+
	2010	+for mode in F G E; do
	2011	+
	2012	+test1="$(echo `LC_ALL=cs_CZ.UTF-8 ${GREP} -${mode} -f cspatfile csinput \
	2013	+ | LC_ALL=C sed 's/^.*\([0-9][0-9]\).*$/\1/'`)"
	2014	+if test "$test1" != "11 12 13 14 15 16 17 18"; then
	2015	+ echo "Test #1 ${mode} failed: $test1"
	2016	+ failures=1
	2017	+fi
	2018	+
	2019	+test2="$(echo `LC_ALL=cs_CZ.UTF-8 ${GREP} -${mode}i -f cspatfile csinput \
	2020	+ | LC_ALL=C sed 's/^.*\([0-9][0-9]\).*$/\1/'`)"
	2021	+if test "$test2" != "01 02 07 08 10 11 12 13 14 15 16 17 18 19 20"; then
	2022	+ echo "Test #2 ${mode} failed: $test2"
	2023	+ failures=1
	2024	+fi
	2025	+
	2026	+test3="$(echo `LC_ALL=cs_CZ.UTF-8 ${GREP} -${mode}i -e 'ÄÃÅ¡E' -e 'Äas' csinput \
	2027	+ | LC_ALL=C sed 's/^.*\([0-9][0-9]\).*$/\1/'`)"
	2028	+if test "$test3" != "01 02 07 08 10 11 12 13 14 15 16 17 18 19 20"; then
	2029	+ echo "Test #3 ${mode} failed: $test3"
	2030	+ failures=1
	2031	+fi
	2032	+
	2033	+test4="$(echo `LC_ALL=cs_CZ.UTF-8 ${GREP} -${mode}iw -f cspatfile csinput \
	2034	+ | LC_ALL=C sed 's/^.*\([0-9][0-9]\).*$/\1/'`)"
	2035	+if test "$test4" != "01 02 08 13 17 19"; then
	2036	+ echo "Test #4 ${mode} failed: $test4"
	2037	+ failures=1
	2038	+fi
	2039	+
	2040	+done
	2041	+
	2042	+# Test that -F --color=always prefers longer matches.
	2043	+test5="`echo 'Cosi tu ÄiÅ¡Ã...' \
	2044	+ \| LC_ALL=cs_CZ.UTF-8 ${GREP} --color=always -Fi -e 'ÄiÅ¡' -e 'ÄiÅ¡Ã'`"
	2045	+if echo "$test5" \| LC_ALL=C ${GREP} -q 'Cosi tu .\[.mÄiÅ¡Ã.\[.m\(.\[K\)\?\.\.\.'; then
	2046	+ :
	2047	+else
	2048	+ echo "Test #5 F failed: $test5"
	2049	+ failures=1
	2050	+fi
	2051	+
	2052	+for mode in G E; do
	2053	+
	2054	+# Test that -{G,E} --color=always prefers earlier pattern matches.
	2055	+test6="`echo 'Cosi tu ÄiÅ¡Ã...' \
	2056	+ \| LC_ALL=cs_CZ.UTF-8 ${GREP} --color=always -${mode}i -e 'ÄiÅ¡' -e 'ÄiÅ¡Ã'`"
	2057	+if echo "$test6" \| LC_ALL=C ${GREP} -q 'Cosi tu .\[.mÄiÅ¡.\[.m\(.\[K\)\?Ã\.\.\.'; then
	2058	+ :
	2059	+else
	2060	+ echo "Test #6 ${mode} failed: $test6"
	2061	+ failures=1
	2062	+fi
	2063	+
	2064	+# Test that -{G,E} --color=always prefers earlier pattern matches.
	2065	+test7="`echo 'Cosi tu ÄiÅ¡Ã...' \
	2066	+ \| LC_ALL=cs_CZ.UTF-8 ${GREP} --color=always -${mode}i -e 'ÄiÅ¡Ã' -e 'ÄiÅ¡'`"
	2067	+if echo "$test7" \| LC_ALL=C ${GREP} -q 'Cosi tu .\[.mÄiÅ¡Ã.\[.m\(.\[K\)\?\.\.\.'; then
	2068	+ :
	2069	+else
	2070	+ echo "Test #7 ${mode} failed: $test7"
	2071	+ failures=1
	2072	+fi
	2073	+
	2074	+test8="$(echo `LC_ALL=cs_CZ.UTF-8 ${GREP} -${mode}i -e 'Ä.Å¡E' -e 'Ä[a-f]s' csinput \
	2075	+ | LC_ALL=C sed 's/^.*\([0-9][0-9]\).*$/\1/'`)"
	2076	+if test "$test8" != "01 02 07 08 10 11 12 13 14 15 16 17 18 19 20"; then
	2077	+ echo "Test #8 ${mode} failed: $test8"
	2078	+ failures=1
	2079	+fi
	2080	+
	2081	+done
	2082	+
	2083	+exit $failures
	2084	diff -urN grep-2.5.1a.orig/tests/Makefile.am grep-2.5.1a/tests/Makefile.am
	2085	--- grep-2.5.1a.orig/tests/Makefile.am 2001-03-07 09:11:27.000000000 +0500
	2086	+++ grep-2.5.1a/tests/Makefile.am 2005-10-23 09:51:12.000000000 +0600
	2087	@@ -3,7 +3,8 @@
	2088	AWK=@AWK@
	2089
	2090	TESTS = warning.sh khadafy.sh spencer1.sh bre.sh ere.sh \
	2091	- status.sh empty.sh options.sh backref.sh file.sh
	2092	+ status.sh empty.sh options.sh backref.sh file.sh \
	2093	+ fmbtest.sh
	2094	EXTRA_DIST = $(TESTS) \
	2095	khadafy.lines khadafy.regexp \
	2096	spencer1.awk spencer1.tests \
	2097	diff -urN grep-2.5.1a.orig/tests/Makefile.in grep-2.5.1a/tests/Makefile.in
	2098	--- grep-2.5.1a.orig/tests/Makefile.in 2002-03-26 21:09:36.000000000 +0500
	2099	+++ grep-2.5.1a/tests/Makefile.in 2005-10-23 09:51:13.000000000 +0600
	2100	@@ -97,7 +97,8 @@
	2101	AWK = @AWK@
	2102
	2103	TESTS = warning.sh khadafy.sh spencer1.sh bre.sh ere.sh \
	2104	- status.sh empty.sh options.sh backref.sh file.sh
	2105	+ status.sh empty.sh options.sh backref.sh file.sh \
	2106	+ fmbtest.sh
	2107
	2108	EXTRA_DIST = $(TESTS) \
	2109	khadafy.lines khadafy.regexp \

Note: See TracBrowser for help on using the repository browser.

Download in other formats: