source:
patches/grep-2.5.3-i18n-1.patch@
b09c166
Last change on this file since b09c166 was 318408a, checked in 16 years ago
File size: 26.9 KB
-
lib/posix/regex.h
Submitted By: Ken Moffat <ken at linuxfromscratch dot org>
Date: 2008-02-19
Initial Package Version: 2.5.3
Upstream Status: uncertain
Origin: from debian.
Description: Various fixes, particularly speed improvements for UTF-8 locales. Also adds a 'standard input' marker into the results for certain obscure uses.

diff -Naur grep-2.5.3.orig/lib/posix/regex.h grep-2.5.3.lfs/lib/posix/regex.h
old new 165 165 treated as 'a\{1'. */ 166 166 #define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1) 167 167 168 /* If this bit is set, then ignore case when matching. 169 If not set, then case is significant. */ 170 #define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1) 171 168 172 /* This global variable defines the particular regexp syntax to use (for 169 173 some interfaces). When a regexp is compiled, the syntax used is 170 174 stored in the pattern buffer, so changing this does not affect -
src/dfa.c
diff -Naur grep-2.5.3.orig/src/dfa.c grep-2.5.3.lfs/src/dfa.c
old new 594 594 /* build character class. */ 595 595 { 596 596 wctype_t wt; 597 /* NOTE: 598 * when case_fold, character class [:upper:] and [:lower:] 599 * should be treated as [:alpha:], this is the same way 600 * of glibc/posix/regcomp.c:build_charclass(). 601 * reported by Bug#276202 602 * - fixed by Fumitoshi UKAI 603 */ 604 if (case_fold 605 && (strcmp (str, "upper") == 0 || strcmp (str, "lower") == 0)) 606 strcpy (str, "alpha"); 607 597 608 /* Query the character class as wctype_t. */ 598 609 wt = wctype (str); 599 610 … … 681 692 REALLOC_IF_NECESSARY(work_mbc->range_ends, wchar_t, 682 693 range_ends_al, work_mbc->nranges + 1); 683 694 work_mbc->range_ends[work_mbc->nranges++] = (wchar_t)wc2; 695 if (case_fold 696 && (iswlower((wint_t)wc) || iswupper((wint_t)wc)) 697 && (iswlower((wint_t)wc2) || iswupper((wint_t)wc2))) { 698 wint_t altcase; 699 altcase = wc; 700 if (iswlower((wint_t)wc)) 701 altcase = towupper((wint_t)wc); 702 else 703 altcase = towlower((wint_t)wc); 704 REALLOC_IF_NECESSARY(work_mbc->range_sts, wchar_t, 705 range_sts_al, work_mbc->nranges + 1); 706 work_mbc->range_sts[work_mbc->nranges] = (wchar_t)altcase; 707 708 altcase = wc2; 709 if (iswlower((wint_t)wc2)) 710 altcase = towupper((wint_t)wc2); 711 else 712 altcase = towlower((wint_t)wc2); 713 REALLOC_IF_NECESSARY(work_mbc->range_ends, wchar_t, 714 range_ends_al, work_mbc->nranges + 1); 715 work_mbc->range_ends[work_mbc->nranges++] = (wchar_t)altcase; 716 717 } 684 718 } 685 719 else if (wc != WEOF) 686 720 /* build normal characters. 
*/ … … 688 722 REALLOC_IF_NECESSARY(work_mbc->chars, wchar_t, chars_al, 689 723 work_mbc->nchars + 1); 690 724 work_mbc->chars[work_mbc->nchars++] = (wchar_t)wc; 725 if (case_fold && (iswlower((wint_t) wc) || iswupper((wint_t) wc))) 726 { 727 wint_t altcase; 728 729 altcase = wc; /* keeps compiler happy */ 730 if (iswlower((wint_t) wc)) 731 altcase = towupper((wint_t) wc); 732 else if (iswupper((wint_t) wc)) 733 altcase = towlower((wint_t) wc); 734 735 REALLOC_IF_NECESSARY(work_mbc->chars, wchar_t, chars_al, 736 work_mbc->nchars + 1); 737 work_mbc->chars[work_mbc->nchars++] = (wchar_t) altcase; 738 } 691 739 } 692 740 } 693 741 while ((wc = wc1) != L']'); -
src/grep.c
diff -Naur grep-2.5.3.orig/src/grep.c grep-2.5.3.lfs/src/grep.c
old new 274 274 #endif 275 275 ; 276 276 277 /* Default for `file_list' if no files are given on the command line. */ 278 static char *stdin_argv[] = 279 { 280 "-", NULL 281 }; 282 277 283 /* Non-boolean long options that have no corresponding short equivalents. */ 278 284 enum 279 285 { … … 534 540 for byte sentinels fore and aft. */ 535 541 newalloc = newsize + pagesize + 1; 536 542 537 newbuf = bufalloc < newalloc ? xmalloc (bufalloc = newalloc) : buffer; 543 newbuf = bufalloc < newalloc ? malloc (bufalloc = newalloc) : buffer; 544 if (newbuf == NULL) 545 { 546 int saved_errno = errno; 547 free (buffer); 548 bufalloc = ALIGN_TO (INITIAL_BUFSIZE, pagesize) + pagesize + 1; 549 buffer = xmalloc (bufalloc); 550 errno = saved_errno; 551 return 0; 552 } 538 553 readbuf = ALIGN_TO (newbuf + 1 + save, pagesize); 539 554 bufbeg = readbuf - save; 540 555 memmove (bufbeg, buffer + saved_offset, save); … … 1825 1840 FILE *fp; 1826 1841 extern char *optarg; 1827 1842 extern int optind; 1843 char **file_list; 1828 1844 1829 1845 initialize_main (&argc, &argv); 1830 1846 program_name = argv[0]; … … 2244 2260 if (max_count == 0) 2245 2261 exit (1); 2246 2262 2247 if (optind < argc) 2263 file_list = (optind == argc ? stdin_argv : &argv[optind]); 2264 2265 status = 1; 2266 while (1) 2248 2267 { 2249 status = 1; 2250 do 2268 char *file = *file_list++; 2269 2270 if (file == NULL) 2271 break; 2272 2273 if ((included_patterns || excluded_patterns) 2274 && !isdir (file)) 2251 2275 { 2252 char *file = argv[optind]; 2253 if ((included_patterns || excluded_patterns) 2254 && !isdir (file)) 2255 { 2256 if (included_patterns && 2257 ! excluded_filename (included_patterns, file, 0)) 2258 continue; 2259 if (excluded_patterns && 2260 excluded_filename (excluded_patterns, file, 0)) 2261 continue; 2262 } 2263 status &= grepfile (strcmp (file, "-") == 0 ? (char *) NULL : file, 2264 &stats_base); 2276 if (included_patterns && 2277 ! 
excluded_filename (included_patterns, file, 0)) 2278 continue; 2279 if (excluded_patterns && 2280 excluded_filename (excluded_patterns, file, 0)) 2281 continue; 2265 2282 } 2266 while ( ++optind < argc); 2283 status &= grepfile (strcmp (file, "-") == 0 2284 ? (char *) NULL : file, &stats_base); 2267 2285 } 2268 else2269 status = grepfile ((char *) NULL, &stats_base);2270 2286 2271 2287 /* We register via atexit() to test stdout. */ 2272 2288 exit (errseen ? 2 : status); -
src/search.c
diff -Naur grep-2.5.3.orig/src/search.c grep-2.5.3.lfs/src/search.c
old new 18 18 19 19 /* Written August 1992 by Mike Haertel. */ 20 20 21 #ifndef _GNU_SOURCE 22 # define _GNU_SOURCE 1 23 #endif 21 24 #ifdef HAVE_CONFIG_H 22 25 # include <config.h> 23 26 #endif 24 27 28 #include <assert.h> 29 25 30 #include <sys/types.h> 26 31 27 32 #include "mbsupport.h" … … 43 48 #ifdef HAVE_LIBPCRE 44 49 # include <pcre.h> 45 50 #endif 51 #ifdef HAVE_LANGINFO_CODESET 52 # include <langinfo.h> 53 #endif 46 54 47 55 #define NCHAR (UCHAR_MAX + 1) 48 56 … … 68 76 error (2, 0, _("memory exhausted")); 69 77 } 70 78 79 /* UTF-8 encoding allows some optimizations that we can't otherwise 80 assume in a multibyte encoding. */ 81 static int using_utf8; 82 83 void 84 check_utf8 (void) 85 { 86 #ifdef HAVE_LANGINFO_CODESET 87 if (strcmp (nl_langinfo (CODESET), "UTF-8") == 0) 88 using_utf8 = 1; 89 #endif 90 } 91 71 92 #ifndef FGREP_PROGRAM 72 93 /* DFA compiled regexp. */ 73 94 static struct dfa dfa; … … 134 155 } 135 156 #endif /* !FGREP_PROGRAM */ 136 157 137 #ifdef MBS_SUPPORT138 /* This function allocate the array which correspond to "buf".139 Then this check multibyte string and mark on the positions which140 are not single byte character nor the first byte of a multibyte141 character. Caller must free the array. */142 static char*143 check_multibyte_string(char const *buf, size_t size)144 {145 char *mb_properties = xmalloc(size);146 mbstate_t cur_state;147 wchar_t wc;148 int i;149 150 memset(&cur_state, 0, sizeof(mbstate_t));151 memset(mb_properties, 0, sizeof(char)*size);152 153 for (i = 0; i < size ;)154 {155 size_t mbclen;156 mbclen = mbrtowc(&wc, buf + i, size - i, &cur_state);157 158 if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)159 {160 /* An invalid sequence, or a truncated multibyte character.161 We treat it as a single byte character. 
*/162 mbclen = 1;163 }164 else if (match_icase)165 {166 if (iswupper((wint_t)wc))167 {168 wc = towlower((wint_t)wc);169 wcrtomb(buf + i, wc, &cur_state);170 }171 }172 mb_properties[i] = mbclen;173 i += mbclen;174 }175 176 return mb_properties;177 }178 #endif /* MBS_SUPPORT */179 180 158 #if defined(GREP_PROGRAM) || defined(EGREP_PROGRAM) 181 159 #ifdef EGREP_PROGRAM 182 160 COMPILE_FCT(Ecompile) … … 193 171 size_t total = size; 194 172 char const *motif = pattern; 195 173 196 #if 0 174 check_utf8 (); 197 175 if (match_icase) 198 176 syntax_bits |= RE_ICASE; 199 #endif200 177 re_set_syntax (syntax_bits); 201 178 dfasyntax (syntax_bits, match_icase, eolbyte); 202 179 … … 301 278 char eol = eolbyte; 302 279 int backref, start, len, best_len; 303 280 struct kwsmatch kwsm; 281 static int use_dfa; 282 static int use_dfa_checked = 0; 304 283 size_t i, ret_val; 305 284 #ifdef MBS_SUPPORT 306 char *mb_properties = NULL; 307 if (MB_CUR_MAX > 1) 285 const char *last_char = NULL; 286 int mb_cur_max = MB_CUR_MAX; 287 mbstate_t mbs; 288 memset (&mbs, '\0', sizeof (mbstate_t)); 289 #endif /* MBS_SUPPORT */ 290 291 if (!use_dfa_checked) 308 292 { 309 if (match_icase) 310 { 311 char *case_buf = xmalloc(size); 312 memcpy(case_buf, buf, size); 313 if (start_ptr) 314 start_ptr = case_buf + (start_ptr - buf); 315 buf = case_buf; 316 } 317 if (kwset) 318 mb_properties = check_multibyte_string(buf, size); 319 } 293 char *grep_use_dfa = getenv ("GREP_USE_DFA"); 294 if (!grep_use_dfa) 295 { 296 #ifdef MBS_SUPPORT 297 /* Turn off DFA when processing multibyte input. */ 298 use_dfa = (MB_CUR_MAX == 1); 299 #else 300 use_dfa = 1; 320 301 #endif /* MBS_SUPPORT */ 302 } 303 else 304 { 305 use_dfa = atoi (grep_use_dfa); 306 } 307 308 use_dfa_checked = 1; 309 } 321 310 322 311 buflim = buf + size; 323 312 … … 329 318 if (kwset) 330 319 { 331 320 /* Find a possible match using the KWset matcher. 
*/ 332 size_t offset = kwsexec (kwset, beg, buflim - beg, &kwsm); 321 #ifdef MBS_SUPPORT 322 size_t bytes_left = 0; 323 #endif /* MBS_SUPPORT */ 324 size_t offset; 325 #ifdef MBS_SUPPORT 326 /* kwsexec doesn't work with match_icase and multibyte input. */ 327 if (match_icase && mb_cur_max > 1) 328 /* Avoid kwset */ 329 offset = 0; 330 else 331 #endif /* MBS_SUPPORT */ 332 offset = kwsexec (kwset, beg, buflim - beg, &kwsm); 333 333 if (offset == (size_t) -1) 334 goto failure; 334 return (size_t)-1; 335 #ifdef MBS_SUPPORT 336 if (mb_cur_max > 1 && !using_utf8) 337 { 338 bytes_left = offset; 339 while (bytes_left) 340 { 341 size_t mlen = mbrlen (beg, bytes_left, &mbs); 342 343 last_char = beg; 344 if (mlen == (size_t) -1 || mlen == 0) 345 { 346 /* Incomplete character: treat as single-byte. */ 347 memset (&mbs, '\0', sizeof (mbstate_t)); 348 beg++; 349 bytes_left--; 350 continue; 351 } 352 353 if (mlen == (size_t) -2) 354 /* Offset points inside multibyte character: 355 * no good. */ 356 break; 357 358 beg += mlen; 359 bytes_left -= mlen; 360 } 361 } 362 else 363 #endif /* MBS_SUPPORT */ 335 364 beg += offset; 336 365 /* Narrow down to the line containing the candidate, and 337 366 run it through DFA. */ 338 367 end = memchr(beg, eol, buflim - beg); 339 368 end++; 340 369 #ifdef MBS_SUPPORT 341 if ( MB_CUR_MAX > 1 && mb_properties[beg - buf] == 0)370 if (mb_cur_max > 1 && bytes_left) 342 371 continue; 343 372 #endif 344 373 while (beg > buf && beg[-1] != eol) 345 374 --beg; 346 if (kwsm.index < kwset_exact_matches) 375 if ( 376 #ifdef MBS_SUPPORT 377 !(match_icase && mb_cur_max > 1) && 378 #endif /* MBS_SUPPORT */ 379 (kwsm.index < kwset_exact_matches)) 347 380 goto success; 348 if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1) 381 if (use_dfa && 382 dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1) 349 383 continue; 350 384 } 351 385 else 352 386 { 353 387 /* No good fixed strings; start with DFA. 
*/ 354 size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref); 388 #ifdef MBS_SUPPORT 389 size_t bytes_left = 0; 390 #endif /* MBS_SUPPORT */ 391 size_t offset = 0; 392 if (use_dfa) 393 offset = dfaexec (&dfa, beg, buflim - beg, &backref); 355 394 if (offset == (size_t) -1) 356 395 break; 357 396 /* Narrow down to the line we've found. */ 397 #ifdef MBS_SUPPORT 398 if (mb_cur_max > 1 && !using_utf8) 399 { 400 bytes_left = offset; 401 while (bytes_left) 402 { 403 size_t mlen = mbrlen (beg, bytes_left, &mbs); 404 405 last_char = beg; 406 if (mlen == (size_t) -1 || mlen == 0) 407 { 408 /* Incomplete character: treat as single-byte. */ 409 memset (&mbs, '\0', sizeof (mbstate_t)); 410 beg++; 411 bytes_left--; 412 continue; 413 } 414 415 if (mlen == (size_t) -2) 416 /* Offset points inside multibyte character: 417 * no good. */ 418 break; 419 420 beg += mlen; 421 bytes_left -= mlen; 422 } 423 } 424 else 425 #endif /* MBS_SUPPORT */ 358 426 beg += offset; 359 427 end = memchr (beg, eol, buflim - beg); 360 428 end++; 429 #ifdef MBS_SUPPORT 430 if (mb_cur_max > 1 && bytes_left) 431 continue; 432 #endif /* MBS_SUPPORT */ 361 433 while (beg > buf && beg[-1] != eol) 362 434 --beg; 363 435 } 364 436 /* Successful, no backreferences encountered! 
*/ 365 if ( !backref)437 if (use_dfa && !backref) 366 438 goto success; 367 439 } 368 440 else … … 408 480 if (match_words) 409 481 while (match <= best_match) 410 482 { 411 if ((match == buf || !WCHAR ((unsigned char) match[-1])) 412 && (len == end - beg - 1 413 || !WCHAR ((unsigned char) match[len]))) 414 goto assess_pattern_match; 483 int lword_match = 0; 484 if (match == buf) 485 lword_match = 1; 486 else 487 { 488 assert (start > 0); 489 #ifdef MBS_SUPPORT 490 if (mb_cur_max > 1) 491 { 492 const char *s; 493 int mr; 494 wchar_t pwc; 495 if (using_utf8) 496 { 497 s = match - 1; 498 while (s > buf 499 && (unsigned char) *s >= 0x80 500 && (unsigned char) *s <= 0xbf) 501 --s; 502 } 503 else 504 s = last_char; 505 mr = mbtowc (&pwc, s, match - s); 506 if (mr <= 0) 507 { 508 memset (&mbs, '\0', sizeof (mbstate_t)); 509 lword_match = 1; 510 } 511 else if (!(iswalnum (pwc) || pwc == L'_') 512 && mr == (int) (match - s)) 513 lword_match = 1; 514 } 515 else 516 #endif /* MBS_SUPPORT */ 517 if (!WCHAR ((unsigned char) match[-1])) 518 lword_match = 1; 519 } 520 521 if (lword_match) 522 { 523 int rword_match = 0; 524 if (start + len == end - beg - 1) 525 rword_match = 1; 526 else 527 { 528 #ifdef MBS_SUPPORT 529 if (mb_cur_max > 1) 530 { 531 wchar_t nwc; 532 int mr; 533 534 mr = mbtowc (&nwc, buf + start + len, 535 end - buf - start - len - 1); 536 if (mr <= 0) 537 { 538 memset (&mbs, '\0', sizeof (mbstate_t)); 539 rword_match = 1; 540 } 541 else if (!iswalnum (nwc) && nwc != L'_') 542 rword_match = 1; 543 } 544 else 545 #endif /* MBS_SUPPORT */ 546 if (!WCHAR ((unsigned char) match[len])) 547 rword_match = 1; 548 } 549 550 if (rword_match) 551 { 552 if (!start_ptr) 553 /* Returns the whole line. */ 554 goto success; 555 else 556 { 557 goto assess_pattern_match; 558 } 559 } 560 } 415 561 if (len > 0) 416 562 { 417 563 /* Try a shorter length anchored at the same place. 
*/ … … 475 621 *match_size = len; 476 622 ret_val = beg - buf; 477 623 out: 478 #ifdef MBS_SUPPORT479 if (MB_CUR_MAX > 1)480 {481 if (match_icase)482 free((char*)buf);483 if (mb_properties)484 free(mb_properties);485 }486 #endif /* MBS_SUPPORT */487 624 return ret_val; 488 625 } 489 626 #endif /* defined(GREP_PROGRAM) || defined(EGREP_PROGRAM) */ 490 627 628 #ifdef MBS_SUPPORT 629 static int f_i_multibyte; /* whether we're using the new -Fi MB method */ 630 static struct 631 { 632 wchar_t **patterns; 633 size_t count, maxlen; 634 unsigned char *match; 635 } Fimb; 636 #endif 637 491 638 #if defined(GREP_PROGRAM) || defined(FGREP_PROGRAM) 492 639 COMPILE_FCT(Fcompile) 493 640 { 641 int mb_cur_max = MB_CUR_MAX; 494 642 char const *beg, *lim, *err; 495 643 644 check_utf8 (); 645 #ifdef MBS_SUPPORT 646 /* Support -F -i for UTF-8 input. */ 647 if (match_icase && mb_cur_max > 1) 648 { 649 mbstate_t mbs; 650 wchar_t *wcpattern = xmalloc ((size + 1) * sizeof (wchar_t)); 651 const char *patternend = pattern; 652 size_t wcsize; 653 kwset_t fimb_kwset = NULL; 654 char *starts = NULL; 655 wchar_t *wcbeg, *wclim; 656 size_t allocated = 0; 657 658 memset (&mbs, '\0', sizeof (mbs)); 659 # ifdef __GNU_LIBRARY__ 660 wcsize = mbsnrtowcs (wcpattern, &patternend, size, size, &mbs); 661 if (patternend != pattern + size) 662 wcsize = (size_t) -1; 663 # else 664 { 665 char *patterncopy = xmalloc (size + 1); 666 667 memcpy (patterncopy, pattern, size); 668 patterncopy[size] = '\0'; 669 patternend = patterncopy; 670 wcsize = mbsrtowcs (wcpattern, &patternend, size, &mbs); 671 if (patternend != patterncopy + size) 672 wcsize = (size_t) -1; 673 free (patterncopy); 674 } 675 # endif 676 if (wcsize + 2 <= 2) 677 { 678 fimb_fail: 679 free (wcpattern); 680 free (starts); 681 if (fimb_kwset) 682 kwsfree (fimb_kwset); 683 free (Fimb.patterns); 684 Fimb.patterns = NULL; 685 } 686 else 687 { 688 if (!(fimb_kwset = kwsalloc (NULL))) 689 error (2, 0, _("memory exhausted")); 690 691 starts = xmalloc 
(mb_cur_max * 3); 692 wcbeg = wcpattern; 693 do 694 { 695 int i; 696 size_t wclen; 697 698 if (Fimb.count >= allocated) 699 { 700 if (allocated == 0) 701 allocated = 128; 702 else 703 allocated *= 2; 704 Fimb.patterns = xrealloc (Fimb.patterns, 705 sizeof (wchar_t *) * allocated); 706 } 707 Fimb.patterns[Fimb.count++] = wcbeg; 708 for (wclim = wcbeg; 709 wclim < wcpattern + wcsize && *wclim != L'\n'; ++wclim) 710 *wclim = towlower (*wclim); 711 *wclim = L'\0'; 712 wclen = wclim - wcbeg; 713 if (wclen > Fimb.maxlen) 714 Fimb.maxlen = wclen; 715 if (wclen > 3) 716 wclen = 3; 717 if (wclen == 0) 718 { 719 if ((err = kwsincr (fimb_kwset, "", 0)) != 0) 720 error (2, 0, err); 721 } 722 else 723 for (i = 0; i < (1 << wclen); i++) 724 { 725 char *p = starts; 726 int j, k; 727 728 for (j = 0; j < wclen; ++j) 729 { 730 wchar_t wc = wcbeg[j]; 731 if (i & (1 << j)) 732 { 733 wc = towupper (wc); 734 if (wc == wcbeg[j]) 735 continue; 736 } 737 k = wctomb (p, wc); 738 if (k <= 0) 739 goto fimb_fail; 740 p += k; 741 } 742 if ((err = kwsincr (fimb_kwset, starts, p - starts)) != 0) 743 error (2, 0, err); 744 } 745 if (wclim < wcpattern + wcsize) 746 ++wclim; 747 wcbeg = wclim; 748 } 749 while (wcbeg < wcpattern + wcsize); 750 f_i_multibyte = 1; 751 kwset = fimb_kwset; 752 free (starts); 753 Fimb.match = xmalloc (Fimb.count); 754 if ((err = kwsprep (kwset)) != 0) 755 error (2, 0, err); 756 return; 757 } 758 } 759 #endif /* MBS_SUPPORT */ 760 761 496 762 kwsinit (); 497 763 beg = pattern; 498 764 do … … 511 777 error (2, 0, err); 512 778 } 513 779 780 #ifdef MBS_SUPPORT 781 static int 782 Fimbexec (const char *buf, size_t size, size_t *plen, int exact) 783 { 784 size_t len, letter, i; 785 int ret = -1; 786 mbstate_t mbs; 787 wchar_t wc; 788 int patterns_left; 789 790 assert (match_icase && f_i_multibyte == 1); 791 assert (MB_CUR_MAX > 1); 792 793 memset (&mbs, '\0', sizeof (mbs)); 794 memset (Fimb.match, '\1', Fimb.count); 795 letter = len = 0; 796 patterns_left = 1; 797 while 
(patterns_left && len <= size) 798 { 799 size_t c; 800 801 patterns_left = 0; 802 if (len < size) 803 { 804 c = mbrtowc (&wc, buf + len, size - len, &mbs); 805 if (c + 2 <= 2) 806 return ret; 807 808 wc = towlower (wc); 809 } 810 else 811 { 812 c = 1; 813 wc = L'\0'; 814 } 815 816 for (i = 0; i < Fimb.count; i++) 817 { 818 if (Fimb.match[i]) 819 { 820 if (Fimb.patterns[i][letter] == L'\0') 821 { 822 /* Found a match. */ 823 *plen = len; 824 if (!exact && !match_words) 825 return 0; 826 else 827 { 828 /* For -w or exact look for longest match. */ 829 ret = 0; 830 Fimb.match[i] = '\0'; 831 continue; 832 } 833 } 834 835 if (Fimb.patterns[i][letter] == wc) 836 patterns_left = 1; 837 else 838 Fimb.match[i] = '\0'; 839 } 840 } 841 842 len += c; 843 letter++; 844 } 845 846 return ret; 847 } 848 #endif /* MBS_SUPPORT */ 849 514 850 EXECUTE_FCT(Fexecute) 515 851 { 516 852 register char const *beg, *try, *end; … … 519 855 struct kwsmatch kwsmatch; 520 856 size_t ret_val; 521 857 #ifdef MBS_SUPPORT 522 char *mb_properties = NULL; 523 if (MB_CUR_MAX > 1) 524 { 525 if (match_icase) 526 { 527 char *case_buf = xmalloc(size); 528 memcpy(case_buf, buf, size); 529 if (start_ptr) 530 start_ptr = case_buf + (start_ptr - buf); 531 buf = case_buf; 532 } 533 mb_properties = check_multibyte_string(buf, size); 534 } 858 int mb_cur_max = MB_CUR_MAX; 859 mbstate_t mbs; 860 memset (&mbs, '\0', sizeof (mbstate_t)); 861 const char *last_char = NULL; 535 862 #endif /* MBS_SUPPORT */ 536 863 537 864 for (beg = start_ptr ? start_ptr : buf; beg <= buf + size; beg++) 538 865 { 539 866 size_t offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch); 540 867 if (offset == (size_t) -1) 541 goto failure;868 return offset; 542 869 #ifdef MBS_SUPPORT 543 if (MB_CUR_MAX > 1 && mb_properties[offset+beg-buf] == 0) 544 continue; /* It is a part of multibyte character. 
*/ 870 if (mb_cur_max > 1 && !using_utf8) 871 { 872 size_t bytes_left = offset; 873 while (bytes_left) 874 { 875 size_t mlen = mbrlen (beg, bytes_left, &mbs); 876 877 last_char = beg; 878 if (mlen == (size_t) -1 || mlen == 0) 879 { 880 /* Incomplete character: treat as single-byte. */ 881 memset (&mbs, '\0', sizeof (mbstate_t)); 882 beg++; 883 bytes_left--; 884 continue; 885 } 886 887 if (mlen == (size_t) -2) 888 /* Offset points inside multibyte character: no good. */ 889 break; 890 891 beg += mlen; 892 bytes_left -= mlen; 893 } 894 895 if (bytes_left) 896 continue; 897 } 898 else 545 899 #endif /* MBS_SUPPORT */ 546 900 beg += offset; 901 #ifdef MBS_SUPPORT 902 /* For f_i_multibyte, the string at beg now matches first 3 chars of 903 one of the search strings (less if there are shorter search strings). 904 See if this is a real match. */ 905 if (f_i_multibyte 906 && Fimbexec (beg, buf + size - beg, &kwsmatch.size[0], start_ptr == NULL)) 907 goto next_char; 908 #endif /* MBS_SUPPORT */ 547 909 len = kwsmatch.size[0]; 548 910 if (start_ptr && !match_words) 549 911 goto success_in_beg_and_len; 550 912 if (match_lines) 551 913 { 552 914 if (beg > buf && beg[-1] != eol) 553 continue;915 goto next_char; 554 916 if (beg + len < buf + size && beg[len] != eol) 555 continue;917 goto next_char; 556 918 goto success; 557 919 } 558 920 else if (match_words) 559 for (try = beg; len; ) 560 { 561 if (try > buf && WCHAR((unsigned char) try[-1])) 562 break; 563 if (try + len < buf + size && WCHAR((unsigned char) try[len])) 564 { 565 offset = kwsexec (kwset, beg, --len, &kwsmatch); 566 if (offset == (size_t) -1) 567 break; 568 try = beg + offset; 569 len = kwsmatch.size[0]; 570 } 571 else if (!start_ptr) 572 goto success; 573 else 574 goto success_in_beg_and_len; 575 } /* for (try) */ 576 else 921 { 922 while (len) 923 { 924 int word_match = 0; 925 if (beg > buf) 926 { 927 #ifdef MBS_SUPPORT 928 if (mb_cur_max > 1) 929 { 930 const char *s; 931 int mr; 932 wchar_t pwc; 933 934 if 
(using_utf8) 935 { 936 s = beg - 1; 937 while (s > buf 938 && (unsigned char) *s >= 0x80 939 && (unsigned char) *s <= 0xbf) 940 --s; 941 } 942 else 943 s = last_char; 944 mr = mbtowc (&pwc, s, beg - s); 945 if (mr <= 0) 946 memset (&mbs, '\0', sizeof (mbstate_t)); 947 else if ((iswalnum (pwc) || pwc == L'_') 948 && mr == (int) (beg - s)) 949 goto next_char; 950 } 951 else 952 #endif /* MBS_SUPPORT */ 953 if (WCHAR ((unsigned char) beg[-1])) 954 goto next_char; 955 } 956 #ifdef MBS_SUPPORT 957 if (mb_cur_max > 1) 958 { 959 wchar_t nwc; 960 int mr; 961 962 mr = mbtowc (&nwc, beg + len, buf + size - beg - len); 963 if (mr <= 0) 964 { 965 memset (&mbs, '\0', sizeof (mbstate_t)); 966 word_match = 1; 967 } 968 else if (!iswalnum (nwc) && nwc != L'_') 969 word_match = 1; 970 } 971 else 972 #endif /* MBS_SUPPORT */ 973 if (beg + len >= buf + size || !WCHAR ((unsigned char) beg[len])) 974 word_match = 1; 975 if (word_match) 976 { 977 if (start_ptr == NULL) 978 /* Returns the whole line now we know there's a word match. */ 979 goto success; 980 else { 981 /* Returns just this word match. */ 982 *match_size = len; 983 return beg - buf; 984 } 985 } 986 if (len > 0) 987 { 988 /* Try a shorter length anchored at the same place. */ 989 --len; 990 offset = kwsexec (kwset, beg, len, &kwsmatch); 991 992 if (offset == -1) 993 goto next_char; /* Try a different anchor. */ 994 #ifdef MBS_SUPPORT 995 996 if (mb_cur_max > 1 && !using_utf8) 997 { 998 size_t bytes_left = offset; 999 while (bytes_left) 1000 { 1001 size_t mlen = mbrlen (beg, bytes_left, &mbs); 1002 1003 last_char = beg; 1004 if (mlen == (size_t) -1 || mlen == 0) 1005 { 1006 /* Incomplete character: treat as single-byte. */ 1007 memset (&mbs, '\0', sizeof (mbstate_t)); 1008 beg++; 1009 bytes_left--; 1010 continue; 1011 } 1012 1013 if (mlen == (size_t) -2) 1014 { 1015 /* Offset points inside multibyte character: 1016 * no good. 
*/ 1017 break; 1018 } 1019 1020 beg += mlen; 1021 bytes_left -= mlen; 1022 } 1023 1024 if (bytes_left) 1025 { 1026 memset (&mbs, '\0', sizeof (mbstate_t)); 1027 goto next_char; /* Try a different anchor. */ 1028 } 1029 } 1030 else 1031 #endif /* MBS_SUPPORT */ 1032 beg += offset; 1033 #ifdef MBS_SUPPORT 1034 /* The string at beg now matches first 3 chars of one of 1035 the search strings (less if there are shorter search 1036 strings). See if this is a real match. */ 1037 if (f_i_multibyte 1038 && Fimbexec (beg, len - offset, &kwsmatch.size[0], 1039 start_ptr == NULL)) 1040 goto next_char; 1041 #endif /* MBS_SUPPORT */ 1042 len = kwsmatch.size[0]; 1043 } 1044 } 1045 } 1046 else 577 1047 goto success; 578 } /* for (beg in buf) */ 1048 next_char:; 1049 #ifdef MBS_SUPPORT 1050 /* Advance to next character. For MB_CUR_MAX == 1 case this is handled 1051 by ++beg above. */ 1052 if (mb_cur_max > 1) 1053 { 1054 if (using_utf8) 1055 { 1056 unsigned char c = *beg; 1057 if (c >= 0xc2) 1058 { 1059 if (c < 0xe0) 1060 ++beg; 1061 else if (c < 0xf0) 1062 beg += 2; 1063 else if (c < 0xf8) 1064 beg += 3; 1065 else if (c < 0xfc) 1066 beg += 4; 1067 else if (c < 0xfe) 1068 beg += 5; 1069 } 1070 } 1071 else 1072 { 1073 size_t l = mbrlen (beg, buf + size - beg, &mbs); 579 1074 580 failure: 581 ret_val = -1; 582 goto out; 1075 last_char = beg; 1076 if (l + 2 >= 2) 1077 beg += l - 1; 1078 else 1079 memset (&mbs, '\0', sizeof (mbstate_t)); 1080 } 1081 } 1082 #endif /* MBS_SUPPORT */ 1083 } 1084 1085 return -1; 583 1086 584 1087 success: 1088 #ifdef MBS_SUPPORT 1089 if (mb_cur_max > 1 && !using_utf8) 1090 { 1091 end = beg + len; 1092 while (end < buf + size) 1093 { 1094 size_t mlen = mbrlen (end, buf + size - end, &mbs); 1095 if (mlen == (size_t) -1 || mlen == (size_t) -2 || mlen == 0) 1096 { 1097 memset (&mbs, '\0', sizeof (mbstate_t)); 1098 mlen = 1; 1099 } 1100 if (mlen == 1 && *end == eol) 1101 break; 1102 1103 end += mlen; 1104 } 1105 } 1106 else 1107 #endif /* MBS_SUPPORT */ 585 1108 
end = memchr (beg + len, eol, (buf + size) - (beg + len)); 586 1109 end++; 587 1110 while (buf < beg && beg[-1] != eol) … … 591 1114 *match_size = len; 592 1115 ret_val = beg - buf; 593 1116 out: 594 #ifdef MBS_SUPPORT595 if (MB_CUR_MAX > 1)596 {597 if (match_icase)598 free((char*)buf);599 if (mb_properties)600 free(mb_properties);601 }602 #endif /* MBS_SUPPORT */603 1117 return ret_val; 604 1118 } 605 1119 #endif /* defined(GREP_PROGRAM) || defined(FGREP_PROGRAM) */
Note: See TracBrowser for help on using the repository browser.