[67300de] | 1 | Submitted By: Ken Moffat <ken at linuxfromscratch dot org>
|
---|
| 2 | Date: 2008-07-09
|
---|
| 3 | Initial Package Version: 5.8.8
|
---|
| 4 | Upstream Status: unknown
|
---|
| 5 | Origin: Extracted from redhat-enterprise.
|
---|
| 6 | Description: Fixes CVE-2007-5116 and CVE-2008-1927. NB - anyone who
|
---|
| 7 | thinks we should just upgrade to 5.10.0 needs to find a fix for
|
---|
| 8 | CVE-2008-2827 : no doubt 5.10.1 will fix that when it is released.
|
---|
| 9 | Edited to remove the creation of regcomp.c~ which I had overlooked.
|
---|
| 10 |
|
---|
| 11 | diff -Naur perl-5.8.8.orig/embed.fnc perl-5.8.8/embed.fnc
|
---|
| 12 | --- perl-5.8.8.orig/embed.fnc 2006-01-31 14:40:27.000000000 +0000
|
---|
| 13 | +++ perl-5.8.8/embed.fnc 2008-07-09 23:13:32.000000000 +0100
|
---|
| 14 | @@ -1168,6 +1168,7 @@
|
---|
| 15 | Es |regnode*|regclass |NN struct RExC_state_t *state
|
---|
| 16 | ERs |I32 |regcurly |NN const char *
|
---|
| 17 | Es |regnode*|reg_node |NN struct RExC_state_t *state|U8 op
|
---|
| 18 | +Es |UV |reg_recode |const char value|NULLOK SV **encp
|
---|
| 19 | Es |regnode*|regpiece |NN struct RExC_state_t *state|NN I32 *flagp
|
---|
| 20 | Es |void |reginsert |NN struct RExC_state_t *state|U8 op|NN regnode *opnd
|
---|
| 21 | Es |void |regoptail |NN struct RExC_state_t *state|NN regnode *p|NN regnode *val
|
---|
| 22 | diff -Naur perl-5.8.8.orig/embed.h perl-5.8.8/embed.h
|
---|
| 23 | --- perl-5.8.8.orig/embed.h 2006-01-31 15:50:34.000000000 +0000
|
---|
| 24 | +++ perl-5.8.8/embed.h 2008-07-09 23:13:32.000000000 +0100
|
---|
| 25 | @@ -1234,6 +1234,7 @@
|
---|
| 26 | #define regclass S_regclass
|
---|
| 27 | #define regcurly S_regcurly
|
---|
| 28 | #define reg_node S_reg_node
|
---|
| 29 | +#define reg_recode S_reg_recode
|
---|
| 30 | #define regpiece S_regpiece
|
---|
| 31 | #define reginsert S_reginsert
|
---|
| 32 | #define regoptail S_regoptail
|
---|
| 33 | @@ -3277,6 +3278,7 @@
|
---|
| 34 | #define regclass(a) S_regclass(aTHX_ a)
|
---|
| 35 | #define regcurly(a) S_regcurly(aTHX_ a)
|
---|
| 36 | #define reg_node(a,b) S_reg_node(aTHX_ a,b)
|
---|
| 37 | +#define reg_recode(a,b) S_reg_recode(aTHX_ a,b)
|
---|
| 38 | #define regpiece(a,b) S_regpiece(aTHX_ a,b)
|
---|
| 39 | #define reginsert(a,b,c) S_reginsert(aTHX_ a,b,c)
|
---|
| 40 | #define regoptail(a,b,c) S_regoptail(aTHX_ a,b,c)
|
---|
| 41 | diff -Naur perl-5.8.8.orig/pod/perldiag.pod perl-5.8.8/pod/perldiag.pod
|
---|
| 42 | --- perl-5.8.8.orig/pod/perldiag.pod 2006-01-06 23:16:08.000000000 +0000
|
---|
| 43 | +++ perl-5.8.8/pod/perldiag.pod 2008-07-09 23:13:32.000000000 +0100
|
---|
| 44 | @@ -1900,6 +1900,15 @@
|
---|
| 45 | (W printf) Perl does not understand the given format conversion. See
|
---|
| 46 | L<perlfunc/sprintf>.
|
---|
| 47 |
|
---|
| 48 | +=item Invalid escape in the specified encoding in regex; marked by <-- HERE in m/%s/
|
---|
| 49 | +
|
---|
| 50 | +(W regexp) The numeric escape (for example C<\xHH>) of value < 256
|
---|
| 51 | +didn't correspond to a single character through the conversion
|
---|
| 52 | +from the encoding specified by the encoding pragma.
|
---|
| 53 | +The escape was replaced with REPLACEMENT CHARACTER (U+FFFD) instead.
|
---|
| 54 | +The <-- HERE shows in the regular expression about where the
|
---|
| 55 | +escape was discovered.
|
---|
| 56 | +
|
---|
| 57 | =item Invalid [] range "%s" in regex; marked by <-- HERE in m/%s/
|
---|
| 58 |
|
---|
| 59 | (F) The range specified in a character class had a minimum character
|
---|
| 60 | diff -Naur perl-5.8.8.orig/proto.h perl-5.8.8/proto.h
|
---|
| 61 | --- perl-5.8.8.orig/proto.h 2006-01-31 15:50:34.000000000 +0000
|
---|
| 62 | +++ perl-5.8.8/proto.h 2008-07-09 23:13:32.000000000 +0100
|
---|
| 63 | @@ -1748,6 +1748,7 @@
|
---|
| 64 | __attribute__warn_unused_result__;
|
---|
| 65 |
|
---|
| 66 | STATIC regnode* S_reg_node(pTHX_ struct RExC_state_t *state, U8 op);
|
---|
| 67 | +STATIC UV S_reg_recode(pTHX_ const char value, SV **encp);
|
---|
| 68 | STATIC regnode* S_regpiece(pTHX_ struct RExC_state_t *state, I32 *flagp);
|
---|
| 69 | STATIC void S_reginsert(pTHX_ struct RExC_state_t *state, U8 op, regnode *opnd);
|
---|
| 70 | STATIC void S_regoptail(pTHX_ struct RExC_state_t *state, regnode *p, regnode *val);
|
---|
| 71 | diff -Naur perl-5.8.8.orig/regcomp.c perl-5.8.8/regcomp.c
|
---|
| 72 | --- perl-5.8.8.orig/regcomp.c 2006-01-08 20:59:27.000000000 +0000
|
---|
| 73 | +++ perl-5.8.8/regcomp.c 2008-07-09 23:13:32.000000000 +0100
|
---|
| 74 | @@ -136,6 +136,7 @@
|
---|
| 75 | I32 seen_zerolen;
|
---|
| 76 | I32 seen_evals;
|
---|
| 77 | I32 utf8;
|
---|
| 78 | + I32 orig_utf8;
|
---|
| 79 | #if ADD_TO_REGEXEC
|
---|
| 80 | char *starttry; /* -Dr: where regtry was called. */
|
---|
| 81 | #define RExC_starttry (pRExC_state->starttry)
|
---|
| 82 | @@ -161,6 +162,7 @@
|
---|
| 83 | #define RExC_seen_zerolen (pRExC_state->seen_zerolen)
|
---|
| 84 | #define RExC_seen_evals (pRExC_state->seen_evals)
|
---|
| 85 | #define RExC_utf8 (pRExC_state->utf8)
|
---|
| 86 | +#define RExC_orig_utf8 (pRExC_state->orig_utf8)
|
---|
| 87 |
|
---|
| 88 | #define ISMULT1(c) ((c) == '*' || (c) == '+' || (c) == '?')
|
---|
| 89 | #define ISMULT2(s) ((*s) == '*' || (*s) == '+' || (*s) == '?' || \
|
---|
| 90 | @@ -1749,15 +1751,17 @@
|
---|
| 91 | if (exp == NULL)
|
---|
| 92 | FAIL("NULL regexp argument");
|
---|
| 93 |
|
---|
| 94 | - RExC_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8;
|
---|
| 95 | + RExC_orig_utf8 = RExC_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8;
|
---|
| 96 |
|
---|
| 97 | - RExC_precomp = exp;
|
---|
| 98 | DEBUG_r({
|
---|
| 99 | if (!PL_colorset) reginitcolors();
|
---|
| 100 | PerlIO_printf(Perl_debug_log, "%sCompiling REx%s `%s%*s%s'\n",
|
---|
| 101 | PL_colors[4],PL_colors[5],PL_colors[0],
|
---|
| 102 | - (int)(xend - exp), RExC_precomp, PL_colors[1]);
|
---|
| 103 | + (int)(xend - exp), exp, PL_colors[1]);
|
---|
| 104 | });
|
---|
| 105 | +
|
---|
| 106 | +redo_first_pass:
|
---|
| 107 | + RExC_precomp = exp;
|
---|
| 108 | RExC_flags = pm->op_pmflags;
|
---|
| 109 | RExC_sawback = 0;
|
---|
| 110 |
|
---|
| 111 | @@ -1783,6 +1787,17 @@
|
---|
| 112 | RExC_precomp = Nullch;
|
---|
| 113 | return(NULL);
|
---|
| 114 | }
|
---|
| 115 | + if (RExC_utf8 && !RExC_orig_utf8) {
|
---|
| 116 | + STRLEN len = xend-exp;
|
---|
| 117 | + DEBUG_r(PerlIO_printf(Perl_debug_log,
|
---|
| 118 | + "UTF8 mismatch! Converting to utf8 for resizing and compile\n"));
|
---|
| 119 | + exp = (char*)Perl_bytes_to_utf8(aTHX_ (U8*)exp, &len);
|
---|
| 120 | + xend = exp + len;
|
---|
| 121 | + RExC_orig_utf8 = RExC_utf8;
|
---|
| 122 | + SAVEFREEPV(exp);
|
---|
| 123 | + goto redo_first_pass;
|
---|
| 124 | + }
|
---|
| 125 | +
|
---|
| 126 | DEBUG_r(PerlIO_printf(Perl_debug_log, "size %"IVdf" ", (IV)RExC_size));
|
---|
| 127 |
|
---|
| 128 | /* Small enough for pointer-storage convention?
|
---|
| 129 | @@ -2775,6 +2790,39 @@
|
---|
| 130 | }
|
---|
| 131 |
|
---|
| 132 | /*
|
---|
| 133 | + * reg_recode
|
---|
| 134 | + *
|
---|
| 135 | + * It returns the code point in utf8 for the value in *encp.
|
---|
| 136 | + * value: a code value in the source encoding
|
---|
| 137 | + * encp: a pointer to an Encode object
|
---|
| 138 | + *
|
---|
| 139 | + * If the result from Encode is not a single character,
|
---|
| 140 | + * it returns U+FFFD (Replacement character) and sets *encp to NULL.
|
---|
| 141 | + */
|
---|
| 142 | +STATIC UV
|
---|
| 143 | +S_reg_recode(pTHX_ const char value, SV **encp)
|
---|
| 144 | +{
|
---|
| 145 | + STRLEN numlen = 1;
|
---|
| 146 | + SV * const sv = sv_2mortal(newSVpvn(&value, numlen));
|
---|
| 147 | + const char * const s = encp && *encp ? sv_recode_to_utf8(sv, *encp)
|
---|
| 148 | + : SvPVX(sv);
|
---|
| 149 | + const STRLEN newlen = SvCUR(sv);
|
---|
| 150 | + UV uv = UNICODE_REPLACEMENT;
|
---|
| 151 | +
|
---|
| 152 | + if (newlen)
|
---|
| 153 | + uv = SvUTF8(sv)
|
---|
| 154 | + ? utf8n_to_uvchr((U8*)s, newlen, &numlen, UTF8_ALLOW_DEFAULT)
|
---|
| 155 | + : *(U8*)s;
|
---|
| 156 | +
|
---|
| 157 | + if (!newlen || numlen != newlen) {
|
---|
| 158 | + uv = UNICODE_REPLACEMENT;
|
---|
| 159 | + if (encp)
|
---|
| 160 | + *encp = NULL;
|
---|
| 161 | + }
|
---|
| 162 | + return uv;
|
---|
| 163 | +}
|
---|
| 164 | +
|
---|
| 165 | +/*
|
---|
| 166 | - regatom - the lowest level
|
---|
| 167 | *
|
---|
| 168 | * Optimization: gobbles an entire sequence of ordinary characters so that
|
---|
| 169 | @@ -3166,6 +3214,8 @@
|
---|
| 170 | ender = grok_hex(p, &numlen, &flags, NULL);
|
---|
| 171 | p += numlen;
|
---|
| 172 | }
|
---|
| 173 | + if (PL_encoding && ender < 0x100)
|
---|
| 174 | + goto recode_encoding;
|
---|
| 175 | break;
|
---|
| 176 | case 'c':
|
---|
| 177 | p++;
|
---|
| 178 | @@ -3185,6 +3235,17 @@
|
---|
| 179 | --p;
|
---|
| 180 | goto loopdone;
|
---|
| 181 | }
|
---|
| 182 | + if (PL_encoding && ender < 0x100)
|
---|
| 183 | + goto recode_encoding;
|
---|
| 184 | + break;
|
---|
| 185 | + recode_encoding:
|
---|
| 186 | + {
|
---|
| 187 | + SV* enc = PL_encoding;
|
---|
| 188 | + ender = reg_recode((const char)(U8)ender, &enc);
|
---|
| 189 | + if (!enc && SIZE_ONLY && ckWARN(WARN_REGEXP))
|
---|
| 190 | + vWARN(p, "Invalid escape in the specified encoding");
|
---|
| 191 | + RExC_utf8 = 1;
|
---|
| 192 | + }
|
---|
| 193 | break;
|
---|
| 194 | case '\0':
|
---|
| 195 | if (p >= RExC_end)
|
---|
| 196 | @@ -3315,32 +3376,6 @@
|
---|
| 197 | break;
|
---|
| 198 | }
|
---|
| 199 |
|
---|
| 200 | - /* If the encoding pragma is in effect recode the text of
|
---|
| 201 | - * any EXACT-kind nodes. */
|
---|
| 202 | - if (PL_encoding && PL_regkind[(U8)OP(ret)] == EXACT) {
|
---|
| 203 | - STRLEN oldlen = STR_LEN(ret);
|
---|
| 204 | - SV *sv = sv_2mortal(newSVpvn(STRING(ret), oldlen));
|
---|
| 205 | -
|
---|
| 206 | - if (RExC_utf8)
|
---|
| 207 | - SvUTF8_on(sv);
|
---|
| 208 | - if (sv_utf8_downgrade(sv, TRUE)) {
|
---|
| 209 | - const char * const s = sv_recode_to_utf8(sv, PL_encoding);
|
---|
| 210 | - const STRLEN newlen = SvCUR(sv);
|
---|
| 211 | -
|
---|
| 212 | - if (SvUTF8(sv))
|
---|
| 213 | - RExC_utf8 = 1;
|
---|
| 214 | - if (!SIZE_ONLY) {
|
---|
| 215 | - DEBUG_r(PerlIO_printf(Perl_debug_log, "recode %*s to %*s\n",
|
---|
| 216 | - (int)oldlen, STRING(ret),
|
---|
| 217 | - (int)newlen, s));
|
---|
| 218 | - Copy(s, STRING(ret), newlen, char);
|
---|
| 219 | - STR_LEN(ret) += newlen - oldlen;
|
---|
| 220 | - RExC_emit += STR_SZ(newlen) - STR_SZ(oldlen);
|
---|
| 221 | - } else
|
---|
| 222 | - RExC_size += STR_SZ(newlen) - STR_SZ(oldlen);
|
---|
| 223 | - }
|
---|
| 224 | - }
|
---|
| 225 | -
|
---|
| 226 | return(ret);
|
---|
| 227 | }
|
---|
| 228 |
|
---|
| 229 | @@ -3718,6 +3753,8 @@
|
---|
| 230 | value = grok_hex(RExC_parse, &numlen, &flags, NULL);
|
---|
| 231 | RExC_parse += numlen;
|
---|
| 232 | }
|
---|
| 233 | + if (PL_encoding && value < 0x100)
|
---|
| 234 | + goto recode_encoding;
|
---|
| 235 | break;
|
---|
| 236 | case 'c':
|
---|
| 237 | value = UCHARAT(RExC_parse++);
|
---|
| 238 | @@ -3725,13 +3762,24 @@
|
---|
| 239 | break;
|
---|
| 240 | case '0': case '1': case '2': case '3': case '4':
|
---|
| 241 | case '5': case '6': case '7': case '8': case '9':
|
---|
| 242 | - {
|
---|
| 243 | - I32 flags = 0;
|
---|
| 244 | - numlen = 3;
|
---|
| 245 | - value = grok_oct(--RExC_parse, &numlen, &flags, NULL);
|
---|
| 246 | - RExC_parse += numlen;
|
---|
| 247 | - break;
|
---|
| 248 | - }
|
---|
| 249 | + {
|
---|
| 250 | + I32 flags = 0;
|
---|
| 251 | + numlen = 3;
|
---|
| 252 | + value = grok_oct(--RExC_parse, &numlen, &flags, NULL);
|
---|
| 253 | + RExC_parse += numlen;
|
---|
| 254 | + if (PL_encoding && value < 0x100)
|
---|
| 255 | + goto recode_encoding;
|
---|
| 256 | + break;
|
---|
| 257 | + }
|
---|
| 258 | + recode_encoding:
|
---|
| 259 | + {
|
---|
| 260 | + SV* enc = PL_encoding;
|
---|
| 261 | + value = reg_recode((const char)(U8)value, &enc);
|
---|
| 262 | + if (!enc && SIZE_ONLY && ckWARN(WARN_REGEXP))
|
---|
| 263 | + vWARN(RExC_parse,
|
---|
| 264 | + "Invalid escape in the specified encoding");
|
---|
| 265 | + break;
|
---|
| 266 | + }
|
---|
| 267 | default:
|
---|
| 268 | if (!SIZE_ONLY && isALPHA(value) && ckWARN(WARN_REGEXP))
|
---|
| 269 | vWARN2(RExC_parse,
|
---|
| 270 | diff -Naur perl-5.8.8.orig/t/uni/tr_utf8.t perl-5.8.8/t/uni/tr_utf8.t
|
---|
| 271 | --- perl-5.8.8.orig/t/uni/tr_utf8.t 2004-06-25 09:53:16.000000000 +0100
|
---|
| 272 | +++ perl-5.8.8/t/uni/tr_utf8.t 2008-07-09 23:13:32.000000000 +0100
|
---|
| 273 | @@ -31,7 +31,7 @@
|
---|
| 274 | }
|
---|
| 275 |
|
---|
| 276 | use strict;
|
---|
| 277 | -use Test::More tests => 7;
|
---|
| 278 | +use Test::More tests => 8;
|
---|
| 279 |
|
---|
| 280 | use encoding 'utf8';
|
---|
| 281 |
|
---|
| 282 | @@ -67,4 +67,12 @@
|
---|
| 283 | $line =~ tr/bcdeghijklmnprstvwxyz$02578/×׊××¢×××ײק××× ×€ÖŒ×š×¡×װש×××שױתײ××/;
|
---|
| 284 | is($line, "a×׊××¢f×××ײק××× o×€q֌ךסu×װש×××ש1×±34ת6ײ×9", "[perl #16843]");
|
---|
| 285 | }
|
---|
| 286 | +
|
---|
| 287 | +{
|
---|
| 288 | + # [perl #40641]
|
---|
| 289 | + my $str = qq/GebÀÀÀÀÀÀÀÀÀÀÀÀÀÀÀÀÀÀÀÀude/;
|
---|
| 290 | + my $reg = qr/GebÀÀÀÀÀÀÀÀÀÀÀÀÀÀÀÀÀÀÀÀude/;
|
---|
| 291 | + ok($str =~ /$reg/, "[perl #40641]");
|
---|
| 292 | +}
|
---|
| 293 | +
|
---|
| 294 | __END__
|
---|
| 295 | diff -Naur perl-5.8.8.orig/utf8.h perl-5.8.8/utf8.h
|
---|
| 296 | --- perl-5.8.8.orig/utf8.h 2006-01-08 21:11:27.000000000 +0000
|
---|
| 297 | +++ perl-5.8.8/utf8.h 2008-07-09 23:13:32.000000000 +0100
|
---|
| 298 | @@ -198,6 +198,8 @@
|
---|
| 299 | UTF8_ALLOW_SURROGATE|UTF8_ALLOW_FFFF)
|
---|
| 300 | #define UTF8_ALLOW_ANY 0x00FF
|
---|
| 301 | #define UTF8_CHECK_ONLY 0x0200
|
---|
| 302 | +#define UTF8_ALLOW_DEFAULT (ckWARN(WARN_UTF8) ? 0 : \
|
---|
| 303 | + UTF8_ALLOW_ANYUV)
|
---|
| 304 |
|
---|
| 305 | #define UNICODE_SURROGATE_FIRST 0xD800
|
---|
| 306 | #define UNICODE_SURROGATE_LAST 0xDFFF
|
---|