antoine.pitrou
2008-07-22 17:53:23 UTC
Author: antoine.pitrou
Date: Tue Jul 22 19:53:22 2008
New Revision: 65185
Log:
#3231: re.compile fails with some bytes patterns
Modified:
python/branches/py3k/Lib/sre_parse.py
python/branches/py3k/Lib/test/re_tests.py
python/branches/py3k/Lib/test/test_re.py
Modified: python/branches/py3k/Lib/sre_parse.py
==============================================================================
--- python/branches/py3k/Lib/sre_parse.py (original)
+++ python/branches/py3k/Lib/sre_parse.py Tue Jul 22 19:53:22 2008
@@ -200,7 +200,7 @@
             except IndexError:
                 raise error("bogus escape (end of line)")
             if isinstance(self.string, bytes):
-                char = chr(c)
+                c = chr(c)
             char = char + c
         self.index = self.index + len(char)
         self.next = char
Modified: python/branches/py3k/Lib/test/re_tests.py
==============================================================================
--- python/branches/py3k/Lib/test/re_tests.py (original)
+++ python/branches/py3k/Lib/test/re_tests.py Tue Jul 22 19:53:22 2008
@@ -661,12 +661,8 @@
('^([ab]*?)(?<!(a))c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
]
-try:
- u = eval("u'\N{LATIN CAPITAL LETTER A WITH DIAERESIS}'")
-except SyntaxError:
- pass
-else:
- tests.extend([
+u = '\N{LATIN CAPITAL LETTER A WITH DIAERESIS}'
+tests.extend([
# bug 410271: \b broken under locales
(r'\b.\b', 'a', SUCCEED, 'found', 'a'),
(r'(?u)\b.\b', u, SUCCEED, 'found', u),
Modified: python/branches/py3k/Lib/test/test_re.py
==============================================================================
--- python/branches/py3k/Lib/test/test_re.py (original)
+++ python/branches/py3k/Lib/test/test_re.py Tue Jul 22 19:53:22 2008
@@ -732,23 +732,25 @@
else:
print('=== Failed incorrectly', t)
- # Try the match on a unicode string, and check that it
- # still succeeds.
+ # Try the match with both pattern and string converted to
+ # bytes, and check that it still succeeds.
try:
- result = obj.search(str(s, "latin-1"))
- if result is None:
- print('=== Fails on unicode match', t)
- except NameError:
- continue # 1.5.2
- except TypeError:
- continue # unicode test case
-
- # Try the match on a unicode pattern, and check that it
- # still succeeds.
- obj=re.compile(str(pattern, "latin-1"))
- result = obj.search(s)
- if result is None:
- print('=== Fails on unicode pattern match', t)
+ bpat = bytes(pattern, "ascii")
+ bs = bytes(s, "ascii")
+ except UnicodeEncodeError:
+ # skip non-ascii tests
+ pass
+ else:
+ try:
+ bpat = re.compile(bpat)
+ except Exception:
+ print('=== Fails on bytes pattern compile', t)
+ if verbose:
+ traceback.print_exc(file=sys.stdout)
+ else:
+ bytes_result = bpat.search(bs)
+ if bytes_result is None:
+ print('=== Fails on bytes pattern match', t)
# Try the match with the search area limited to the extent
# of the match and see if it still succeeds. \B will
@@ -771,10 +773,11 @@
# Try the match with LOCALE enabled, and check that it
# still succeeds.
- obj = re.compile(pattern, re.LOCALE)
- result = obj.search(s)
- if result is None:
- print('=== Fails on locale-sensitive match', t)
+ if '(?u)' not in pattern:
+ obj = re.compile(pattern, re.LOCALE)
+ result = obj.search(s)
+ if result is None:
+ print('=== Fails on locale-sensitive match', t)
# Try the match with UNICODE locale enabled, and check
# that it still succeeds.
Date: Tue Jul 22 19:53:22 2008
New Revision: 65185
Log:
#3231: re.compile fails with some bytes patterns
Modified:
python/branches/py3k/Lib/sre_parse.py
python/branches/py3k/Lib/test/re_tests.py
python/branches/py3k/Lib/test/test_re.py
Modified: python/branches/py3k/Lib/sre_parse.py
==============================================================================
--- python/branches/py3k/Lib/sre_parse.py (original)
+++ python/branches/py3k/Lib/sre_parse.py Tue Jul 22 19:53:22 2008
@@ -200,7 +200,7 @@
             except IndexError:
                 raise error("bogus escape (end of line)")
             if isinstance(self.string, bytes):
-                char = chr(c)
+                c = chr(c)
             char = char + c
         self.index = self.index + len(char)
         self.next = char
Modified: python/branches/py3k/Lib/test/re_tests.py
==============================================================================
--- python/branches/py3k/Lib/test/re_tests.py (original)
+++ python/branches/py3k/Lib/test/re_tests.py Tue Jul 22 19:53:22 2008
@@ -661,12 +661,8 @@
('^([ab]*?)(?<!(a))c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
]
-try:
- u = eval("u'\N{LATIN CAPITAL LETTER A WITH DIAERESIS}'")
-except SyntaxError:
- pass
-else:
- tests.extend([
+u = '\N{LATIN CAPITAL LETTER A WITH DIAERESIS}'
+tests.extend([
# bug 410271: \b broken under locales
(r'\b.\b', 'a', SUCCEED, 'found', 'a'),
(r'(?u)\b.\b', u, SUCCEED, 'found', u),
Modified: python/branches/py3k/Lib/test/test_re.py
==============================================================================
--- python/branches/py3k/Lib/test/test_re.py (original)
+++ python/branches/py3k/Lib/test/test_re.py Tue Jul 22 19:53:22 2008
@@ -732,23 +732,25 @@
else:
print('=== Failed incorrectly', t)
- # Try the match on a unicode string, and check that it
- # still succeeds.
+ # Try the match with both pattern and string converted to
+ # bytes, and check that it still succeeds.
try:
- result = obj.search(str(s, "latin-1"))
- if result is None:
- print('=== Fails on unicode match', t)
- except NameError:
- continue # 1.5.2
- except TypeError:
- continue # unicode test case
-
- # Try the match on a unicode pattern, and check that it
- # still succeeds.
- obj=re.compile(str(pattern, "latin-1"))
- result = obj.search(s)
- if result is None:
- print('=== Fails on unicode pattern match', t)
+ bpat = bytes(pattern, "ascii")
+ bs = bytes(s, "ascii")
+ except UnicodeEncodeError:
+ # skip non-ascii tests
+ pass
+ else:
+ try:
+ bpat = re.compile(bpat)
+ except Exception:
+ print('=== Fails on bytes pattern compile', t)
+ if verbose:
+ traceback.print_exc(file=sys.stdout)
+ else:
+ bytes_result = bpat.search(bs)
+ if bytes_result is None:
+ print('=== Fails on bytes pattern match', t)
# Try the match with the search area limited to the extent
# of the match and see if it still succeeds. \B will
@@ -771,10 +773,11 @@
# Try the match with LOCALE enabled, and check that it
# still succeeds.
- obj = re.compile(pattern, re.LOCALE)
- result = obj.search(s)
- if result is None:
- print('=== Fails on locale-sensitive match', t)
+ if '(?u)' not in pattern:
+ obj = re.compile(pattern, re.LOCALE)
+ result = obj.search(s)
+ if result is None:
+ print('=== Fails on locale-sensitive match', t)
# Try the match with UNICODE locale enabled, and check
# that it still succeeds.