Discussion:
[Python-3000-checkins] r66366 - in python/branches/py3k: Lib/re.py Lib/test/test_re.py Misc/NEWS
guido.van.rossum
2008-09-10 17:44:36 UTC
Permalink
Author: guido.van.rossum
Date: Wed Sep 10 19:44:35 2008
New Revision: 66366

Log:
Issue #3756: make re.escape() handle bytes as well as str.
Patch by Andrew McNamara, reviewed and tweaked by myself.


Modified:
python/branches/py3k/Lib/re.py
python/branches/py3k/Lib/test/test_re.py
python/branches/py3k/Misc/NEWS

Modified: python/branches/py3k/Lib/re.py
==============================================================================
--- python/branches/py3k/Lib/re.py (original)
+++ python/branches/py3k/Lib/re.py Wed Sep 10 19:44:35 2008
@@ -211,23 +211,38 @@
"Compile a template pattern, returning a pattern object"
return _compile(pattern, flags|T)

-_alphanum = {}
-for c in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890':
- _alphanum[c] = 1
-del c
+_alphanum_str = frozenset(
+ "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890")
+_alphanum_bytes = frozenset(
+ b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890")

def escape(pattern):
"Escape all non-alphanumeric characters in pattern."
- s = list(pattern)
- alphanum = _alphanum
- for i in range(len(pattern)):
- c = pattern[i]
- if c not in alphanum:
- if c == "\000":
- s[i] = "\\000"
+ if isinstance(pattern, str):
+ alphanum = _alphanum_str
+ s = list(pattern)
+ for i in range(len(pattern)):
+ c = pattern[i]
+ if c not in alphanum:
+ if c == "\000":
+ s[i] = "\\000"
+ else:
+ s[i] = "\\" + c
+ return "".join(s)
+ else:
+ alphanum = _alphanum_bytes
+ s = []
+ esc = ord(b"\\")
+ for c in pattern:
+ if c in alphanum:
+ s.append(c)
else:
- s[i] = "\\" + c
- return pattern[:0].join(s)
+ if c == 0:
+ s.extend(b"\\000")
+ else:
+ s.append(esc)
+ s.append(c)
+ return bytes(s)

# --------------------------------------------------------------------
# internals
@@ -248,7 +263,8 @@
pattern, flags = key
if isinstance(pattern, _pattern_type):
if flags:
- raise ValueError('Cannot process flags argument with a compiled pattern')
+ raise ValueError(
+ "Cannot process flags argument with a compiled pattern")
return pattern
if not sre_compile.isstring(pattern):
raise TypeError("first argument must be string or compiled pattern")
@@ -325,7 +341,7 @@
if i == j:
break
action = self.lexicon[m.lastindex-1][1]
- if hasattr(action, '__call__'):
+ if hasattr(action, "__call__"):
self.match = m
action = action(self, m.group())
if action is not None:

Modified: python/branches/py3k/Lib/test/test_re.py
==============================================================================
--- python/branches/py3k/Lib/test/test_re.py (original)
+++ python/branches/py3k/Lib/test/test_re.py Wed Sep 10 19:44:35 2008
@@ -416,6 +416,7 @@

def test_re_escape(self):
p=""
+ self.assertEqual(re.escape(p), p)
for i in range(0, 256):
p = p + chr(i)
self.assertEqual(re.match(re.escape(chr(i)), chr(i)) is not None,
@@ -426,6 +427,19 @@
self.assertEqual(pat.match(p) is not None, True)
self.assertEqual(pat.match(p).span(), (0,256))

+ def test_re_escape_byte(self):
+ p=b""
+ self.assertEqual(re.escape(p), p)
+ for i in range(0, 256):
+ b = bytes([i])
+ p += b
+ self.assertEqual(re.match(re.escape(b), b) is not None, True)
+ self.assertEqual(re.match(re.escape(b), b).span(), (0,1))
+
+ pat=re.compile(re.escape(p))
+ self.assertEqual(pat.match(p) is not None, True)
+ self.assertEqual(pat.match(p).span(), (0,256))
+
def pickle_test(self, pickle):
oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
s = pickle.dumps(oldpat)

Modified: python/branches/py3k/Misc/NEWS
==============================================================================
--- python/branches/py3k/Misc/NEWS (original)
+++ python/branches/py3k/Misc/NEWS Wed Sep 10 19:44:35 2008
@@ -96,6 +96,8 @@
Library
-------

+- Issue #3756: make re.escape() handle bytes as well as str.
+
- Issue #3800: fix filter() related bug in formatter.py.

- Issue #874900: fix behaviour of threading module after a fork.

Loading...