brett.cannon
2008-11-21 00:17:54 UTC
Author: brett.cannon
Date: Fri Nov 21 01:17:53 2008
New Revision: 67310
Log:
Make dbm.dumb encode string keys as UTF-8. Also fix it so that it accepts both
bytes and strings as keys.
Closes issue #3799.
Modified:
python/branches/py3k/Lib/dbm/dumb.py
python/branches/py3k/Lib/test/test_dbm_dumb.py
python/branches/py3k/Misc/NEWS
Modified: python/branches/py3k/Lib/dbm/dumb.py
==============================================================================
--- python/branches/py3k/Lib/dbm/dumb.py (original)
+++ python/branches/py3k/Lib/dbm/dumb.py Fri Nov 21 01:17:53 2008
@@ -84,6 +84,7 @@
for line in f:
line = line.rstrip()
key, pos_and_siz_pair = eval(line)
+ key = key.encode('Latin-1')
self._index[key] = pos_and_siz_pair
f.close()
@@ -110,13 +111,16 @@
f = self._io.open(self._dirfile, 'w')
self._chmod(self._dirfile)
for key, pos_and_siz_pair in self._index.items():
- f.write("%r, %r\n" % (key, pos_and_siz_pair))
+ # Use Latin-1 since it has no qualms with any value in any
+ # position; UTF-8, though, does care sometimes.
+ f.write("%r, %r\n" % (key.decode('Latin-1'), pos_and_siz_pair))
f.close()
sync = _commit
def __getitem__(self, key):
- key = key.decode("latin-1")
+ if isinstance(key, str):
+ key = key.encode('utf-8')
pos, siz = self._index[key] # may raise KeyError
f = _io.open(self._datfile, 'rb')
f.seek(pos)
@@ -161,11 +165,12 @@
f.close()
def __setitem__(self, key, val):
- if not isinstance(key, bytes):
- raise TypeError("keys must be bytes")
- key = key.decode("latin-1") # hashable bytes
+ if isinstance(key, str):
+ key = key.encode('utf-8')
+ elif not isinstance(key, (bytes, bytearray)):
+ raise TypeError("keys must be bytes or strings")
if not isinstance(val, (bytes, bytearray)):
- raise TypeError("values must be byte strings")
+ raise TypeError("values must be bytes")
if key not in self._index:
self._addkey(key, self._addval(val))
else:
@@ -191,7 +196,8 @@
# (so that _commit() never gets called).
def __delitem__(self, key):
- key = key.decode("latin-1")
+ if isinstance(key, str):
+ key = key.encode('utf-8')
# The blocks used by the associated value are lost.
del self._index[key]
# XXX It's unclear why we do a _commit() here (the code always
@@ -201,14 +207,14 @@
self._commit()
def keys(self):
- return [key.encode("latin-1") for key in self._index.keys()]
+ return list(self._index.keys())
def items(self):
- return [(key.encode("latin-1"), self[key.encode("latin-1")])
- for key in self._index.keys()]
+ return [(key, self[key]) for key in self._index.keys()]
def __contains__(self, key):
- key = key.decode("latin-1")
+ if isinstance(key, str):
+ key = key.encode('utf-8')
return key in self._index
def iterkeys(self):
Modified: python/branches/py3k/Lib/test/test_dbm_dumb.py
==============================================================================
--- python/branches/py3k/Lib/test/test_dbm_dumb.py (original)
+++ python/branches/py3k/Lib/test/test_dbm_dumb.py Fri Nov 21 01:17:53 2008
@@ -19,13 +19,14 @@
pass
class DumbDBMTestCase(unittest.TestCase):
- _dict = {'0': b'',
- 'a': b'Python:',
- 'b': b'Programming',
- 'c': b'the',
- 'd': b'way',
- 'f': b'Guido',
- 'g': b'intended',
+ _dict = {b'0': b'',
+ b'a': b'Python:',
+ b'b': b'Programming',
+ b'c': b'the',
+ b'd': b'way',
+ b'f': b'Guido',
+ b'g': b'intended',
+ '\u00fc'.encode('utf-8') : b'!',
}
def __init__(self, *args):
@@ -35,7 +36,7 @@
f = dumbdbm.open(_fname, 'c')
self.assertEqual(list(f.keys()), [])
for key in self._dict:
- f[key.encode("ascii")] = self._dict[key]
+ f[key] = self._dict[key]
self.read_helper(f)
f.close()
@@ -73,7 +74,7 @@
def test_dumbdbm_modification(self):
self.init_db()
f = dumbdbm.open(_fname, 'w')
- self._dict['g'] = f[b'g'] = b"indented"
+ self._dict[b'g'] = f[b'g'] = b"indented"
self.read_helper(f)
f.close()
@@ -105,6 +106,21 @@
self.assertEqual(f[b'1'], b'hello2')
f.close()
+ def test_str_read(self):
+ self.init_db()
+ f = dumbdbm.open(_fname, 'r')
+ self.assertEqual(f['\u00fc'], self._dict['\u00fc'.encode('utf-8')])
+
+ def test_str_write_contains(self):
+ self.init_db()
+ f = dumbdbm.open(_fname)
+ f['\u00fc'] = b'!'
+ f.close()
+ f = dumbdbm.open(_fname, 'r')
+ self.assert_('\u00fc' in f)
+ self.assertEqual(f['\u00fc'.encode('utf-8')],
+ self._dict['\u00fc'.encode('utf-8')])
+
def test_line_endings(self):
# test for bug #1172763: dumbdbm would die if the line endings
# weren't what was expected.
@@ -129,16 +145,16 @@
def read_helper(self, f):
keys = self.keys_helper(f)
for key in self._dict:
- self.assertEqual(self._dict[key], f[key.encode("ascii")])
+ self.assertEqual(self._dict[key], f[key])
def init_db(self):
f = dumbdbm.open(_fname, 'w')
for k in self._dict:
- f[k.encode("ascii")] = self._dict[k]
+ f[k] = self._dict[k]
f.close()
def keys_helper(self, f):
- keys = sorted(k.decode("ascii") for k in f.keys())
+ keys = sorted(f.keys())
dkeys = sorted(self._dict.keys())
self.assertEqual(keys, dkeys)
return keys
@@ -155,12 +171,12 @@
if random.random() < 0.2:
if k in d:
del d[k]
- del f[k.encode("ascii")]
+ del f[k]
else:
v = random.choice((b'a', b'b', b'c')) * random.randrange(10000)
d[k] = v
- f[k.encode("ascii")] = v
- self.assertEqual(f[k.encode("ascii")], v)
+ f[k] = v
+ self.assertEqual(f[k], v)
f.close()
f = dumbdbm.open(_fname)
Modified: python/branches/py3k/Misc/NEWS
==============================================================================
--- python/branches/py3k/Misc/NEWS (original)
+++ python/branches/py3k/Misc/NEWS Fri Nov 21 01:17:53 2008
@@ -19,7 +19,7 @@
- Issue #3327: Don't overallocate in the modules_by_index list.
- Issue #1721812: Binary set operations and copy() returned the input type
- instead of the appropriate base type. This was incorrect because set
+ instead of the appropriate base type. This was incorrect because set
subclasses would be created without their __init__() method being called.
The corrected behavior brings sets into line with lists and dicts.
@@ -33,6 +33,9 @@
Library
-------
+- Issue #3799: Fix dbm.dumb to accept strings as well as bytes for keys. String
+ keys are now written out in UTF-8.
+
- Issue #4338: Fix distutils upload command.
- Issue #4354: Fix distutils register command.
Date: Fri Nov 21 01:17:53 2008
New Revision: 67310
Log:
Make dbm.dumb encode string keys as UTF-8. Also fix it so that it accepts both
bytes and strings as keys.
Closes issue #3799.
Modified:
python/branches/py3k/Lib/dbm/dumb.py
python/branches/py3k/Lib/test/test_dbm_dumb.py
python/branches/py3k/Misc/NEWS
Modified: python/branches/py3k/Lib/dbm/dumb.py
==============================================================================
--- python/branches/py3k/Lib/dbm/dumb.py (original)
+++ python/branches/py3k/Lib/dbm/dumb.py Fri Nov 21 01:17:53 2008
@@ -84,6 +84,7 @@
for line in f:
line = line.rstrip()
key, pos_and_siz_pair = eval(line)
+ key = key.encode('Latin-1')
self._index[key] = pos_and_siz_pair
f.close()
@@ -110,13 +111,16 @@
f = self._io.open(self._dirfile, 'w')
self._chmod(self._dirfile)
for key, pos_and_siz_pair in self._index.items():
- f.write("%r, %r\n" % (key, pos_and_siz_pair))
+ # Use Latin-1 since it has no qualms with any value in any
+ # position; UTF-8, though, does care sometimes.
+ f.write("%r, %r\n" % (key.decode('Latin-1'), pos_and_siz_pair))
f.close()
sync = _commit
def __getitem__(self, key):
- key = key.decode("latin-1")
+ if isinstance(key, str):
+ key = key.encode('utf-8')
pos, siz = self._index[key] # may raise KeyError
f = _io.open(self._datfile, 'rb')
f.seek(pos)
@@ -161,11 +165,12 @@
f.close()
def __setitem__(self, key, val):
- if not isinstance(key, bytes):
- raise TypeError("keys must be bytes")
- key = key.decode("latin-1") # hashable bytes
+ if isinstance(key, str):
+ key = key.encode('utf-8')
+ elif not isinstance(key, (bytes, bytearray)):
+ raise TypeError("keys must be bytes or strings")
if not isinstance(val, (bytes, bytearray)):
- raise TypeError("values must be byte strings")
+ raise TypeError("values must be bytes")
if key not in self._index:
self._addkey(key, self._addval(val))
else:
@@ -191,7 +196,8 @@
# (so that _commit() never gets called).
def __delitem__(self, key):
- key = key.decode("latin-1")
+ if isinstance(key, str):
+ key = key.encode('utf-8')
# The blocks used by the associated value are lost.
del self._index[key]
# XXX It's unclear why we do a _commit() here (the code always
@@ -201,14 +207,14 @@
self._commit()
def keys(self):
- return [key.encode("latin-1") for key in self._index.keys()]
+ return list(self._index.keys())
def items(self):
- return [(key.encode("latin-1"), self[key.encode("latin-1")])
- for key in self._index.keys()]
+ return [(key, self[key]) for key in self._index.keys()]
def __contains__(self, key):
- key = key.decode("latin-1")
+ if isinstance(key, str):
+ key = key.encode('utf-8')
return key in self._index
def iterkeys(self):
Modified: python/branches/py3k/Lib/test/test_dbm_dumb.py
==============================================================================
--- python/branches/py3k/Lib/test/test_dbm_dumb.py (original)
+++ python/branches/py3k/Lib/test/test_dbm_dumb.py Fri Nov 21 01:17:53 2008
@@ -19,13 +19,14 @@
pass
class DumbDBMTestCase(unittest.TestCase):
- _dict = {'0': b'',
- 'a': b'Python:',
- 'b': b'Programming',
- 'c': b'the',
- 'd': b'way',
- 'f': b'Guido',
- 'g': b'intended',
+ _dict = {b'0': b'',
+ b'a': b'Python:',
+ b'b': b'Programming',
+ b'c': b'the',
+ b'd': b'way',
+ b'f': b'Guido',
+ b'g': b'intended',
+ '\u00fc'.encode('utf-8') : b'!',
}
def __init__(self, *args):
@@ -35,7 +36,7 @@
f = dumbdbm.open(_fname, 'c')
self.assertEqual(list(f.keys()), [])
for key in self._dict:
- f[key.encode("ascii")] = self._dict[key]
+ f[key] = self._dict[key]
self.read_helper(f)
f.close()
@@ -73,7 +74,7 @@
def test_dumbdbm_modification(self):
self.init_db()
f = dumbdbm.open(_fname, 'w')
- self._dict['g'] = f[b'g'] = b"indented"
+ self._dict[b'g'] = f[b'g'] = b"indented"
self.read_helper(f)
f.close()
@@ -105,6 +106,21 @@
self.assertEqual(f[b'1'], b'hello2')
f.close()
+ def test_str_read(self):
+ self.init_db()
+ f = dumbdbm.open(_fname, 'r')
+ self.assertEqual(f['\u00fc'], self._dict['\u00fc'.encode('utf-8')])
+
+ def test_str_write_contains(self):
+ self.init_db()
+ f = dumbdbm.open(_fname)
+ f['\u00fc'] = b'!'
+ f.close()
+ f = dumbdbm.open(_fname, 'r')
+ self.assert_('\u00fc' in f)
+ self.assertEqual(f['\u00fc'.encode('utf-8')],
+ self._dict['\u00fc'.encode('utf-8')])
+
def test_line_endings(self):
# test for bug #1172763: dumbdbm would die if the line endings
# weren't what was expected.
@@ -129,16 +145,16 @@
def read_helper(self, f):
keys = self.keys_helper(f)
for key in self._dict:
- self.assertEqual(self._dict[key], f[key.encode("ascii")])
+ self.assertEqual(self._dict[key], f[key])
def init_db(self):
f = dumbdbm.open(_fname, 'w')
for k in self._dict:
- f[k.encode("ascii")] = self._dict[k]
+ f[k] = self._dict[k]
f.close()
def keys_helper(self, f):
- keys = sorted(k.decode("ascii") for k in f.keys())
+ keys = sorted(f.keys())
dkeys = sorted(self._dict.keys())
self.assertEqual(keys, dkeys)
return keys
@@ -155,12 +171,12 @@
if random.random() < 0.2:
if k in d:
del d[k]
- del f[k.encode("ascii")]
+ del f[k]
else:
v = random.choice((b'a', b'b', b'c')) * random.randrange(10000)
d[k] = v
- f[k.encode("ascii")] = v
- self.assertEqual(f[k.encode("ascii")], v)
+ f[k] = v
+ self.assertEqual(f[k], v)
f.close()
f = dumbdbm.open(_fname)
Modified: python/branches/py3k/Misc/NEWS
==============================================================================
--- python/branches/py3k/Misc/NEWS (original)
+++ python/branches/py3k/Misc/NEWS Fri Nov 21 01:17:53 2008
@@ -19,7 +19,7 @@
- Issue #3327: Don't overallocate in the modules_by_index list.
- Issue #1721812: Binary set operations and copy() returned the input type
- instead of the appropriate base type. This was incorrect because set
+ instead of the appropriate base type. This was incorrect because set
subclasses would be created without their __init__() method being called.
The corrected behavior brings sets into line with lists and dicts.
@@ -33,6 +33,9 @@
Library
-------
+- Issue #3799: Fix dbm.dumb to accept strings as well as bytes for keys. String
+ keys are now written out in UTF-8.
+
- Issue #4338: Fix distutils upload command.
- Issue #4354: Fix distutils register command.