amaury.forgeotdarc
2008-11-11 23:05:00 UTC
Author: amaury.forgeotdarc
Date: Wed Nov 12 00:04:59 2008
New Revision: 67190
Log:
#3705: Command-line arguments were not correctly decoded when the
terminal does not use UTF8.
Now the code propagates the unicode string as far as possible, and avoids
the conversion to char* which implicitly uses utf-8.
Reviewed by Benjamin.
Modified:
python/branches/py3k/Lib/test/test_cmd_line.py
python/branches/py3k/Misc/NEWS
python/branches/py3k/Modules/main.c
python/branches/py3k/Python/import.c
Modified: python/branches/py3k/Lib/test/test_cmd_line.py
==============================================================================
--- python/branches/py3k/Lib/test/test_cmd_line.py (original)
+++ python/branches/py3k/Lib/test/test_cmd_line.py Wed Nov 12 00:04:59 2008
@@ -135,6 +135,12 @@
self.exit_code('-c', 'pass'),
0)
+ # Test handling of non-ascii data
+ command = "assert(ord('\xe9') == 0xe9)"
+ self.assertEqual(
+ self.exit_code('-c', command),
+ 0)
+
def test_main():
test.support.run_unittest(CmdLineTest)
Modified: python/branches/py3k/Misc/NEWS
==============================================================================
--- python/branches/py3k/Misc/NEWS (original)
+++ python/branches/py3k/Misc/NEWS Wed Nov 12 00:04:59 2008
@@ -13,6 +13,9 @@
Core and Builtins
-----------------
+- Issue #3705: Command-line arguments were not correctly decoded when the
+ terminal does not use UTF8.
+
Library
-------
Modified: python/branches/py3k/Modules/main.c
==============================================================================
--- python/branches/py3k/Modules/main.c (original)
+++ python/branches/py3k/Modules/main.c Wed Nov 12 00:04:59 2008
@@ -287,7 +287,7 @@
{
int c;
int sts;
- char *command = NULL;
+ wchar_t *command = NULL;
wchar_t *filename = NULL;
wchar_t *module = NULL;
FILE *fp = stdin;
@@ -299,7 +299,6 @@
int version = 0;
int saw_unbuffered_flag = 0;
PyCompilerFlags cf;
- char *oldloc;
cf.cf_flags = 0;
@@ -310,30 +309,19 @@
while ((c = _PyOS_GetOpt(argc, argv, PROGRAM_OPTS)) != EOF) {
if (c == 'c') {
- size_t r1, r2;
- oldloc = setlocale(LC_ALL, NULL);
- setlocale(LC_ALL, "");
- r1 = wcslen(_PyOS_optarg);
- r2 = wcstombs(NULL, _PyOS_optarg, r1);
- if (r2 == (size_t) -1)
- Py_FatalError(
- "cannot convert character encoding of -c argument");
- if (r2 > r1)
- r1 = r2;
- r1 += 2;
+ size_t len;
/* -c is the last option; following arguments
that look like options are left for the
command to interpret. */
- command = (char *)malloc(r1);
+
+ len = wcslen(_PyOS_optarg) + 1 + 1;
+ command = (wchar_t *)malloc(sizeof(wchar_t) * len);
if (command == NULL)
Py_FatalError(
"not enough memory to copy -c argument");
- r2 = wcstombs(command, _PyOS_optarg, r1);
- if (r2 > r1-1)
- Py_FatalError(
- "not enough memory to copy -c argument");
- strcat(command, "\n");
- setlocale(LC_ALL, oldloc);
+ wcscpy(command, _PyOS_optarg);
+ command[len - 2] = '\n';
+ command[len - 1] = 0;
break;
}
@@ -543,8 +531,18 @@
}
if (command) {
- sts = PyRun_SimpleStringFlags(command, &cf) != 0;
+ PyObject *commandObj = PyUnicode_FromWideChar(
+ command, wcslen(command));
free(command);
+ if (commandObj != NULL) {
+ sts = PyRun_SimpleStringFlags(
+ _PyUnicode_AsString(commandObj), &cf) != 0;
+ }
+ else {
+ PyErr_Print();
+ sts = 1;
+ }
+ Py_DECREF(commandObj);
} else if (module) {
sts = RunModule(module, 1);
}
Modified: python/branches/py3k/Python/import.c
==============================================================================
--- python/branches/py3k/Python/import.c (original)
+++ python/branches/py3k/Python/import.c Wed Nov 12 00:04:59 2008
@@ -2793,6 +2793,7 @@
{
extern int fclose(FILE *);
PyObject *fob, *ret;
+ PyObject *pathobj;
struct filedescr *fdp;
char pathname[MAXPATHLEN+1];
FILE *fp = NULL;
@@ -2836,9 +2837,9 @@
fob = Py_None;
Py_INCREF(fob);
}
- ret = Py_BuildValue("Os(ssi)",
- fob, pathname, fdp->suffix, fdp->mode, fdp->type);
- Py_DECREF(fob);
+ pathobj = PyUnicode_DecodeFSDefault(pathname);
+ ret = Py_BuildValue("NN(ssi)",
+ fob, pathobj, fdp->suffix, fdp->mode, fdp->type);
PyMem_FREE(found_encoding);
return ret;
@@ -2849,7 +2850,9 @@
{
char *name;
PyObject *path = NULL;
- if (!PyArg_ParseTuple(args, "s|O:find_module", &name, &path))
+ if (!PyArg_ParseTuple(args, "es|O:find_module",
+ Py_FileSystemDefaultEncoding, &name,
+ &path))
return NULL;
return call_find_module(name, path);
}
Date: Wed Nov 12 00:04:59 2008
New Revision: 67190
Log:
#3705: Command-line arguments were not correctly decoded when the
terminal does not use UTF8.
Now the code propagates the unicode string as far as possible, and avoids
the conversion to char* which implicitly uses utf-8.
Reviewed by Benjamin.
Modified:
python/branches/py3k/Lib/test/test_cmd_line.py
python/branches/py3k/Misc/NEWS
python/branches/py3k/Modules/main.c
python/branches/py3k/Python/import.c
Modified: python/branches/py3k/Lib/test/test_cmd_line.py
==============================================================================
--- python/branches/py3k/Lib/test/test_cmd_line.py (original)
+++ python/branches/py3k/Lib/test/test_cmd_line.py Wed Nov 12 00:04:59 2008
@@ -135,6 +135,12 @@
self.exit_code('-c', 'pass'),
0)
+ # Test handling of non-ascii data
+ command = "assert(ord('\xe9') == 0xe9)"
+ self.assertEqual(
+ self.exit_code('-c', command),
+ 0)
+
def test_main():
test.support.run_unittest(CmdLineTest)
Modified: python/branches/py3k/Misc/NEWS
==============================================================================
--- python/branches/py3k/Misc/NEWS (original)
+++ python/branches/py3k/Misc/NEWS Wed Nov 12 00:04:59 2008
@@ -13,6 +13,9 @@
Core and Builtins
-----------------
+- Issue #3705: Command-line arguments were not correctly decoded when the
+ terminal does not use UTF8.
+
Library
-------
Modified: python/branches/py3k/Modules/main.c
==============================================================================
--- python/branches/py3k/Modules/main.c (original)
+++ python/branches/py3k/Modules/main.c Wed Nov 12 00:04:59 2008
@@ -287,7 +287,7 @@
{
int c;
int sts;
- char *command = NULL;
+ wchar_t *command = NULL;
wchar_t *filename = NULL;
wchar_t *module = NULL;
FILE *fp = stdin;
@@ -299,7 +299,6 @@
int version = 0;
int saw_unbuffered_flag = 0;
PyCompilerFlags cf;
- char *oldloc;
cf.cf_flags = 0;
@@ -310,30 +309,19 @@
while ((c = _PyOS_GetOpt(argc, argv, PROGRAM_OPTS)) != EOF) {
if (c == 'c') {
- size_t r1, r2;
- oldloc = setlocale(LC_ALL, NULL);
- setlocale(LC_ALL, "");
- r1 = wcslen(_PyOS_optarg);
- r2 = wcstombs(NULL, _PyOS_optarg, r1);
- if (r2 == (size_t) -1)
- Py_FatalError(
- "cannot convert character encoding of -c argument");
- if (r2 > r1)
- r1 = r2;
- r1 += 2;
+ size_t len;
/* -c is the last option; following arguments
that look like options are left for the
command to interpret. */
- command = (char *)malloc(r1);
+
+ len = wcslen(_PyOS_optarg) + 1 + 1;
+ command = (wchar_t *)malloc(sizeof(wchar_t) * len);
if (command == NULL)
Py_FatalError(
"not enough memory to copy -c argument");
- r2 = wcstombs(command, _PyOS_optarg, r1);
- if (r2 > r1-1)
- Py_FatalError(
- "not enough memory to copy -c argument");
- strcat(command, "\n");
- setlocale(LC_ALL, oldloc);
+ wcscpy(command, _PyOS_optarg);
+ command[len - 2] = '\n';
+ command[len - 1] = 0;
break;
}
@@ -543,8 +531,18 @@
}
if (command) {
- sts = PyRun_SimpleStringFlags(command, &cf) != 0;
+ PyObject *commandObj = PyUnicode_FromWideChar(
+ command, wcslen(command));
free(command);
+ if (commandObj != NULL) {
+ sts = PyRun_SimpleStringFlags(
+ _PyUnicode_AsString(commandObj), &cf) != 0;
+ }
+ else {
+ PyErr_Print();
+ sts = 1;
+ }
+ Py_DECREF(commandObj);
} else if (module) {
sts = RunModule(module, 1);
}
Modified: python/branches/py3k/Python/import.c
==============================================================================
--- python/branches/py3k/Python/import.c (original)
+++ python/branches/py3k/Python/import.c Wed Nov 12 00:04:59 2008
@@ -2793,6 +2793,7 @@
{
extern int fclose(FILE *);
PyObject *fob, *ret;
+ PyObject *pathobj;
struct filedescr *fdp;
char pathname[MAXPATHLEN+1];
FILE *fp = NULL;
@@ -2836,9 +2837,9 @@
fob = Py_None;
Py_INCREF(fob);
}
- ret = Py_BuildValue("Os(ssi)",
- fob, pathname, fdp->suffix, fdp->mode, fdp->type);
- Py_DECREF(fob);
+ pathobj = PyUnicode_DecodeFSDefault(pathname);
+ ret = Py_BuildValue("NN(ssi)",
+ fob, pathobj, fdp->suffix, fdp->mode, fdp->type);
PyMem_FREE(found_encoding);
return ret;
@@ -2849,7 +2850,9 @@
{
char *name;
PyObject *path = NULL;
- if (!PyArg_ParseTuple(args, "s|O:find_module", &name, &path))
+ if (!PyArg_ParseTuple(args, "es|O:find_module",
+ Py_FileSystemDefaultEncoding, &name,
+ &path))
return NULL;
return call_find_module(name, path);
}