[Python-3000-checkins] r66867 - in python/branches/py3k: Lib/test/test_traceback.py Misc/NEWS Parser/tokenizer.c Python/traceback.c

amaury.forgeotdarc

2008-10-09 23:37:49 UTC

Author: amaury.forgeotdarc
Date: Fri Oct 10 01:37:48 2008
New Revision: 66867

Log:
Issues #2384 and #3975: Tracebacks were not correctly printed when the source file
contains a ``coding:`` header: the wrong line was displayed, and the encoding was not respected.

Patch by Victor Stinner.

Modified:
python/branches/py3k/Lib/test/test_traceback.py
python/branches/py3k/Misc/NEWS
python/branches/py3k/Parser/tokenizer.c
python/branches/py3k/Python/traceback.c

Modified: python/branches/py3k/Lib/test/test_traceback.py
==============================================================================
--- python/branches/py3k/Lib/test/test_traceback.py (original)
+++ python/branches/py3k/Lib/test/test_traceback.py Fri Oct 10 01:37:48 2008
@@ -6,6 +6,7 @@
import unittest
import re
from test.support import run_unittest, is_jython, Error, captured_output
+from test.support import TESTFN, unlink

import traceback

@@ -90,6 +91,70 @@
err = traceback.format_exception_only(None, None)
self.assertEqual(err, ['None\n'])

+ def test_encoded_file(self):
+ # Test that tracebacks are correctly printed for encoded source files:
+ # - correct line number (Issue2384)
+ # - respect file encoding (Issue3975)
+ import tempfile, sys, subprocess, os
+
+ # The spawned subprocess has its stdout redirected to a PIPE, and its
+ # encoding may be different from the current interpreter, on Windows
+ # at least.
+ process = subprocess.Popen([sys.executable, "-c",
+ "import sys; print(sys.stdout.encoding)"],
+ stdout=subprocess.PIPE,
+ stderr=subprocess.STDOUT)
+ stdout, stderr = process.communicate()
+ output_encoding = str(stdout, 'ascii').splitlines()[0]
+
+ def do_test(firstlines, message, charset, lineno):
+ # Raise the message in a subprocess, and catch the output
+ try:
+ output = open(TESTFN, "w", encoding=charset)
+ output.write("""{0}if 1:
+ import traceback;
+ raise RuntimeError('{1}')
+ """.format(firstlines, message))
+ output.close()
+ process = subprocess.Popen([sys.executable, TESTFN],
+ stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+ stdout, stderr = process.communicate()
+ stdout = stdout.decode(output_encoding).splitlines()
+ finally:
+ unlink(TESTFN)
+
+ # The source lines are encoded with the 'backslashreplace' handler
+ encoded_message = message.encode(output_encoding,
+ 'backslashreplace')
+ # and we just decoded them with the output_encoding.
+ message_ascii = encoded_message.decode(output_encoding)
+
+ err_line = "raise RuntimeError('{0}')".format(message_ascii)
+ err_msg = "RuntimeError: {0}".format(message_ascii)
+
+ self.assert_(("line %s" % lineno) in stdout[1],
+ "Invalid line number: {0!r} instead of {1}".format(
+ stdout[1], lineno))
+ self.assert_(stdout[2].endswith(err_line),
+ "Invalid traceback line: {0!r} instead of {1!r}".format(
+ stdout[2], err_line))
+ self.assert_(stdout[3] == err_msg,
+ "Invalid error message: {0!r} instead of {1!r}".format(
+ stdout[3], err_msg))
+
+ do_test("", "foo", "ascii", 3)
+ for charset in ("ascii", "iso-8859-1", "utf-8", "GBK"):
+ if charset == "ascii":
+ text = "foo"
+ elif charset == "GBK":
+ text = "\u4E02\u5100"
+ else:
+ text = "h\xe9 ho"
+ do_test("# coding: {0}\n".format(charset),
+ text, charset, 4)
+ do_test("#!shebang\n# coding: {0}\n".format(charset),
+ text, charset, 5)
+

class TracebackFormatTests(unittest.TestCase):

Modified: python/branches/py3k/Misc/NEWS
==============================================================================
--- python/branches/py3k/Misc/NEWS (original)
+++ python/branches/py3k/Misc/NEWS Fri Oct 10 01:37:48 2008
@@ -15,6 +15,10 @@
Core and Builtins
-----------------

+- Issues #2384 and #3975: Tracebacks were not correctly printed when the
+ source file contains a ``coding:`` header: the wrong line was displayed, and
+ the encoding was not respected.
+
- Issue #3740: Null-initialize module state.

- Issue #3946: PyObject_CheckReadBuffer crashed on a memoryview object.

Modified: python/branches/py3k/Parser/tokenizer.c
==============================================================================
--- python/branches/py3k/Parser/tokenizer.c (original)
+++ python/branches/py3k/Parser/tokenizer.c Fri Oct 10 01:37:48 2008
@@ -461,6 +461,14 @@
readline = PyObject_GetAttrString(stream, "readline");
tok->decoding_readline = readline;

+ /* The file has been reopened; parsing will restart from
+ * the beginning of the file, we have to reset the line number.
+ * But this function has been called from inside tok_nextc() which
+ * will increment lineno before it returns. So we set it -1 so that
+ * the next call to tok_nextc() will start with tok->lineno == 0.
+ */
+ tok->lineno = -1;
+
cleanup:
Py_XDECREF(stream);
Py_XDECREF(io);

Modified: python/branches/py3k/Python/traceback.c
==============================================================================
--- python/branches/py3k/Python/traceback.c (original)
+++ python/branches/py3k/Python/traceback.c Fri Oct 10 01:37:48 2008
@@ -8,9 +8,15 @@
#include "structmember.h"
#include "osdefs.h"
#include "traceback.h"
+#ifdef HAVE_FCNTL_H
+#include <fcntl.h>
+#endif

#define OFF(x) offsetof(PyTracebackObject, x)

+/* Method from Parser/tokenizer.c */
+extern char * PyTokenizer_FindEncoding(int);
+
static PyObject *
tb_dir(PyTracebackObject *self)
{
@@ -128,102 +134,156 @@
return 0;
}

+static int
+_Py_FindSourceFile(const char* filename, char* namebuf, size_t namelen, int open_flags)
+{
+ int i;
+ int fd = -1;
+ PyObject *v;
+ Py_ssize_t _npath;
+ int npath;
+ size_t taillen;
+ PyObject *syspath;
+ const char* path;
+ const char* tail;
+ Py_ssize_t len;
+
+ /* Search tail of filename in sys.path before giving up */
+ tail = strrchr(filename, SEP);
+ if (tail == NULL)
+ tail = filename;
+ else
+ tail++;
+ taillen = strlen(tail);
+
+ syspath = PySys_GetObject("path");
+ if (syspath == NULL || !PyList_Check(syspath))
+ return -1;
+ _npath = PyList_Size(syspath);
+ npath = Py_SAFE_DOWNCAST(_npath, Py_ssize_t, int);
+
+ for (i = 0; i < npath; i++) {
+ v = PyList_GetItem(syspath, i);
+ if (v == NULL) {
+ PyErr_Clear();
+ break;
+ }
+ if (!PyUnicode_Check(v))
+ continue;
+ path = _PyUnicode_AsStringAndSize(v, &len);
+ if (len + 1 + taillen >= (Py_ssize_t)namelen - 1)
+ continue; /* Too long */
+ strcpy(namebuf, path);
+ if (strlen(namebuf) != len)
+ continue; /* v contains '\0' */
+ if (len > 0 && namebuf[len-1] != SEP)
+ namebuf[len++] = SEP;
+ strcpy(namebuf+len, tail);
+ Py_BEGIN_ALLOW_THREADS
+ fd = open(namebuf, open_flags);
+ Py_END_ALLOW_THREADS
+ if (0 <= fd) {
+ return fd;
+ }
+ }
+ return -1;
+}
+
int
_Py_DisplaySourceLine(PyObject *f, const char *filename, int lineno, int indent)
{
int err = 0;
- FILE *xfp = NULL;
- char linebuf[2000];
+ int fd;
int i;
- char namebuf[MAXPATHLEN+1];
+ char *found_encoding;
+ char *encoding;
+ PyObject *fob = NULL;
+ PyObject *lineobj = NULL;
+#ifdef O_BINARY
+ const int open_flags = O_RDONLY | O_BINARY; /* necessary for Windows */
+#else
+ const int open_flags = O_RDONLY;
+#endif
+ char buf[MAXPATHLEN+1];
+ Py_UNICODE *u, *p;
+ Py_ssize_t len;

+ /* open the file */
if (filename == NULL)
- return -1;
- xfp = fopen(filename, "r" PY_STDIOTEXTMODE);
- if (xfp == NULL) {
- /* Search tail of filename in sys.path before giving up */
- PyObject *path;
- const char *tail = strrchr(filename, SEP);
- if (tail == NULL)
- tail = filename;
- else
- tail++;
- path = PySys_GetObject("path");
- if (path != NULL && PyList_Check(path)) {
- Py_ssize_t _npath = PyList_Size(path);
- int npath = Py_SAFE_DOWNCAST(_npath, Py_ssize_t, int);
- size_t taillen = strlen(tail);
- for (i = 0; i < npath; i++) {
- PyObject *v = PyList_GetItem(path, i);
- if (v == NULL) {
- PyErr_Clear();
- break;
- }
- if (PyBytes_Check(v)) {
- size_t len;
- len = PyBytes_GET_SIZE(v);
- if (len + 1 + taillen >= MAXPATHLEN)
- continue; /* Too long */
- strcpy(namebuf, PyBytes_AsString(v));
- if (strlen(namebuf) != len)
- continue; /* v contains '\0' */
- if (len > 0 && namebuf[len-1] != SEP)
- namebuf[len++] = SEP;
- strcpy(namebuf+len, tail);
- xfp = fopen(namebuf, "r" PY_STDIOTEXTMODE);
- if (xfp != NULL) {
- filename = namebuf;
- break;
- }
- }
- }
- }
+ return 0;
+ Py_BEGIN_ALLOW_THREADS
+ fd = open(filename, open_flags);
+ Py_END_ALLOW_THREADS
+ if (fd < 0) {
+ fd = _Py_FindSourceFile(filename, buf, sizeof(buf), open_flags);
+ if (fd < 0)
+ return 0;
+ filename = buf;
}

- if (xfp == NULL)
- return err;
- if (err != 0) {
- fclose(xfp);
- return err;
- }
+ /* use the right encoding to decode the file as unicode */
+ found_encoding = PyTokenizer_FindEncoding(fd);
+ encoding = (found_encoding != NULL) ? found_encoding :
+ (char*)PyUnicode_GetDefaultEncoding();
+ lseek(fd, 0, 0); /* Reset position */
+ fob = PyFile_FromFd(fd, (char*)filename, "r", -1, (char*)encoding,
+ NULL, NULL, 1);
+ PyMem_FREE(found_encoding);
+ if (fob == NULL) {
+ PyErr_Clear();
+ close(fd);
+ return 0;
+ }

+ /* get the line number lineno */
for (i = 0; i < lineno; i++) {
- char* pLastChar = &linebuf[sizeof(linebuf)-2];
- do {
- *pLastChar = '\0';
- if (Py_UniversalNewlineFgets(linebuf, sizeof linebuf, xfp, NULL) == NULL)
- break;
- /* fgets read *something*; if it didn't get as
- far as pLastChar, it must have found a newline
- or hit the end of the file; if pLastChar is \n,
- it obviously found a newline; else we haven't
- yet seen a newline, so must continue */
- } while (*pLastChar != '\0' && *pLastChar != '\n');
- }
- if (i == lineno) {
- char buf[11];
- char *p = linebuf;
- while (*p == ' ' || *p == '\t' || *p == '\014')
- p++;
-
- /* Write some spaces before the line */
- strcpy(buf, " ");
- assert (strlen(buf) == 10);
- while (indent > 0) {
- if(indent < 10)
- buf[indent] = '\0';
- err = PyFile_WriteString(buf, f);
- if (err != 0)
- break;
- indent -= 10;
+ Py_XDECREF(lineobj);
+ lineobj = PyFile_GetLine(fob, -1);
+ if (!lineobj) {
+ err = -1;
+ break;
}
+ }
+ Py_DECREF(fob);
+ if (!lineobj || !PyUnicode_Check(lineobj)) {
+ Py_XDECREF(lineobj);
+ return err;
+ }

- if (err == 0)
- err = PyFile_WriteString(p, f);
- if (err == 0 && strchr(p, '\n') == NULL)
- err = PyFile_WriteString("\n", f);
+ /* remove the indentation of the line */
+ u = PyUnicode_AS_UNICODE(lineobj);
+ len = PyUnicode_GET_SIZE(lineobj);
+ for (p=u; *p == ' ' || *p == '\t' || *p == '\014'; p++)
+ len--;
+ if (u != p) {
+ PyObject *truncated;
+ truncated = PyUnicode_FromUnicode(p, len);
+ if (truncated) {
+ Py_DECREF(lineobj);
+ lineobj = truncated;
+ } else {
+ PyErr_Clear();
+ }
}
- fclose(xfp);
+
+ /* Write some spaces before the line */
+ strcpy(buf, " ");
+ assert (strlen(buf) == 10);
+ while (indent > 0) {
+ if(indent < 10)
+ buf[indent] = '\0';
+ err = PyFile_WriteString(buf, f);
+ if (err != 0)
+ break;
+ indent -= 10;
+ }
+
+ /* finally display the line */
+ if (err == 0)
+ err = PyFile_WriteObject(lineobj, f, Py_PRINT_RAW);
+ Py_DECREF(lineobj);
+ if (err == 0)
+ err = PyFile_WriteString("\n", f);
return err;
}