antoine.pitrou
2008-07-25 22:39:39 UTC
Author: antoine.pitrou
Date: Sat Jul 26 00:39:39 2008
New Revision: 65243
Log:
Merged revisions 65240-65242 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk
........
r65240 | antoine.pitrou | 2008-07-26 00:02:07 +0200 (sam., 26 juil. 2008) | 3 lines
add a pybench test for complex function calls (part of #1819)
........
r65241 | antoine.pitrou | 2008-07-26 00:13:52 +0200 (sam., 26 juil. 2008) | 4 lines
Raymond's patch for #1819: speedup function calls with named parameters
(35% faster according to pybench)
........
r65242 | antoine.pitrou | 2008-07-26 00:22:08 +0200 (sam., 26 juil. 2008) | 3 lines
add a NEWS entry
........
Modified:
python/branches/py3k/ (props changed)
python/branches/py3k/Misc/NEWS
python/branches/py3k/Python/ceval.c
python/branches/py3k/Tools/pybench/Calls.py
Modified: python/branches/py3k/Misc/NEWS
==============================================================================
--- python/branches/py3k/Misc/NEWS (original)
+++ python/branches/py3k/Misc/NEWS Sat Jul 26 00:39:39 2008
@@ -9,6 +9,12 @@
*Release date: XX-XXX-2008*
+Core and Builtins
+-----------------
+
+- Issue #1819: function calls with several named parameters are now on
+ average 35% faster (as measured by pybench).
+
Library
-------
Modified: python/branches/py3k/Python/ceval.c
==============================================================================
--- python/branches/py3k/Python/ceval.c (original)
+++ python/branches/py3k/Python/ceval.c Sat Jul 26 00:39:39 2008
@@ -642,9 +642,9 @@
processor's own internal branch predication has a high likelihood of
success, resulting in a nearly zero-overhead transition to the
next opcode. A successful prediction saves a trip through the eval-loop
- including its two unpredictable branches, the HAS_ARG test and the
+ including its two unpredictable branches, the HAS_ARG test and the
switch-case. Combined with the processor's internal branch prediction,
- a successful PREDICT has the effect of making the two opcodes run as if
+ a successful PREDICT has the effect of making the two opcodes run as if
they were a single new opcode with the bodies combined.
If collecting opcode statistics, your choices are to either keep the
@@ -796,7 +796,7 @@
an argument which depends on the situation.
The global trace function is also called
whenever an exception is detected. */
- if (call_trace_protected(tstate->c_tracefunc,
+ if (call_trace_protected(tstate->c_tracefunc,
tstate->c_traceobj,
f, PyTrace_CALL, Py_None)) {
/* Trace function raised an error */
@@ -828,10 +828,10 @@
this wasn't always true before 2.3! PyFrame_New now sets
f->f_lasti to -1 (i.e. the index *before* the first instruction)
and YIELD_VALUE doesn't fiddle with f_lasti any more. So this
- does work. Promise.
+ does work. Promise.
When the PREDICT() macros are enabled, some opcode pairs follow in
- direct succession without updating f->f_lasti. A successful
+ direct succession without updating f->f_lasti. A successful
prediction effectively links the two codes together as if they
were a single new opcode; accordingly,f->f_lasti will point to
the first code in the pair (for instance, GET_ITER followed by
@@ -1678,7 +1678,7 @@
{
int totalargs = 1 + (oparg & 0xFF) + (oparg >> 8);
v = POP();
-
+
if (unpack_iterable(v, oparg & 0xFF, oparg >> 8,
stack_pointer + totalargs)) {
stack_pointer += totalargs;
@@ -2071,7 +2071,7 @@
because it prevents detection of a control-break in tight loops like
"while 1: pass". Compile with this option turned-on when you need
the speed-up and do not need break checking inside tight loops (ones
- that contain only instructions ending with goto fast_next_opcode).
+ that contain only instructions ending with goto fast_next_opcode).
*/
goto fast_next_opcode;
#else
@@ -2257,7 +2257,7 @@
break;
}
- case MAKE_CLOSURE:
+ case MAKE_CLOSURE:
case MAKE_FUNCTION:
{
int posdefaults = oparg & 0xff;
@@ -2267,7 +2267,7 @@
v = POP(); /* code object */
x = PyFunction_New(v, f->f_globals);
Py_DECREF(v);
-
+
if (x != NULL && opcode == MAKE_CLOSURE) {
v = POP();
err = PyFunction_SetClosure(x, v);
@@ -2650,6 +2650,7 @@
}
}
for (i = 0; i < kwcount; i++) {
+ PyObject **co_varnames;
PyObject *keyword = kws[2*i];
PyObject *value = kws[2*i + 1];
int j;
@@ -2659,16 +2660,25 @@
co->co_name);
goto fail;
}
- /* XXX slow -- speed up using dictionary? */
+ /* Speed hack: do raw pointer compares. As names are
+ normally interned this should almost always hit. */
+ co_varnames = PySequence_Fast_ITEMS(co->co_varnames);
+ for (j = 0;
+ j < co->co_argcount + co->co_kwonlyargcount;
+ j++) {
+ PyObject *nm = co_varnames[j];
+ if (nm == keyword)
+ goto kw_found;
+ }
+ /* Slow fallback, just in case */
for (j = 0;
j < co->co_argcount + co->co_kwonlyargcount;
j++) {
- PyObject *nm = PyTuple_GET_ITEM(
- co->co_varnames, j);
+ PyObject *nm = co_varnames[j];
int cmp = PyObject_RichCompareBool(
keyword, nm, Py_EQ);
if (cmp > 0)
- break;
+ goto kw_found;
else if (cmp < 0)
goto fail;
}
@@ -2685,20 +2695,20 @@
goto fail;
}
PyDict_SetItem(kwdict, keyword, value);
+ continue;
}
- else {
- if (GETLOCAL(j) != NULL) {
- PyErr_Format(PyExc_TypeError,
- "%U() got multiple "
- "values for keyword "
- "argument '%S'",
- co->co_name,
- keyword);
- goto fail;
- }
- Py_INCREF(value);
- SETLOCAL(j, value);
+kw_found:
+ if (GETLOCAL(j) != NULL) {
+ PyErr_Format(PyExc_TypeError,
+ "%U() got multiple "
+ "values for keyword "
+ "argument '%S'",
+ co->co_name,
+ keyword);
+ goto fail;
}
+ Py_INCREF(value);
+ SETLOCAL(j, value);
}
if (co->co_kwonlyargcount > 0) {
for (i = co->co_argcount;
@@ -2930,7 +2940,7 @@
/* Iterate v argcnt times and store the results on the stack (via decreasing
sp). Return 1 for success, 0 if error.
-
+
If argcntafter == -1, do a simple unpack. If it is >= 0, do an unpack
with a variable target.
*/
Modified: python/branches/py3k/Tools/pybench/Calls.py
==============================================================================
--- python/branches/py3k/Tools/pybench/Calls.py (original)
+++ python/branches/py3k/Tools/pybench/Calls.py Sat Jul 26 00:39:39 2008
@@ -109,6 +109,64 @@
###
+class ComplexPythonFunctionCalls(Test):
+
+ version = 2.0
+ operations = 4*5
+ rounds = 100000
+
+ def test(self):
+
+ # define functions
+ def f(a,b,c,d=1,e=2,f=3):
+ return f
+
+ args = 1,2
+ kwargs = dict(c=3,d=4,e=5)
+
+ # do calls
+ for i in range(self.rounds):
+ f(a=i,b=i,c=i)
+ f(f=i,e=i,d=i,c=2,b=i,a=3)
+ f(1,b=i,**kwargs)
+ f(*args,**kwargs)
+
+ f(a=i,b=i,c=i)
+ f(f=i,e=i,d=i,c=2,b=i,a=3)
+ f(1,b=i,**kwargs)
+ f(*args,**kwargs)
+
+ f(a=i,b=i,c=i)
+ f(f=i,e=i,d=i,c=2,b=i,a=3)
+ f(1,b=i,**kwargs)
+ f(*args,**kwargs)
+
+ f(a=i,b=i,c=i)
+ f(f=i,e=i,d=i,c=2,b=i,a=3)
+ f(1,b=i,**kwargs)
+ f(*args,**kwargs)
+
+ f(a=i,b=i,c=i)
+ f(f=i,e=i,d=i,c=2,b=i,a=3)
+ f(1,b=i,**kwargs)
+ f(*args,**kwargs)
+
+
+ def calibrate(self):
+
+ # define functions
+ def f(a,b,c,d=1,e=2,f=3):
+ return f
+
+ args = 1,2
+ kwargs = dict(c=3,d=4,e=5)
+
+ # do calls
+ for i in range(self.rounds):
+ pass
+
+###
+
class BuiltinFunctionCalls(Test):
version = 2.0
Date: Sat Jul 26 00:39:39 2008
New Revision: 65243
Log:
Merged revisions 65240-65242 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk
........
r65240 | antoine.pitrou | 2008-07-26 00:02:07 +0200 (sam., 26 juil. 2008) | 3 lines
add a pybench test for complex function calls (part of #1819)
........
r65241 | antoine.pitrou | 2008-07-26 00:13:52 +0200 (sam., 26 juil. 2008) | 4 lines
Raymond's patch for #1819: speedup function calls with named parameters
(35% faster according to pybench)
........
r65242 | antoine.pitrou | 2008-07-26 00:22:08 +0200 (sam., 26 juil. 2008) | 3 lines
add a NEWS entry
........
Modified:
python/branches/py3k/ (props changed)
python/branches/py3k/Misc/NEWS
python/branches/py3k/Python/ceval.c
python/branches/py3k/Tools/pybench/Calls.py
Modified: python/branches/py3k/Misc/NEWS
==============================================================================
--- python/branches/py3k/Misc/NEWS (original)
+++ python/branches/py3k/Misc/NEWS Sat Jul 26 00:39:39 2008
@@ -9,6 +9,12 @@
*Release date: XX-XXX-2008*
+Core and Builtins
+-----------------
+
+- Issue #1819: function calls with several named parameters are now on
+ average 35% faster (as measured by pybench).
+
Library
-------
Modified: python/branches/py3k/Python/ceval.c
==============================================================================
--- python/branches/py3k/Python/ceval.c (original)
+++ python/branches/py3k/Python/ceval.c Sat Jul 26 00:39:39 2008
@@ -642,9 +642,9 @@
processor's own internal branch predication has a high likelihood of
success, resulting in a nearly zero-overhead transition to the
next opcode. A successful prediction saves a trip through the eval-loop
- including its two unpredictable branches, the HAS_ARG test and the
+ including its two unpredictable branches, the HAS_ARG test and the
switch-case. Combined with the processor's internal branch prediction,
- a successful PREDICT has the effect of making the two opcodes run as if
+ a successful PREDICT has the effect of making the two opcodes run as if
they were a single new opcode with the bodies combined.
If collecting opcode statistics, your choices are to either keep the
@@ -796,7 +796,7 @@
an argument which depends on the situation.
The global trace function is also called
whenever an exception is detected. */
- if (call_trace_protected(tstate->c_tracefunc,
+ if (call_trace_protected(tstate->c_tracefunc,
tstate->c_traceobj,
f, PyTrace_CALL, Py_None)) {
/* Trace function raised an error */
@@ -828,10 +828,10 @@
this wasn't always true before 2.3! PyFrame_New now sets
f->f_lasti to -1 (i.e. the index *before* the first instruction)
and YIELD_VALUE doesn't fiddle with f_lasti any more. So this
- does work. Promise.
+ does work. Promise.
When the PREDICT() macros are enabled, some opcode pairs follow in
- direct succession without updating f->f_lasti. A successful
+ direct succession without updating f->f_lasti. A successful
prediction effectively links the two codes together as if they
were a single new opcode; accordingly,f->f_lasti will point to
the first code in the pair (for instance, GET_ITER followed by
@@ -1678,7 +1678,7 @@
{
int totalargs = 1 + (oparg & 0xFF) + (oparg >> 8);
v = POP();
-
+
if (unpack_iterable(v, oparg & 0xFF, oparg >> 8,
stack_pointer + totalargs)) {
stack_pointer += totalargs;
@@ -2071,7 +2071,7 @@
because it prevents detection of a control-break in tight loops like
"while 1: pass". Compile with this option turned-on when you need
the speed-up and do not need break checking inside tight loops (ones
- that contain only instructions ending with goto fast_next_opcode).
+ that contain only instructions ending with goto fast_next_opcode).
*/
goto fast_next_opcode;
#else
@@ -2257,7 +2257,7 @@
break;
}
- case MAKE_CLOSURE:
+ case MAKE_CLOSURE:
case MAKE_FUNCTION:
{
int posdefaults = oparg & 0xff;
@@ -2267,7 +2267,7 @@
v = POP(); /* code object */
x = PyFunction_New(v, f->f_globals);
Py_DECREF(v);
-
+
if (x != NULL && opcode == MAKE_CLOSURE) {
v = POP();
err = PyFunction_SetClosure(x, v);
@@ -2650,6 +2650,7 @@
}
}
for (i = 0; i < kwcount; i++) {
+ PyObject **co_varnames;
PyObject *keyword = kws[2*i];
PyObject *value = kws[2*i + 1];
int j;
@@ -2659,16 +2660,25 @@
co->co_name);
goto fail;
}
- /* XXX slow -- speed up using dictionary? */
+ /* Speed hack: do raw pointer compares. As names are
+ normally interned this should almost always hit. */
+ co_varnames = PySequence_Fast_ITEMS(co->co_varnames);
+ for (j = 0;
+ j < co->co_argcount + co->co_kwonlyargcount;
+ j++) {
+ PyObject *nm = co_varnames[j];
+ if (nm == keyword)
+ goto kw_found;
+ }
+ /* Slow fallback, just in case */
for (j = 0;
j < co->co_argcount + co->co_kwonlyargcount;
j++) {
- PyObject *nm = PyTuple_GET_ITEM(
- co->co_varnames, j);
+ PyObject *nm = co_varnames[j];
int cmp = PyObject_RichCompareBool(
keyword, nm, Py_EQ);
if (cmp > 0)
- break;
+ goto kw_found;
else if (cmp < 0)
goto fail;
}
@@ -2685,20 +2695,20 @@
goto fail;
}
PyDict_SetItem(kwdict, keyword, value);
+ continue;
}
- else {
- if (GETLOCAL(j) != NULL) {
- PyErr_Format(PyExc_TypeError,
- "%U() got multiple "
- "values for keyword "
- "argument '%S'",
- co->co_name,
- keyword);
- goto fail;
- }
- Py_INCREF(value);
- SETLOCAL(j, value);
+kw_found:
+ if (GETLOCAL(j) != NULL) {
+ PyErr_Format(PyExc_TypeError,
+ "%U() got multiple "
+ "values for keyword "
+ "argument '%S'",
+ co->co_name,
+ keyword);
+ goto fail;
}
+ Py_INCREF(value);
+ SETLOCAL(j, value);
}
if (co->co_kwonlyargcount > 0) {
for (i = co->co_argcount;
@@ -2930,7 +2940,7 @@
/* Iterate v argcnt times and store the results on the stack (via decreasing
sp). Return 1 for success, 0 if error.
-
+
If argcntafter == -1, do a simple unpack. If it is >= 0, do an unpack
with a variable target.
*/
Modified: python/branches/py3k/Tools/pybench/Calls.py
==============================================================================
--- python/branches/py3k/Tools/pybench/Calls.py (original)
+++ python/branches/py3k/Tools/pybench/Calls.py Sat Jul 26 00:39:39 2008
@@ -109,6 +109,64 @@
###
+class ComplexPythonFunctionCalls(Test):
+
+ version = 2.0
+ operations = 4*5
+ rounds = 100000
+
+ def test(self):
+
+ # define functions
+ def f(a,b,c,d=1,e=2,f=3):
+ return f
+
+ args = 1,2
+ kwargs = dict(c=3,d=4,e=5)
+
+ # do calls
+ for i in range(self.rounds):
+ f(a=i,b=i,c=i)
+ f(f=i,e=i,d=i,c=2,b=i,a=3)
+ f(1,b=i,**kwargs)
+ f(*args,**kwargs)
+
+ f(a=i,b=i,c=i)
+ f(f=i,e=i,d=i,c=2,b=i,a=3)
+ f(1,b=i,**kwargs)
+ f(*args,**kwargs)
+
+ f(a=i,b=i,c=i)
+ f(f=i,e=i,d=i,c=2,b=i,a=3)
+ f(1,b=i,**kwargs)
+ f(*args,**kwargs)
+
+ f(a=i,b=i,c=i)
+ f(f=i,e=i,d=i,c=2,b=i,a=3)
+ f(1,b=i,**kwargs)
+ f(*args,**kwargs)
+
+ f(a=i,b=i,c=i)
+ f(f=i,e=i,d=i,c=2,b=i,a=3)
+ f(1,b=i,**kwargs)
+ f(*args,**kwargs)
+
+
+ def calibrate(self):
+
+ # define functions
+ def f(a,b,c,d=1,e=2,f=3):
+ return f
+
+ args = 1,2
+ kwargs = dict(c=3,d=4,e=5)
+
+ # do calls
+ for i in range(self.rounds):
+ pass
+
+###
+
class BuiltinFunctionCalls(Test):
version = 2.0