Commit 0afbff14 authored by Kevin Modzelewski

Optimize calling str() and unicode()

These are tricky because str and unicode are types, so calling them
invokes the relatively complicated constructor logic.  I.e., str() doesn't
just call __str__ on the argument: if the result is an instance of a
subclass of str, it also calls result.__init__().  The same applies to
unicode, except unicode is even trickier since it takes some more
arguments, one of which is "encoding", whose behavior is dynamic and
cannot be determined from the argument types alone.

I didn't realize that at first and optimized unicode() by exposing an
inner version of it that takes its arguments in registers, which we
can take advantage of using our jit-arg-rearrangement capability.
This means we have to do parts of PyArg_ParseTuple ourselves, so I
added a PyArg_ParseSingle that runs a single object through the
arg-conversion code.  PyArg_ParseSingle could be further optimized if
we want to.  Or rather, if we have functions of the form
PyArg_ParseSingle_s (which corresponds to the "s" format code) we
could skip some more of the overhead.

I had to disable most of that once I realized the encoding issue, but
I left it in since hopefully we will be able to use it again once
the rewriter supports running guards after mutations in the cases where
we know how to resume after a failed guard.
parent c42d3395
......@@ -16,6 +16,7 @@ extern "C" {
#define PyArg_Parse _PyArg_Parse_SizeT
#define PyArg_ParseTuple _PyArg_ParseTuple_SizeT
#define PyArg_ParseTupleAndKeywords _PyArg_ParseTupleAndKeywords_SizeT
#define PyArg_ParseSingle _PyArg_ParseSingle_SizeT
#define PyArg_VaParse _PyArg_VaParse_SizeT
#define PyArg_VaParseTupleAndKeywords _PyArg_VaParseTupleAndKeywords_SizeT
#define Py_BuildValue _Py_BuildValue_SizeT
......@@ -28,6 +29,8 @@ PyAPI_FUNC(int) PyArg_Parse(PyObject *, const char *, ...) PYSTON_NOEXCEPT;
PyAPI_FUNC(int) PyArg_ParseTuple(PyObject *, const char *, ...) Py_FORMAT_PARSETUPLE(PyArg_ParseTuple, 2, 3) PYSTON_NOEXCEPT;
PyAPI_FUNC(int) PyArg_ParseTupleAndKeywords(PyObject *, PyObject *,
const char *, char **, ...) PYSTON_NOEXCEPT;
// Pyston addition: runs a single object through the argument-conversion code
// instead of parsing a whole args tuple.  `format` must hold exactly one simple
// format unit, and the trailing varargs are the destination pointer(s) for it.
// `arg_idx` (reported as arg_idx + 1) and `fname` are only used when reporting
// a conversion failure.  Returns 1 on success and 0 on failure.
PyAPI_FUNC(int) PyArg_ParseSingle(PyObject* obj, int arg_idx, const char* fname, const char* format, ...) PYSTON_NOEXCEPT;
PyAPI_FUNC(int) PyArg_UnpackTuple(PyObject *, const char *, Py_ssize_t, Py_ssize_t, ...) PYSTON_NOEXCEPT;
PyAPI_FUNC(PyObject *) Py_BuildValue(const char *, ...) PYSTON_NOEXCEPT;
PyAPI_FUNC(PyObject *) _Py_BuildValue_SizeT(const char *, ...) PYSTON_NOEXCEPT;
......
......@@ -1408,6 +1408,9 @@ PyAPI_FUNC(int) _PyUnicode_IsAlpha(
Py_UNICODE ch /* Unicode character */
) PYSTON_NOEXCEPT;
// Pyston addition: the part of unicode's constructor that runs after argument
// parsing, exposed so that callers can pass the already-parsed arguments
// directly.  `x` may be NULL (no object given); `encoding` and `errors` are
// NULL when they were not supplied.
PyAPI_FUNC(PyObject*) unicode_new_inner(PyObject* x, char* encoding, char* errors) PYSTON_NOEXCEPT;
#ifdef __cplusplus
}
#endif
......
......@@ -8745,6 +8745,15 @@ static PyBufferProcs unicode_as_buffer = {
static PyObject *
unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
/* Builds the result of unicode(x, encoding, errors) from already-parsed
   arguments; unicode_new() delegates to this after PyArg_ParseTupleAndKeywords.

   x        - object to convert, or NULL when no object was given
   encoding - encoding name, or NULL when not supplied
   errors   - error-handling name, or NULL when not supplied */
PyObject* unicode_new_inner(PyObject* x, char* encoding, char* errors) {
    /* No object at all: produce a zero-length unicode object. */
    if (!x) {
        return (PyObject *)_PyUnicode_New(0);
    }

    /* An explicit encoding and/or errors argument selects the decoding path. */
    if (encoding != NULL || errors != NULL) {
        return PyUnicode_FromEncodedObject(x, encoding, errors);
    }

    /* Plain one-argument form. */
    return PyObject_Unicode(x);
}
static PyObject *
unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
......@@ -8758,12 +8767,7 @@ unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:unicode",
kwlist, &x, &encoding, &errors))
return NULL;
if (x == NULL)
return (PyObject *)_PyUnicode_New(0);
if (encoding == NULL && errors == NULL)
return PyObject_Unicode(x);
else
return PyUnicode_FromEncodedObject(x, encoding, errors);
return unicode_new_inner(x, encoding, errors);
}
static PyObject *
......
......@@ -569,6 +569,35 @@ float_argument_error(PyObject *arg)
return 0;
}
/* Pyston addition: convert a single object according to one format unit.

   obj     - the object to convert
   arg_idx - index of the argument; passed to seterror() as arg_idx + 1
   fname   - function name forwarded to seterror()
   format  - exactly one simple format unit; tuple units ('('), the optional
             marker ('|'), "x*" units and 'e' units are not supported here
   ...     - destination pointer(s) required by the format unit

   Returns 1 on success.  On failure the conversion message is reported
   through seterror() and 0 is returned. */
int _PyArg_ParseSingle_SizeT(PyObject* obj, int arg_idx, const char* fname, const char* format, ...) {
    va_list va;
    char* msg;
    char msgbuf[256];

    /* Preconditions on the format string (debug builds only). */
    assert(format[0] != '\0');
    assert(format[0] != '(');
    assert(format[0] != '|');
    assert(format[1] != '*'); // would need to pass a non-null freelist
    assert(format[0] != 'e'); // would need to pass a non-null freelist

    va_start(va, format);
    /* convertsimple() advances `format` past the unit it handled; the final
       NULL is the freelist, which the units excluded above would require. */
    msg = convertsimple(obj, &format, &va, FLAG_SIZE_T, msgbuf, sizeof(msgbuf), NULL);
    va_end(va);

    if (msg) {
        /* Single-entry, zeroed levels array: this is not a nested-tuple item. */
        int levels[1];
        levels[0] = 0;
        seterror(arg_idx + 1, msg, levels, fname, NULL);
        return 0;
    }

    // Should have consumed the entire format string:
    assert(format[0] == '\0');
    return 1;
}
/* Convert a non-tuple argument. Return NULL if conversion went OK,
or a string with a message describing the failure. The message is
formatted as "must be <desired type>, not <actual type>".
......
This diff is collapsed.
import codecs
### Codec APIs
class MyUnicode(unicode):
def __new__(*args):
print "MyUnicode.__new__", map(type, args)
return unicode.__new__(*args)
def __init__(*args):
print "MyUnicode.__init__", map(type, args)
def encode(input, errors='strict'):
    # Encode hook of the "myunicode" test codec: always raises.
    raise Exception()
def decode(input, errors='strict'):
    # Decode hook of the "myunicode" test codec: ignores its input and returns a
    # MyUnicode instance.  The second tuple item is, by the codecs convention,
    # the amount of input consumed.
    return (MyUnicode(u"."), 1)
class IncrementalEncoder(codecs.IncrementalEncoder):
    # Incremental encoder that simply encodes each chunk as UTF-8.
    def encode(self, input, final=False):
        # utf_8_encode returns a (bytes, length) pair; only the bytes are wanted here.
        return codecs.utf_8_encode(input, self.errors)[0]
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    # Incremental decoder that delegates the actual decoding to UTF-8.
    _buffer_decode = codecs.utf_8_decode
class StreamWriter(codecs.StreamWriter):
    # Stream writer that encodes written text as UTF-8.
    encode = codecs.utf_8_encode
class StreamReader(codecs.StreamReader):
    # Stream reader that decodes the underlying bytes as UTF-8.
    decode = codecs.utf_8_decode
# Bundle the hooks defined above into the CodecInfo record that search() returns
# for the name "myunicode".
codec = codecs.CodecInfo(
    name='myunicode',
    encode=encode,
    decode=decode,
    incrementalencoder=IncrementalEncoder,
    incrementaldecoder=IncrementalDecoder,
    streamreader=StreamReader,
    streamwriter=StreamWriter,
)
def search(name):
    # Codec search function: answers only for "myunicode"; returns None for any other name.
    if name == "myunicode":
        return codec
    return None
# Register the search function with the codecs machinery.
codecs.register(search)
# Call unicode() with an explicit encoding and print the type of what comes back.
u = unicode("hello world", "myunicode", "strict")
print type(u)
......@@ -132,4 +132,9 @@ print
# These return longs:
print int("12938719238719827398172938712983791827938712987312")
print int(u"12938719238719827398172938712983791827938712987312")
print int("12938719238719827398172938712983791827938712987312", 16)
print int(u"12938719238719827398172938712983791827938712987312", 16)
print int(1e100)
print int(*[1e100])
print int(x=1e100)
......@@ -21,3 +21,8 @@ except TypeError as e:
# are being passed, but really they are not.
type.__call__(*[C2])
type.__call__(C2, **{})
try:
type.__call__(*[])
except TypeError as e:
print "caught typeerror"
......@@ -10,3 +10,13 @@ print
print repr("hello" + MyStr("world"))
print int(MyStr("2"))
class MyStr(str):
def __init__(*args):
print "MyStr.__init__", map(type, args)
class C(object):
    # __str__ returns an instance of a str subclass rather than a plain str.
    def __str__(self):
        return MyStr("hello world")
print type(str(C()))
......@@ -155,3 +155,13 @@ print "".join([u"\xB2", u"\xB3"])
import sys
print type(sys.maxunicode)
class MyUnicode(unicode):
def __init__(*args):
print "MyUnicode.__init__", map(type, args)
class C(object):
    # __unicode__ returns an instance of a unicode subclass rather than a plain unicode.
    def __unicode__(self):
        return MyUnicode("hello world")
print type(unicode(C()))
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment