Commit a7653f03 authored by Kevin Modzelewski

Merge pull request #1187 from kmod/perf9

Extract the logic from DEFAULT_CLASS_SIMPLE
parents 9c925c4e 4f83635c
......@@ -724,6 +724,77 @@ extern std::vector<BoxedClass*> classes;
class BoxedDict;
class BoxedString;
// Per-type allocation statistics. The real definitions are compiled in only when
// STAT_ALLOCATION_TYPES is non-zero; otherwise both macros expand to nothing.
//
// Both macros expand to a bare `if (...) { ... }` statement and silently read a variable
// from the scope they are expanded in, so they are only usable inside an allocation function:
//   - ALLOC_STATS(cls) reads a variable named `size`.
//   - ALLOC_STATS_VAR(cls) reads a variable named `nitems`.
// Nothing is recorded for a class whose tp_name is null.
#if STAT_ALLOCATION_TYPES
// Logs to the "alloc.<tp_name>" counter and logs `size` to the "allocsize.<tp_name>" counter.
// The counters are looked up by name on every expansion.
#define ALLOC_STATS(cls) \
    if (cls->tp_name) { \
        std::string per_name_alloc_name = "alloc." + std::string(cls->tp_name); \
        std::string per_name_allocsize_name = "allocsize." + std::string(cls->tp_name); \
        Stats::log(Stats::getStatCounter(per_name_alloc_name)); \
        Stats::log(Stats::getStatCounter(per_name_allocsize_name), size); \
    }
// Variable-size variant: logs to "alloc.<tp_name>" / "allocsize.<tp_name>", or to the
// "...(0)" flavour of those counters when `nitems` is 0. The logged size is
// _PyObject_VAR_SIZE(cls, nitems).
// The four StatCounter objects are function-local statics, so at each expansion site they are
// constructed once, from the name of the first class that gets there.
// NOTE(review): this looks like it relies on every expansion site only ever seeing a single
// class -- confirm before using it somewhere `cls` can vary.
#define ALLOC_STATS_VAR(cls) \
    if (cls->tp_name) { \
        std::string per_name_alloc_name = "alloc." + std::string(cls->tp_name); \
        std::string per_name_alloc_name0 = "alloc." + std::string(cls->tp_name) + "(0)"; \
        std::string per_name_allocsize_name = "allocsize." + std::string(cls->tp_name); \
        std::string per_name_allocsize_name0 = "allocsize." + std::string(cls->tp_name) + "(0)"; \
        static StatCounter alloc_name(per_name_alloc_name); \
        static StatCounter alloc_name0(per_name_alloc_name0); \
        static StatCounter allocsize_name(per_name_allocsize_name); \
        static StatCounter allocsize_name0(per_name_allocsize_name0); \
        if (nitems == 0) { \
            alloc_name0.log(); \
            allocsize_name0.log(_PyObject_VAR_SIZE(cls, nitems)); \
        } else { \
            alloc_name.log(); \
            allocsize_name.log(_PyObject_VAR_SIZE(cls, nitems)); \
        } \
    }
#else
#define ALLOC_STATS(cls)
#define ALLOC_STATS_VAR(cls)
#endif
// These are just dummy objects to help us differentiate operator new() versions from each other, since we can't use
// normal templating or different function names.
// They are passed as an extra placement argument, e.g. `new (cls, FAST_GC) T(...)`; the value itself is never
// inspected, only its type matters for overload resolution.

// Selects the Box::operator new overload that forwards to newFast<false>.
struct FastToken {};
extern FastToken FAST;
// Selects the Box::operator new overload that forwards to newFast<true>.
struct FastGCToken {};
extern FastGCToken FAST_GC;
// "Box" is the base class of any C++ type that implements a Python type. For example,
// BoxedString is the data structure that implements Python's str type, and BoxedString
// inherits from Box.
......@@ -738,11 +809,26 @@ private:
// Appends a new value to the hcattrs array.
void appendNewHCAttr(BORROWED(Box*) val, SetattrRewriteArgs* rewrite_args);
protected:
// newFast(): a fast implementation of operator new() that optimizes for the common case. It does this
// by inlining the following methods and skipping most of the dynamic checks:
// - Box::operator new
// - cls->tp_alloc
// - PyType_GenericAlloc
// - PyObject_Init
// The restrictions on when you can use the fast variant are encoded as assertions in the implementation
// (see runtime/types.h)
template <bool is_gc> static void* newFast(size_t size, BoxedClass* cls);
public:
// Add a no-op constructor to make sure that we don't zero-initialize cls
Box() {}
// General-purpose allocation of an instance of `cls`; declared here and defined out of line.
void* operator new(size_t size, BoxedClass* cls) __attribute__((visibility("default")));
// Fast-path allocation selected with the FAST token; forwards to newFast<false>, which asserts that
// PyType_IS_GC(cls) is false along with its other preconditions. `_dummy` is unused.
void* operator new(size_t size, BoxedClass* cls, FastToken _dummy) { return newFast<false>(size, cls); }
// Fast-path allocation selected with the FAST_GC token; forwards to newFast<true>, which asserts that
// PyType_IS_GC(cls) is true along with its other preconditions. `_dummy` is unused.
void* operator new(size_t size, BoxedClass* cls, FastGCToken _dummy) { return newFast<true>(size, cls); }
// Using `delete` on a Box is not supported: this overload calls abort() instead of freeing anything.
void operator delete(void* ptr) __attribute__((visibility("default"))) { abort(); }
_PyObject_HEAD_EXTRA
......@@ -827,84 +913,12 @@ static_assert(offsetof(Box, cls) == offsetof(struct _object, ob_type), "");
return Box::operator new(size, default_cls); \
}
// Per-type allocation statistics; real definitions only when STAT_ALLOCATION_TYPES is non-zero, empty otherwise.
// NOTE(review): these definitions repeat, token for token, the ALLOC_STATS / ALLOC_STATS_VAR definitions that
// appear earlier in this listing (ahead of the FastToken declarations). An identical redefinition is accepted by
// the preprocessor, but the two copies must be kept in sync or one of them dropped.
#if STAT_ALLOCATION_TYPES
// Logs to "alloc.<tp_name>" and logs `size` to "allocsize.<tp_name>". `size` is not a macro parameter: it is
// read from the scope the macro is expanded in. Does nothing when cls->tp_name is null.
#define ALLOC_STATS(cls) \
if (cls->tp_name) { \
std::string per_name_alloc_name = "alloc." + std::string(cls->tp_name); \
std::string per_name_allocsize_name = "allocsize." + std::string(cls->tp_name); \
Stats::log(Stats::getStatCounter(per_name_alloc_name)); \
Stats::log(Stats::getStatCounter(per_name_allocsize_name), size); \
}
// Variable-size variant. `nitems` is read from the expansion scope; allocations with nitems == 0 go to the
// separate "...(0)" counters. The logged size is _PyObject_VAR_SIZE(cls, nitems).
// The StatCounter objects are function-local statics, so each expansion site constructs them once, from the
// name of the first class that reaches it.
// NOTE(review): presumably every expansion site only ever sees one class -- confirm.
#define ALLOC_STATS_VAR(cls) \
if (cls->tp_name) { \
std::string per_name_alloc_name = "alloc." + std::string(cls->tp_name); \
std::string per_name_alloc_name0 = "alloc." + std::string(cls->tp_name) + "(0)"; \
std::string per_name_allocsize_name = "allocsize." + std::string(cls->tp_name); \
std::string per_name_allocsize_name0 = "allocsize." + std::string(cls->tp_name) + "(0)"; \
static StatCounter alloc_name(per_name_alloc_name); \
static StatCounter alloc_name0(per_name_alloc_name0); \
static StatCounter allocsize_name(per_name_allocsize_name); \
static StatCounter allocsize_name0(per_name_allocsize_name0); \
if (nitems == 0) { \
alloc_name0.log(); \
allocsize_name0.log(_PyObject_VAR_SIZE(cls, nitems)); \
} else { \
alloc_name.log(); \
allocsize_name.log(_PyObject_VAR_SIZE(cls, nitems)); \
} \
}
#else
#define ALLOC_STATS(cls)
#define ALLOC_STATS_VAR(cls)
#endif
// DEFAULT_CLASS_SIMPLE(default_cls, is_gc): a faster version of DEFAULT_CLASS for classes that can use the
// "FAST" operator new. It gives the class two operator new overloads:
//   - operator new(size, cls): forwards to the general-purpose Box::operator new.
//   - operator new(size): allocates an instance of `default_cls` through Box::newFast<is_gc>.
// In the simple cases, newFast inlines the fast paths of the following methods and improves allocation speed
// quite a bit:
// - Box::operator new
// - cls->tp_alloc
// - PyType_GenericAlloc
// - PyObject_Init
// The restrictions on when you can use the SIMPLE (ie fast) variant are encoded as asserts in Box::newFast,
// so they live in exactly one place instead of being repeated in every expansion of this macro.
// `is_gc` is used as a template argument and therefore has to be a compile-time constant (true / false).
#define DEFAULT_CLASS_SIMPLE(default_cls, is_gc)                                                                       \
    void* operator new(size_t size, BoxedClass * cls) __attribute__((visibility("default"))) {                         \
        return Box::operator new(size, cls);                                                                           \
    }                                                                                                                  \
    void* operator new(size_t size) __attribute__((visibility("default"))) { return newFast<is_gc>(size, default_cls); }
// This corresponds to CPython's PyVarObject, for objects with a variable number of "items" that are stored inline.
// For example, strings and tuples store their data in line in the main object allocation, so are BoxVars. Lists,
......@@ -938,6 +952,7 @@ static_assert(offsetof(BoxVar, ob_size) == offsetof(struct _varobject, ob_size),
return BoxVar::operator new(size, default_cls, nitems); \
}
// TODO: extract out newFastVar like we did with newFast
#define DEFAULT_CLASS_VAR_SIMPLE(default_cls, itemsize) \
static_assert(itemsize > 0, ""); \
inline void _base_check() { \
......
......@@ -26,7 +26,7 @@ BoxedDictIterator::BoxedDictIterator(BoxedDict* d) : d(d), it(d->d.begin()), itE
// Creates a BoxedDictIterator over the dict `s` whose class is PyDictIterKey_Type.
// `s` must be a dict; this is only checked with an assert.
// The object is allocated through the FAST_GC operator new overload (Box::newFast<true>), whose preconditions
// on the class are checked by asserts inside newFast.
Box* dict_iter(Box* s) noexcept {
    assert(PyDict_Check(s));
    BoxedDict* self = static_cast<BoxedDict*>(s);
    return new (&PyDictIterKey_Type, FAST_GC) BoxedDictIterator(self);
}
Box* dictIterKeys(Box* s) {
......@@ -36,13 +36,13 @@ Box* dictIterKeys(Box* s) {
// Creates a BoxedDictIterator over the dict `s` whose class is PyDictIterValue_Type.
// `s` must be a dict; this is only checked with an assert.
// The object is allocated through the FAST_GC operator new overload (Box::newFast<true>), whose preconditions
// on the class are checked by asserts inside newFast.
Box* dictIterValues(Box* s) {
    assert(PyDict_Check(s));
    BoxedDict* self = static_cast<BoxedDict*>(s);
    return new (&PyDictIterValue_Type, FAST_GC) BoxedDictIterator(self);
}
// Creates a BoxedDictIterator over the dict `s` whose class is PyDictIterItem_Type.
// `s` must be a dict; this is only checked with an assert.
// The object is allocated through the FAST_GC operator new overload (Box::newFast<true>), whose preconditions
// on the class are checked by asserts inside newFast.
Box* dictIterItems(Box* s) {
    assert(PyDict_Check(s));
    BoxedDict* self = static_cast<BoxedDict*>(s);
    return new (&PyDictIterItem_Type, FAST_GC) BoxedDictIterator(self);
}
Box* dictIterIter(Box* s) {
......
......@@ -33,7 +33,7 @@ Box* listIterIter(Box* s) {
// Creates a BoxedListIterator over the list `s`, passing 0 as the iterator's `start` argument.
// `s` must be a list; this is only checked with an assert.
Box* listIter(Box* s) noexcept {
assert(PyList_Check(s));
BoxedList* self = static_cast<BoxedList*>(s);
return new BoxedListIterator(self, 0);
// NOTE(review): the statement below is unreachable, the return above always wins. It looks like the intended
// FAST_GC replacement for it. BoxedListIterator is still declared with DEFAULT_CLASS(list_iterator_cls), which
// presumably adds class-level operator new overloads that would hide Box's FAST_GC overload -- confirm, and
// make the two changes together.
return new (list_iterator_cls, FAST_GC) BoxedListIterator(self, 0);
}
Box* listiterHasnext(Box* s) {
......@@ -89,7 +89,7 @@ Box* listiter_next(Box* s) noexcept {
// Creates a BoxedListIterator of class list_reverse_iterator_cls over the list `s`, passing the index of the
// last element (self->size - 1) as the iterator's `start` argument.
// `s` must be a list; this is only checked with an assert.
Box* listReversed(Box* s) {
assert(PyList_Check(s));
BoxedList* self = static_cast<BoxedList*>(s);
return new (list_reverse_iterator_cls) BoxedListIterator(self, self->size - 1);
// NOTE(review): the statement below is unreachable, the return above always wins. It looks like the intended
// FAST_GC replacement for it. BoxedListIterator is still declared with DEFAULT_CLASS(list_iterator_cls), which
// presumably adds class-level operator new overloads that would hide Box's FAST_GC overload -- confirm, and
// make the two changes together.
return new (list_reverse_iterator_cls, FAST_GC) BoxedListIterator(self, self->size - 1);
}
Box* listreviterHasnext(Box* s) {
......
......@@ -28,8 +28,6 @@ public:
int pos;
BoxedListIterator(BoxedList* l, int start);
DEFAULT_CLASS(list_iterator_cls);
static void dealloc(BoxedListIterator* o) noexcept {
PyObject_GC_UnTrack(o);
Py_XDECREF(o->l);
......
......@@ -33,7 +33,7 @@ public:
BoxedLong() __attribute__((visibility("default"))) {}
DEFAULT_CLASS(long_cls);
DEFAULT_CLASS_SIMPLE(long_cls, false);
};
extern "C" Box* createLong(llvm::StringRef s);
......
......@@ -2217,7 +2217,7 @@ public:
b = NULL;
}
DEFAULT_CLASS(attrwrapper_cls);
DEFAULT_CLASS_SIMPLE(attrwrapper_cls, true);
BORROWED(Box*) getUnderlying() {
......
......@@ -281,6 +281,42 @@ protected:
friend void setupThread();
};
template <bool is_gc> void* Box::newFast(size_t size, BoxedClass* cls) {
ALLOC_STATS(cls);
assert(cls->tp_alloc == PyType_GenericAlloc);
assert(cls->tp_itemsize == 0);
assert(cls->tp_basicsize == size);
assert(cls->is_pyston_class);
assert(cls->attrs_offset == 0);
assert(is_gc == PyType_IS_GC(cls));
bool is_heaptype = false;
assert(is_heaptype == (bool)(cls->tp_flags & Py_TPFLAGS_HEAPTYPE));
/* Don't allocate classes through this -- we need to keep track of all class objects. */
assert(cls != type_cls);
/* note: we want to use size instead of tp_basicsize, since size is a compile-time constant */
void* mem;
if (is_gc)
mem = _PyObject_GC_Malloc(size);
else
mem = PyObject_MALLOC(size);
assert(mem);
Box* rtn = static_cast<Box*>(mem);
if (is_heaptype)
Py_INCREF(cls);
PyObject_INIT(rtn, cls);
if (is_gc)
_PyObject_GC_TRACK(rtn);
return rtn;
/* TODO: there should be a way to not have to do this nested inlining by hand */
}
// Corresponds to PyHeapTypeObject. Very similar to BoxedClass, but allocates some extra space for
// structures that otherwise might get allocated statically. For instance, tp_as_number for builtin
// types will usually point to a `static PyNumberMethods` object, but for a heap-allocated class it
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment