Commit 2c4ab499 authored by Kevin Modzelewski, committed by Kevin Modzelewski

Rearrange things to improve our ability to inline common cases

We seem to be spending a fair amount of time doing unnecessary work
for simple calls like boxInt and createList, which are generated
by irgen and reduce to calling new BoxedInt / BoxedList.  The
operator new calls tp_alloc, which costs us some indirect function calls;
tp_alloc then does some checking about its caller, and after that we
still have to work out what size object to create and how to initialize it.

I created a DEFAULT_CLASS_SIMPLE macro to go with DEFAULT_CLASS,
which should help with these things.  In it, I (manually) inlined all of those
functions into the operator new.

I also moved the small arena bucket selection function (SmallArena::alloc)
into the header file so that it can get inlined, since the allocation size
is often known at compile time and we can statically resolve to a bucket.

Putting these together means that boxInt and createList are much tighter.
parent a2e51e4f
......@@ -87,14 +87,6 @@ private:
std::vector<Instruction*> to_remove;
for (User* user : li->users()) {
if (CallInst* call = dyn_cast<CallInst>(user)) {
if (call->getCalledFunction()->getName() == "_maybeDecrefCls") {
errs() << "Found decrefcls call: " << *call << '\n';
if (!isUserDefined(cls)) {
// Don't delete right away; I think that invalidates the iterator
// we're currently iterating over
to_remove.push_back(call);
}
}
continue;
}
......@@ -113,7 +105,8 @@ private:
assert(success);
int64_t offset = ap_offset.getSExtValue();
errs() << "Found a gep at offset " << offset << ": " << *gep << '\n';
if (VERBOSITY("opt") >= 1)
errs() << "Found a gep at offset " << offset << ": " << *gep << '\n';
for (User* gep_user : gep->users()) {
LoadInst* gep_load = dyn_cast<LoadInst>(gep_user);
......@@ -123,14 +116,17 @@ private:
}
errs() << "Found a load: " << *gep_load << '\n';
if (VERBOSITY("opt") >= 1)
errs() << "Found a load: " << *gep_load << '\n';
if (offset == offsetof(BoxedClass, attrs_offset)) {
errs() << "attrs_offset; replacing with " << cls->attrs_offset << "\n";
if (VERBOSITY("opt") >= 1)
errs() << "attrs_offset; replacing with " << cls->attrs_offset << "\n";
replaceUsesWithConstant(gep_load, cls->attrs_offset);
changed = true;
} else if (offset == offsetof(BoxedClass, tp_basicsize)) {
errs() << "tp_basicsize; replacing with " << cls->tp_basicsize << "\n";
if (VERBOSITY("opt") >= 1)
errs() << "tp_basicsize; replacing with " << cls->tp_basicsize << "\n";
replaceUsesWithConstant(gep_load, cls->tp_basicsize);
changed = true;
}
......
......@@ -441,6 +441,9 @@ public:
};
static_assert(offsetof(Box, cls) == offsetof(struct _object, ob_type), "");
// Our default for tp_alloc:
PyObject* PystonType_GenericAlloc(BoxedClass* cls, Py_ssize_t nitems) noexcept;
#define DEFAULT_CLASS(default_cls) \
void* operator new(size_t size, BoxedClass * cls) __attribute__((visibility("default"))) { \
return Box::operator new(size, cls); \
......@@ -449,6 +452,36 @@ static_assert(offsetof(Box, cls) == offsetof(struct _object, ob_type), "");
return Box::operator new(size, default_cls); \
}
// Fast ("simple") variant of the default-class operator new pair.
// The two-argument form still forwards to Box::operator new; the one-argument
// form allocates directly and stamps the class pointer itself.  A class may
// only use this variant if it satisfies every assert at the top of the
// one-argument operator new below.
#define DEFAULT_CLASS_SIMPLE(default_cls) \
    void* operator new(size_t size, BoxedClass * cls) __attribute__((visibility("default"))) { \
        return Box::operator new(size, cls); \
    } \
    void* operator new(size_t size) __attribute__((visibility("default"))) { \
        /* Hand-inlined replacement for the following chain of calls: \
         *   Box::operator new, cls->tp_alloc, PystonType_GenericAlloc, PyObject_Init \
         * The asserts below spell out the conditions under which that is allowed. \
         */ \
        assert(default_cls->tp_alloc == PystonType_GenericAlloc); \
        assert(default_cls->tp_itemsize == 0); \
        assert(default_cls->tp_basicsize == size); \
        assert(default_cls->is_pyston_class); \
        assert(default_cls->attrs_offset == 0); \
        \
        /* Allocate with size rather than tp_basicsize: size is a compile-time constant. */ \
        Box* box = static_cast<Box*>(gc_alloc(size, gc::GCKind::PYTHON)); \
        assert(box); \
        \
        box->cls = default_cls; \
        return box; \
        /* TODO: there should be a way to not have to do this nested inlining by hand */ \
    }
// CPython C API compatibility class:
class BoxVar : public Box {
public:
......
......@@ -27,13 +27,6 @@
namespace pyston {
namespace gc {
// Notify the gc of n bytes as being under GC management.
// This is called internally for anything allocated through gc_alloc,
// but it can also be called by clients to say that they have memory that
// is ultimately GC managed but did not get allocated through gc_alloc,
// such as memory that will get freed by a gc destructor.
void registerGCManagedBytes(size_t bytes);
extern "C" inline void* gc_alloc(size_t bytes, GCKind kind_id) __attribute__((visibility("default")));
extern "C" inline void* gc_alloc(size_t bytes, GCKind kind_id) {
size_t alloc_bytes = bytes + sizeof(GCAllocation);
......
......@@ -227,22 +227,6 @@ void dumpHeapStatistics() {
//////
/// Small Arena
// Small-arena allocation entry point.  Reports the request through
// registerGCManagedBytes, then picks a bucket for `bytes` and forwards to
// _alloc.  Yields NULL when no bucket is large enough.
GCAllocation* SmallArena::alloc(size_t bytes) {
    registerGCManagedBytes(bytes);

    // The two smallest buckets are matched against literal sizes.
    if (bytes <= 16)
        return _alloc(16, 0);
    if (bytes <= 32)
        return _alloc(32, 1);

    // Anything larger: take the first remaining bucket that fits.
    for (int bucket = 2; bucket < NUM_BUCKETS; bucket++) {
        if (sizes[bucket] < bytes)
            continue;
        return _alloc(sizes[bucket], bucket);
    }
    return NULL;
}
GCAllocation* SmallArena::realloc(GCAllocation* al, size_t bytes) {
Block* b = Block::forPointer(al);
......
......@@ -25,6 +25,13 @@
namespace pyston {
namespace gc {
// Notify the gc of n bytes as being under GC management.
// This is called internally for anything allocated through gc_alloc,
// but it can also be called by clients to say that they have memory that
// is ultimately GC managed but did not get allocated through gc_alloc,
// such as memory that will get freed by a gc destructor.
void registerGCManagedBytes(size_t bytes);
class Heap;
struct HeapStatistics;
......@@ -119,7 +126,22 @@ public:
#endif
}
GCAllocation* __attribute__((__malloc__)) alloc(size_t bytes);
// Bucket selection for small-arena allocations.  Defined in the header so it
// can be inlined: the allocation size is often known at compile time, in which
// case the bucket choice can be resolved statically.
// Reports `bytes` via registerGCManagedBytes, then forwards to _alloc with the
// chosen bucket's size and index; returns NULL if no bucket can hold `bytes`.
GCAllocation* __attribute__((__malloc__)) alloc(size_t bytes) {
    registerGCManagedBytes(bytes);

    // Buckets 0 and 1 are checked against their literal sizes up front.
    if (bytes <= 16)
        return _alloc(16, 0);
    if (bytes <= 32)
        return _alloc(32, 1);

    // Remaining buckets: first one whose size is at least `bytes` wins.
    for (int idx = 2; idx < NUM_BUCKETS; idx++) {
        if (sizes[idx] < bytes)
            continue;
        return _alloc(sizes[idx], idx);
    }
    return NULL;
}
GCAllocation* realloc(GCAllocation* alloc, size_t bytes);
void free(GCAllocation* al);
......
......@@ -315,7 +315,7 @@ public:
BoxedInt(int64_t n) __attribute__((visibility("default"))) : n(n) {}
DEFAULT_CLASS(int_cls);
DEFAULT_CLASS_SIMPLE(int_cls);
};
class BoxedFloat : public Box {
......@@ -324,7 +324,7 @@ public:
BoxedFloat(double d) __attribute__((visibility("default"))) : d(d) {}
DEFAULT_CLASS(float_cls);
DEFAULT_CLASS_SIMPLE(float_cls);
};
class BoxedComplex : public Box {
......@@ -334,14 +334,14 @@ public:
BoxedComplex(double r, double i) __attribute__((visibility("default"))) : real(r), imag(i) {}
DEFAULT_CLASS(complex_cls);
DEFAULT_CLASS_SIMPLE(complex_cls);
};
class BoxedBool : public BoxedInt {
public:
BoxedBool(bool b) __attribute__((visibility("default"))) : BoxedInt(b ? 1 : 0) {}
DEFAULT_CLASS(bool_cls);
DEFAULT_CLASS_SIMPLE(bool_cls);
};
class BoxedString : public Box {
......@@ -353,7 +353,7 @@ public:
BoxedString(std::string&& s) __attribute__((visibility("default")));
BoxedString(const std::string& s) __attribute__((visibility("default")));
DEFAULT_CLASS(str_cls);
DEFAULT_CLASS_SIMPLE(str_cls);
};
class BoxedUnicode : public Box {
......@@ -367,7 +367,7 @@ public:
BoxedInstanceMethod(Box* obj, Box* func) __attribute__((visibility("default"))) : obj(obj), func(func) {}
DEFAULT_CLASS(instancemethod_cls);
DEFAULT_CLASS_SIMPLE(instancemethod_cls);
};
class GCdArray {
......@@ -399,7 +399,7 @@ public:
void shrink();
static const int INITIAL_CAPACITY;
DEFAULT_CLASS(list_cls);
DEFAULT_CLASS_SIMPLE(list_cls);
};
class BoxedTuple : public Box {
......@@ -410,7 +410,7 @@ public:
BoxedTuple(GCVector& elts) __attribute__((visibility("default"))) : elts(elts) {}
BoxedTuple(GCVector&& elts) __attribute__((visibility("default"))) : elts(std::move(elts)) {}
DEFAULT_CLASS(tuple_cls);
DEFAULT_CLASS_SIMPLE(tuple_cls);
};
extern "C" BoxedTuple* EmptyTuple;
......@@ -434,7 +434,7 @@ public:
BoxedDict() __attribute__((visibility("default"))) {}
DEFAULT_CLASS(dict_cls);
DEFAULT_CLASS_SIMPLE(dict_cls);
Box* getOrNull(Box* k) {
const auto& p = d.find(k);
......@@ -502,7 +502,7 @@ public:
Box* start, *stop, *step;
BoxedSlice(Box* lower, Box* upper, Box* step) : start(lower), stop(upper), step(step) {}
DEFAULT_CLASS(slice_cls);
DEFAULT_CLASS_SIMPLE(slice_cls);
};
class BoxedMemberDescriptor : public Box {
......@@ -534,7 +534,7 @@ public:
BoxedMemberDescriptor(MemberType type, int offset) : type(type), offset(offset) {}
BoxedMemberDescriptor(PyMemberDef* member) : type((MemberType)member->type), offset(member->offset) {}
DEFAULT_CLASS(member_cls);
DEFAULT_CLASS_SIMPLE(member_cls);
};
class BoxedGetsetDescriptor : public Box {
......@@ -546,7 +546,7 @@ public:
BoxedGetsetDescriptor(Box* (*get)(Box*, void*), void (*set)(Box*, Box*, void*), void* closure)
: get(get), set(set), closure(closure) {}
DEFAULT_CLASS(getset_cls);
DEFAULT_CLASS_SIMPLE(getset_cls);
};
class BoxedProperty : public Box {
......@@ -559,7 +559,7 @@ public:
BoxedProperty(Box* get, Box* set, Box* del, Box* doc)
: prop_get(get), prop_set(set), prop_del(del), prop_doc(doc) {}
DEFAULT_CLASS(property_cls);
DEFAULT_CLASS_SIMPLE(property_cls);
};
class BoxedStaticmethod : public Box {
......@@ -568,7 +568,7 @@ public:
BoxedStaticmethod(Box* callable) : sm_callable(callable){};
DEFAULT_CLASS(staticmethod_cls);
DEFAULT_CLASS_SIMPLE(staticmethod_cls);
};
class BoxedClassmethod : public Box {
......@@ -577,7 +577,7 @@ public:
BoxedClassmethod(Box* callable) : cm_callable(callable){};
DEFAULT_CLASS(classmethod_cls);
DEFAULT_CLASS_SIMPLE(classmethod_cls);
};
// TODO is there any particular reason to make this a Box, ie a python-level object?
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment