Commit ea2dec66 authored by Marius Wachtler, committed by GitHub

Merge pull request #1305 from undingen/small_optimizations

add _bisect, always_use_version support CAPI, set some frequent accessed slots
parents 48ec63a6 349c4d4b
......@@ -20,6 +20,7 @@ add_custom_target(copy_stdlib ALL DEPENDS ${STDLIB_TARGETS})
# compile specified files in from_cpython/Modules
file(GLOB_RECURSE STDMODULE_SRCS Modules
_bisectmodule.c
_codecsmodule.c
_collectionsmodule.c
_csv.c
......
......@@ -1970,13 +1970,13 @@ static const slotdef* update_one_slot(BoxedClass* type, const slotdef* p) noexce
sanity checks. I'll buy the first person to
point out a bug in this reasoning a beer. */
} else if (offset == offsetof(BoxedClass, tp_descr_get) && descr->cls == function_cls
&& static_cast<BoxedFunction*>(descr)->md->always_use_version) {
CompiledFunction* cf = static_cast<BoxedFunction*>(descr)->md->always_use_version;
if (cf->exception_style == CXX) {
type->tpp_descr_get = (descrgetfunc)cf->code;
&& !static_cast<BoxedFunction*>(descr)->md->always_use_version.empty()) {
auto md = static_cast<BoxedFunction*>(descr)->md;
if (md->always_use_version.get<CAPI>())
specific = md->always_use_version.get<CAPI>();
else {
type->tpp_descr_get = (descrgetfunc)md->always_use_version.get<CXX>()->code;
specific = (void*)slot_tp_tpp_descr_get;
} else {
specific = cf->code;
}
} else if (descr == Py_None && ptr == (void**)&type->tp_hash) {
/* We specifically allow __hash__ to be set to None
......
......@@ -47,7 +47,6 @@ FunctionMetadata::FunctionMetadata(int num_args, bool takes_varargs, bool takes_
takes_kwargs(takes_kwargs),
source(std::move(source)),
param_names(this->source->ast, this->source->getInternedStrings()),
always_use_version(NULL),
times_interpreted(0),
internal_callable(NULL, NULL) {
}
......@@ -59,7 +58,6 @@ FunctionMetadata::FunctionMetadata(int num_args, bool takes_varargs, bool takes_
takes_kwargs(takes_kwargs),
source(nullptr),
param_names(param_names),
always_use_version(NULL),
times_interpreted(0),
internal_callable(NULL, NULL) {
}
......@@ -81,9 +79,12 @@ void FunctionMetadata::addVersion(CompiledFunction* compiled) {
if (compiled->entry_descriptor == NULL) {
bool could_have_speculations = (source.get() != NULL);
if (!could_have_speculations && versions.size() == 0 && compiled->effort == EffortLevel::MAXIMAL
&& compiled->spec->accepts_all_inputs && compiled->spec->boxed_return_value)
always_use_version = compiled;
if (!could_have_speculations && compiled->effort == EffortLevel::MAXIMAL && compiled->spec->accepts_all_inputs
&& compiled->spec->boxed_return_value
&& (versions.size() == 0 || (versions.size() == 1 && !always_use_version.empty()))) {
always_use_version.get(compiled->exception_style) = compiled;
} else
assert(always_use_version.empty());
assert(compiled->spec->arg_types.size() == numReceivedArgs());
versions.push_back(compiled);
......
......@@ -648,7 +648,7 @@ void CompiledFunction::speculationFailed() {
FunctionMetadata* md = this->md;
assert(md);
assert(this != md->always_use_version);
assert(this != md->always_use_version.get(exception_style));
bool found = false;
for (int i = 0; i < md->versions.size(); i++) {
......
......@@ -95,19 +95,22 @@ public:
ExceptionSwitchable() : capi_val(), cxx_val() {}
ExceptionSwitchable(T capi_val, T cxx_val) : capi_val(std::move(capi_val)), cxx_val(std::move(cxx_val)) {}
template <ExceptionStyle S> T get() {
template <ExceptionStyle S> T& get() {
if (S == CAPI)
return capi_val;
else
return cxx_val;
}
T get(ExceptionStyle S) {
T& get(ExceptionStyle S) {
if (S == CAPI)
return capi_val;
else
return cxx_val;
}
// Returns true when neither the CAPI nor the CXX slot holds a truthy value
// (for pointer-typed T: both are null).
bool empty() const { return !capi_val && !cxx_val; }
// Resets both slots by assigning a default-constructed ExceptionSwitchable<T>,
// i.e. both values become value-initialized.
void clear() { *this = ExceptionSwitchable<T>(); }
};
template <typename R, typename... Args>
......@@ -470,7 +473,8 @@ public:
FunctionList
versions; // any compiled versions along with their type parameters; in order from most preferred to least
CompiledFunction* always_use_version; // if this version is set, always use it (for unboxed cases)
ExceptionSwitchable<CompiledFunction*>
always_use_version; // if this version is set, always use it (for unboxed cases)
std::unordered_map<const OSREntryDescriptor*, CompiledFunction*> osr_versions;
// Profiling counter:
......
......@@ -108,5 +108,6 @@ void setupBool() {
bool_cls->freeze();
bool_cls->tp_hash = (hashfunc)bool_hash;
bool_cls->tp_repr = boolRepr<CAPI>;
bool_as_number.nb_int = int_cls->tp_as_number->nb_int;
}
}
......@@ -792,7 +792,7 @@ template <ExceptionStyle S> Box* floatRepr(BoxedFloat* self) noexcept(S == CAPI)
return callCAPIFromStyle<S>(float_str_or_repr, self->d, 0, 'r');
}
Box* floatToInt(BoxedFloat* self) {
Box* float_to_int(BoxedFloat* self) noexcept {
double wholepart; /* integral portion of x, rounded toward 0 */
(void)modf(self->d, &wholepart);
......@@ -823,14 +823,14 @@ Box* floatTrunc(BoxedFloat* self) {
raiseExcHelper(TypeError, "descriptor '__trunc__' requires a 'float' object but received a '%s'",
getTypeName(self));
return floatToInt(self);
return float_to_int(self);
}
Box* floatInt(BoxedFloat* self) {
if (!PyFloat_Check(self))
raiseExcHelper(TypeError, "descriptor '__int__' requires a 'float' object but received a '%s'",
getTypeName(self));
return floatToInt(self);
return float_to_int(self);
}
Box* floatLong(BoxedFloat* self) {
......@@ -1021,9 +1021,11 @@ void setupFloat() {
_PyFloat_Init();
float_cls->tp_as_number->nb_power = float_pow;
float_cls->tp_new = (newfunc)floatNewPacked;
float_cls->tp_repr = (reprfunc)floatRepr<CAPI>;
float_cls->tp_str = (reprfunc)floatStr<CAPI>;
float_cls->tp_richcompare = float_richcompare;
float_cls->tp_as_number->nb_int = (unaryfunc)float_to_int;
float_cls->tp_as_number->nb_power = float_pow;
}
}
......@@ -1183,14 +1183,22 @@ extern "C" Box* intTrunc(BoxedInt* self) {
return boxInt(self->n);
}
// Non-throwing __int__ helper, suitable for direct use as an nb_int slot.
// An instance whose class is exactly int_cls is handed back via incref();
// any other instance (e.g. of a subclass) gets its payload re-boxed with
// boxInt() so the result is built from the raw value only.
static PyObject* int_int(BoxedInt* v) noexcept {
    if (v->cls != int_cls)
        return boxInt(v->n);
    return incref(v);
}
// Non-throwing __long__ helper, suitable for direct use as an nb_long slot:
// boxes the int's payload as a long via boxLong().
static PyObject* int_long(BoxedInt* v) noexcept {
    const auto value = v->n;
    return boxLong(value);
}
extern "C" Box* intInt(BoxedInt* self) {
if (!PyInt_Check(self))
raiseExcHelper(TypeError, "descriptor '__int__' requires a 'int' object but received a '%s'",
getTypeName(self));
if (self->cls == int_cls)
return incref(self);
return boxInt(self->n);
return int_int(self);
}
Box* intFloat(BoxedInt* self) {
......@@ -1206,7 +1214,7 @@ Box* intLong(BoxedInt* self) {
raiseExcHelper(TypeError, "descriptor '__long__' requires a 'int' object but received a '%s'",
getTypeName(self));
return boxLong(self->n);
return int_long(self);
}
extern "C" Box* intIndex(BoxedInt* v) {
......@@ -1644,5 +1652,7 @@ void setupInt() {
int_cls->tp_repr = (reprfunc)int_to_decimal_string;
int_cls->tp_new = (newfunc)intNewPacked;
int_as_number.nb_int = (unaryfunc)int_int;
int_as_number.nb_long = (unaryfunc)int_long;
}
}
......@@ -698,12 +698,7 @@ extern "C" Box* listDelitem(BoxedList* self, Box* slice) {
return rtn;
}
extern "C" Box* listInsert(BoxedList* self, Box* idx, Box* v) {
if (idx->cls != int_cls) {
raiseExcHelper(TypeError, "an integer is required");
}
int64_t n = static_cast<BoxedInt*>(idx)->n;
extern "C" void listInsertInternal(BoxedList* self, int64_t n, Box* v) {
if (n < 0)
n = self->size + n;
......@@ -721,6 +716,15 @@ extern "C" Box* listInsert(BoxedList* self, Box* idx, Box* v) {
Py_INCREF(v);
self->elts->elts[n] = v;
}
}
// list.insert(idx, v) entry point.
// Validates that idx's class is exactly int_cls (raising TypeError
// "an integer is required" otherwise), then delegates the actual insertion
// to listInsertInternal() with the unboxed index. Always returns None.
extern "C" Box* listInsert(BoxedList* self, Box* idx, Box* v) {
    const bool idx_is_exact_int = (idx->cls == int_cls);
    if (!idx_is_exact_int)
        raiseExcHelper(TypeError, "an integer is required");

    listInsertInternal(self, static_cast<BoxedInt*>(idx)->n, v);
    Py_RETURN_NONE;
}
......@@ -735,7 +739,7 @@ extern "C" int PyList_Insert(PyObject* op, Py_ssize_t where, PyObject* newitem)
PyErr_BadInternalCall();
return -1;
}
listInsert((BoxedList*)op, boxInt(where), newitem);
listInsertInternal((BoxedList*)op, where, newitem);
return 0;
} catch (ExcInfo e) {
setCAPIException(e);
......@@ -1218,6 +1222,107 @@ Box* listInit(BoxedList* self, Box* container) {
Py_RETURN_NONE;
}
/* tp_richcompare implementation for lists (adapted from CPython).
 *
 * v, w: the operands; if either is not a list, NotImplemented is returned.
 * op:   one of Py_LT, Py_LE, Py_EQ, Py_NE, Py_GT, Py_GE.
 *
 * Returns a new reference to the comparison result, or NULL with an
 * exception set on failure.
 *
 * Item comparisons can run arbitrary Python code, which may mutate either
 * list.  To stay safe in that case:
 *   - every item handed to a comparison is kept alive with a strong
 *     reference for the duration of the call, and
 *   - the list sizes are re-read after each comparison rather than cached.
 */
static PyObject* list_richcompare(PyObject* v, PyObject* w, int op) noexcept {
    PyListObject* vl, *wl;
    Py_ssize_t i;

    if (!PyList_Check(v) || !PyList_Check(w)) {
        Py_INCREF(Py_NotImplemented);
        return Py_NotImplemented;
    }

    vl = (PyListObject*)v;
    wl = (PyListObject*)w;

    if (Py_SIZE(vl) != Py_SIZE(wl) && (op == Py_EQ || op == Py_NE)) {
        /* Shortcut: if the lengths differ, the lists differ */
        PyObject* res;
        if (op == Py_EQ)
            res = Py_False;
        else
            res = Py_True;
        Py_INCREF(res);
        return res;
    }

    /* Search for the first index where items are different */
    for (i = 0; i < Py_SIZE(vl) && i < Py_SIZE(wl); i++) {
        PyObject* vitem = vl->ob_item[i];
        PyObject* witem = wl->ob_item[i];

        /* Identical objects always count as equal for list comparison;
           skipping them also avoids needless refcount traffic. */
        if (vitem == witem)
            continue;

        /* Hold strong references: the comparison may mutate the lists and
           would otherwise be able to free the items while they are in use. */
        Py_INCREF(vitem);
        Py_INCREF(witem);
        int k = PyObject_RichCompareBool(vitem, witem, Py_EQ);
        Py_DECREF(vitem);
        Py_DECREF(witem);

        if (k < 0)
            return NULL;
        if (!k)
            break;
    }

    if (i >= Py_SIZE(vl) || i >= Py_SIZE(wl)) {
        /* No more items to compare -- compare sizes */
        Py_ssize_t vs = Py_SIZE(vl);
        Py_ssize_t ws = Py_SIZE(wl);
        int cmp;
        PyObject* res;
        switch (op) {
            case Py_LT:
                cmp = vs < ws;
                break;
            case Py_LE:
                cmp = vs <= ws;
                break;
            case Py_EQ:
                cmp = vs == ws;
                break;
            case Py_NE:
                cmp = vs != ws;
                break;
            case Py_GT:
                cmp = vs > ws;
                break;
            case Py_GE:
                cmp = vs >= ws;
                break;
            default:
                /* Cannot happen for a valid op; never return NULL without
                   an exception set. */
                PyErr_BadArgument();
                return NULL;
        }
        if (cmp)
            res = Py_True;
        else
            res = Py_False;
        Py_INCREF(res);
        return res;
    }

    /* We have an item that differs -- shortcuts for EQ/NE */
    if (op == Py_EQ) {
        Py_INCREF(Py_False);
        return Py_False;
    }
    if (op == Py_NE) {
        Py_INCREF(Py_True);
        return Py_True;
    }

    /* Compare the final item again using the proper operator, again keeping
       both items alive across the (possibly list-mutating) call. */
    PyObject* vitem = vl->ob_item[i];
    PyObject* witem = wl->ob_item[i];
    Py_INCREF(vitem);
    Py_INCREF(witem);
    PyObject* result = PyObject_RichCompare(vitem, witem, op);
    Py_DECREF(vitem);
    Py_DECREF(witem);
    return result;
}
// tp_init implementation for list.
// Parses an optional single argument (keyword name "sequence") and forwards
// it to listInit(); the pointer stays NULL when no argument was supplied.
// A C++ exception thrown by listInit() is converted into a pending C-API
// exception. Returns 0 on success and -1 on failure.
static int list_init(PyListObject* self, PyObject* args, PyObject* kw) noexcept {
    static char* kwlist[] = { const_cast<char*>("sequence"), NULL };
    PyObject* seq = NULL;

    if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:list", kwlist, &seq))
        return -1;

    int status = 0;
    try {
        autoDecref(listInit((BoxedList*)self, seq));
    } catch (ExcInfo e) {
        setCAPIException(e);
        status = -1;
    }
    return status;
}
extern "C" PyObject* PyList_New(Py_ssize_t size) noexcept {
try {
BoxedList* l = new BoxedList();
......@@ -1235,65 +1340,12 @@ extern "C" PyObject* PyList_New(Py_ssize_t size) noexcept {
}
}
// Compares two lists element-wise for the comparison operator op_type
// (Eq, NotEq, Lt, LtE, Gt, GtE) and returns the boxed result.
// Throws (via throwCAPIException) if an element equality test fails.
Box* _listCmp(BoxedList* lhs, BoxedList* rhs, AST_TYPE::AST_TYPE op_type) {
int lsz = lhs->size;
int rsz = rhs->size;
// NOTE(review): is_order is computed but not read anywhere in this function.
bool is_order
= (op_type == AST_TYPE::Lt || op_type == AST_TYPE::LtE || op_type == AST_TYPE::Gt || op_type == AST_TYPE::GtE);
// Lists of different length can never be equal: answer Eq/NotEq immediately.
if (lsz != rsz) {
if (op_type == AST_TYPE::Eq)
Py_RETURN_FALSE;
if (op_type == AST_TYPE::NotEq)
Py_RETURN_TRUE;
}
// Walk the common prefix looking for the first pair of unequal elements.
int n = std::min(lsz, rsz);
for (int i = 0; i < n; i++) {
// The very same object on both sides is treated as equal without calling
// into the rich-compare machinery.
bool identity_eq = lhs->elts->elts[i] == rhs->elts->elts[i];
if (identity_eq)
continue;
int r = PyObject_RichCompareBool(lhs->elts->elts[i], rhs->elts->elts[i], Py_EQ);
// -1 signals an error from the equality test; propagate it as a C++ exception.
if (r == -1)
throwCAPIException();
if (r)
continue;
// First differing pair found: Eq/NotEq are decided right away, ordering
// operators are decided by comparing just this pair with op_type.
if (op_type == AST_TYPE::Eq) {
return boxBool(false);
} else if (op_type == AST_TYPE::NotEq) {
return boxBool(true);
} else {
// Note: this Box* r shadows the int r declared above.
Box* r = compareInternal<NOT_REWRITABLE>(lhs->elts->elts[i], rhs->elts->elts[i], op_type, NULL);
return r;
}
}
// Every element of the common prefix compared equal: the result is decided
// by comparing the lengths.
if (op_type == AST_TYPE::Lt)
return boxBool(lsz < rsz);
else if (op_type == AST_TYPE::LtE)
return boxBool(lsz <= rsz);
else if (op_type == AST_TYPE::Gt)
return boxBool(lsz > rsz);
else if (op_type == AST_TYPE::GtE)
return boxBool(lsz >= rsz);
else if (op_type == AST_TYPE::Eq)
return boxBool(lsz == rsz);
else if (op_type == AST_TYPE::NotEq)
return boxBool(lsz != rsz);
// Any other op_type is a programming error.
RELEASE_ASSERT(0, "%d", op_type);
}
Box* listEq(BoxedList* self, Box* rhs) {
if (!PyList_Check(rhs)) {
return incref(NotImplemented);
}
return _listCmp(self, static_cast<BoxedList*>(rhs), AST_TYPE::Eq);
return callCAPIFromStyle<CXX>(list_richcompare, self, rhs, Py_EQ);
}
Box* listNe(BoxedList* self, Box* rhs) {
......@@ -1301,7 +1353,7 @@ Box* listNe(BoxedList* self, Box* rhs) {
return incref(NotImplemented);
}
return _listCmp(self, static_cast<BoxedList*>(rhs), AST_TYPE::NotEq);
return callCAPIFromStyle<CXX>(list_richcompare, self, rhs, Py_NE);
}
Box* listLt(BoxedList* self, Box* rhs) {
......@@ -1309,7 +1361,7 @@ Box* listLt(BoxedList* self, Box* rhs) {
return incref(NotImplemented);
}
return _listCmp(self, static_cast<BoxedList*>(rhs), AST_TYPE::Lt);
return callCAPIFromStyle<CXX>(list_richcompare, self, rhs, Py_LT);
}
Box* listLe(BoxedList* self, Box* rhs) {
......@@ -1317,7 +1369,7 @@ Box* listLe(BoxedList* self, Box* rhs) {
return incref(NotImplemented);
}
return _listCmp(self, static_cast<BoxedList*>(rhs), AST_TYPE::LtE);
return callCAPIFromStyle<CXX>(list_richcompare, self, rhs, Py_LE);
}
Box* listGt(BoxedList* self, Box* rhs) {
......@@ -1325,7 +1377,7 @@ Box* listGt(BoxedList* self, Box* rhs) {
return incref(NotImplemented);
}
return _listCmp(self, static_cast<BoxedList*>(rhs), AST_TYPE::Gt);
return callCAPIFromStyle<CXX>(list_richcompare, self, rhs, Py_GT);
}
Box* listGe(BoxedList* self, Box* rhs) {
......@@ -1333,7 +1385,7 @@ Box* listGe(BoxedList* self, Box* rhs) {
return incref(NotImplemented);
}
return _listCmp(self, static_cast<BoxedList*>(rhs), AST_TYPE::GtE);
return callCAPIFromStyle<CXX>(list_richcompare, self, rhs, Py_GE);
}
extern "C" PyObject* _PyList_Extend(PyListObject* self, PyObject* b) noexcept {
......@@ -1536,6 +1588,8 @@ void setupList() {
list_cls->freeze();
list_cls->tp_iter = listIter;
list_cls->tp_repr = list_repr;
list_cls->tp_init = (initproc)list_init;
list_cls->tp_richcompare = list_richcompare;
list_cls->tp_as_sequence->sq_length = list_length;
list_cls->tp_as_sequence->sq_concat = (binaryfunc)list_concat;
......
......@@ -3959,12 +3959,20 @@ static inline RewriterVar* getArg(int idx, _CallRewriteArgsBase* rewrite_args) {
}
static StatCounter slowpath_pickversion("slowpath_pickversion");
static CompiledFunction* pickVersion(FunctionMetadata* f, ExceptionStyle S, int num_output_args, Box* oarg1, Box* oarg2,
Box* oarg3, Box** oargs) {
template <ExceptionStyle S>
static CompiledFunction* pickVersion(FunctionMetadata* f, int num_output_args, Box* oarg1, Box* oarg2, Box* oarg3,
Box** oargs) {
LOCK_REGION(codegen_rwlock.asWrite());
if (f->always_use_version && f->always_use_version->exception_style == S)
return f->always_use_version;
// if always_use_version is set use it even if the exception style does not match.
// But prefer using the correct style if both are available
if (f->always_use_version.get(S))
return f->always_use_version.get(S);
ExceptionStyle other = S == CAPI ? CXX : CAPI;
if (f->always_use_version.get(other))
return f->always_use_version.get(other);
slowpath_pickversion.log();
CompiledFunction* best_nonexcmatch = NULL;
......@@ -4845,7 +4853,7 @@ Box* callCLFunc(FunctionMetadata* md, CallRewriteArgs* rewrite_args, int num_out
rewrite_args = NULL;
}
CompiledFunction* chosen_cf = pickVersion(md, S, num_output_args, oarg1, oarg2, oarg3, oargs);
CompiledFunction* chosen_cf = pickVersion<S>(md, num_output_args, oarg1, oarg2, oarg3, oargs);
if (!chosen_cf) {
if (rewrite_args) {
......
......@@ -51,6 +51,7 @@
#include "runtime/super.h"
#include "runtime/util.h"
extern "C" void init_bisect();
extern "C" void initerrno();
extern "C" void init_sha();
extern "C" void init_sha256();
......@@ -1006,7 +1007,8 @@ static Box* typeCallInner(CallRewriteArgs* rewrite_args, ArgPassSpec argspec, Bo
// this array is ok with not using StlCompatAllocator since we will manually register these objects with the GC
static std::vector<Box*> class_making_news;
if (class_making_news.empty()) {
for (BoxedClass* allowed_cls : { object_cls, enumerate_cls, xrange_cls, tuple_cls, list_cls, dict_cls }) {
for (BoxedClass* allowed_cls :
{ object_cls, enumerate_cls, xrange_cls, tuple_cls, list_cls, dict_cls, set_cls, frozenset_cls }) {
auto new_obj = typeLookup(allowed_cls, new_str);
class_making_news.push_back(new_obj);
}
......@@ -4085,6 +4087,7 @@ extern "C" {
struct _inittab _PyImport_Inittab[] = { { "array", initarray },
{ "_ast", init_ast },
{ "binascii", initbinascii },
{ "_bisect", init_bisect },
{ "_codecs", init_codecs },
{ "_collections", init_collections },
{ "cStringIO", initcStringIO },
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment