Commit 95fface9 authored by Rudi Chen's avatar Rudi Chen

Handle more slicing edge cases.

- Non-integer and non-None object passed into slices can fall back
  to item operator.
- Support indexeable items for slices when appropriate.
- Increase setitem and delitem ic slot sizes so that the rewriter
can succeed.
- Increase the performance of using slice operators on built-in types
  avoiding a lot of the guards and slowpath.
parent 987fa9ec
...@@ -302,11 +302,11 @@ ICSetupInfo* createGetitemIC(TypeRecorder* type_recorder) { ...@@ -302,11 +302,11 @@ ICSetupInfo* createGetitemIC(TypeRecorder* type_recorder) {
} }
ICSetupInfo* createSetitemIC(TypeRecorder* type_recorder) { ICSetupInfo* createSetitemIC(TypeRecorder* type_recorder) {
return ICSetupInfo::initialize(true, 1, 256, ICSetupInfo::Setitem, type_recorder); return ICSetupInfo::initialize(true, 1, 512, ICSetupInfo::Setitem, type_recorder);
} }
ICSetupInfo* createDelitemIC(TypeRecorder* type_recorder) { ICSetupInfo* createDelitemIC(TypeRecorder* type_recorder) {
return ICSetupInfo::initialize(false, 1, 256, ICSetupInfo::Delitem, type_recorder); return ICSetupInfo::initialize(false, 1, 512, ICSetupInfo::Delitem, type_recorder);
} }
ICSetupInfo* createSetattrIC(TypeRecorder* type_recorder) { ICSetupInfo* createSetattrIC(TypeRecorder* type_recorder) {
......
...@@ -134,7 +134,6 @@ static Py_ssize_t list_length(Box* self) noexcept { ...@@ -134,7 +134,6 @@ static Py_ssize_t list_length(Box* self) noexcept {
} }
Box* _listSlice(BoxedList* self, i64 start, i64 stop, i64 step, i64 length) { Box* _listSlice(BoxedList* self, i64 start, i64 stop, i64 step, i64 length) {
// printf("%ld %ld %ld\n", start, stop, step);
assert(step != 0); assert(step != 0);
if (step > 0) { if (step > 0) {
assert(0 <= start); assert(0 <= start);
...@@ -419,7 +418,7 @@ int list_ass_ext_slice(BoxedList* self, PyObject* item, PyObject* value) { ...@@ -419,7 +418,7 @@ int list_ass_ext_slice(BoxedList* self, PyObject* item, PyObject* value) {
} }
Box* listSetitemSliceInt64(BoxedList* self, i64 start, i64 stop, i64 step, Box* v) { Box* listSetitemSliceInt64(BoxedList* self, i64 start, i64 stop, i64 step, Box* v) {
RELEASE_ASSERT(step == 1, "step sizes must be 1 for now"); RELEASE_ASSERT(step == 1, "step sizes must be 1 in this code path");
boundSliceWithLength(&start, &stop, start, stop, self->size); boundSliceWithLength(&start, &stop, start, stop, self->size);
......
...@@ -2335,7 +2335,8 @@ extern "C" bool nonzero(Box* obj) { ...@@ -2335,7 +2335,8 @@ extern "C" bool nonzero(Box* obj) {
|| isSubclass(obj->cls, Exception) || obj->cls == file_cls || obj->cls == traceback_cls || isSubclass(obj->cls, Exception) || obj->cls == file_cls || obj->cls == traceback_cls
|| obj->cls == instancemethod_cls || obj->cls == module_cls || obj->cls == capifunc_cls || obj->cls == instancemethod_cls || obj->cls == module_cls || obj->cls == capifunc_cls
|| obj->cls == builtin_function_or_method_cls || obj->cls == method_cls || obj->cls == frame_cls || obj->cls == builtin_function_or_method_cls || obj->cls == method_cls || obj->cls == frame_cls
|| obj->cls == capi_getset_cls || obj->cls == pyston_getset_cls || obj->cls == wrapperdescr_cls, || obj->cls == generator_cls || obj->cls == capi_getset_cls || obj->cls == pyston_getset_cls
|| obj->cls == wrapperdescr_cls,
"%s.__nonzero__", getTypeName(obj)); // TODO "%s.__nonzero__", getTypeName(obj)); // TODO
// TODO should rewrite these? // TODO should rewrite these?
...@@ -4365,38 +4366,58 @@ extern "C" Box* unaryop(Box* operand, int op_type) { ...@@ -4365,38 +4366,58 @@ extern "C" Box* unaryop(Box* operand, int op_type) {
return rtn; return rtn;
} }
Box* callItemAttr(Box* target, BoxedString* item_str, Box* item, Box* value, CallRewriteArgs* rewrite_args) {
if (value) {
return callattrInternal2(target, item_str, CLASS_ONLY, rewrite_args, ArgPassSpec(2), item, value);
} else {
return callattrInternal1(target, item_str, CLASS_ONLY, rewrite_args, ArgPassSpec(1), item);
}
}
// This function decides whether to call the slice operator (e.g. __getslice__) // This function decides whether to call the slice operator (e.g. __getslice__)
// or the item operator (__getitem__). // or the item operator (__getitem__).
Box* callItemOrSliceAttr(Box* target, BoxedString* item_str, BoxedString* slice_str, Box* slice, Box* value, Box* callItemOrSliceAttr(Box* target, BoxedString* item_str, BoxedString* slice_str, Box* slice, Box* value,
CallRewriteArgs* rewrite_args) { CallRewriteArgs* rewrite_args) {
// If we don't have a slice operator, fall back to item operator.
// No rewriting for the typeLookup here (i.e. no guards if there is no slice operator). These guards
// will be added when we call callattrInternal on a slice operator later. If the guards fail, we fall
// back into the slow path which has this fallback to the item operator.
Box* slice_attr = typeLookup(target->cls, slice_str, NULL);
if (!slice_attr) {
if (value) {
return callattrInternal2(target, item_str, CLASS_ONLY, rewrite_args, ArgPassSpec(2), slice, value);
} else {
return callattrInternal1(target, item_str, CLASS_ONLY, rewrite_args, ArgPassSpec(1), slice);
}
}
// Need a slice object to use the slice operators. // This function contains a lot of logic for deciding between whether to call
if (slice->cls != slice_cls) { // the slice operator or the item operator, so we can match CPython's behavior
// on custom classes that define those operators. However, for builtin types,
// we know we can call either and the behavior will be the same. Adding all those
// guards are unnecessary and bad for performance.
//
// Also, for special slicing logic (e.g. open slice ranges [:]), the builtin types
// have C-implemented functions that already handle all the edge cases, so we don't
// need to have a slowpath for them here.
if (target->cls == list_cls || target->cls == str_cls || target->cls == unicode_cls) {
if (rewrite_args) { if (rewrite_args) {
rewrite_args->arg1->addAttrGuard(offsetof(Box, cls), (uint64_t)slice_cls, /*negate=*/true); rewrite_args->obj->addAttrGuard(offsetof(Box, cls), (uint64_t)target->cls);
} }
return callItemAttr(target, item_str, slice, value, rewrite_args);
}
if (value) { // Guard on the type of the object (need to have the slice operator attribute to call it).
return callattrInternal2(target, item_str, CLASS_ONLY, rewrite_args, ArgPassSpec(2), slice, value); Box* slice_attr = NULL;
} else { if (rewrite_args) {
return callattrInternal1(target, item_str, CLASS_ONLY, rewrite_args, ArgPassSpec(1), slice); RewriterVar* target_cls = rewrite_args->obj->getAttr(offsetof(Box, cls));
GetattrRewriteArgs grewrite_args(rewrite_args->rewriter, target_cls, Location::any());
slice_attr = typeLookup(target->cls, slice_str, &grewrite_args);
if (!grewrite_args.out_success) {
rewrite_args = NULL;
} }
} else { } else {
if (rewrite_args) { slice_attr = typeLookup(target->cls, slice_str, NULL);
rewrite_args->arg1->addAttrGuard(offsetof(Box, cls), (uint64_t)slice_cls); }
}
if (!slice_attr) {
return callItemAttr(target, item_str, slice, value, rewrite_args);
}
// Need a slice object to use the slice operators.
if (rewrite_args) {
rewrite_args->arg1->addAttrGuard(offsetof(Box, cls), (uint64_t)slice->cls);
}
if (slice->cls != slice_cls) {
return callItemAttr(target, item_str, slice, value, rewrite_args);
} }
BoxedSlice* bslice = (BoxedSlice*)slice; BoxedSlice* bslice = (BoxedSlice*)slice;
...@@ -4409,29 +4430,33 @@ Box* callItemOrSliceAttr(Box* target, BoxedString* item_str, BoxedString* slice_ ...@@ -4409,29 +4430,33 @@ Box* callItemOrSliceAttr(Box* target, BoxedString* item_str, BoxedString* slice_
->addAttrGuard(offsetof(Box, cls), (uint64_t)none_cls, /*negate=*/true); ->addAttrGuard(offsetof(Box, cls), (uint64_t)none_cls, /*negate=*/true);
} }
if (value) { return callItemAttr(target, item_str, slice, value, rewrite_args);
return callattrInternal2(target, item_str, CLASS_ONLY, rewrite_args, ArgPassSpec(2), slice, value);
} else {
return callattrInternal1(target, item_str, CLASS_ONLY, rewrite_args, ArgPassSpec(1), slice);
}
} else { } else {
rewrite_args = NULL; rewrite_args = NULL;
REWRITE_ABORTED(""); REWRITE_ABORTED("");
Box* start = bslice->start; // If the slice cannot be used as integer slices, also fall back to the get operator.
Box* stop = bslice->stop; // We could optimize further here by having a version of isSliceIndex that
// creates guards, but it would only affect some rare edge cases.
if (!isSliceIndex(bslice->start) || !isSliceIndex(bslice->stop)) {
return callItemAttr(target, item_str, slice, value, rewrite_args);
}
// If we don't specify the start/stop (e.g. o[:]), the slice operator functions // If we don't specify the start/stop (e.g. o[:]), the slice operator functions
// CPython seems to use 0 and sys.maxint as the default values. // CPython seems to use 0 and sys.maxint as the default values.
if (bslice->start->cls == none_cls) int64_t start = 0, stop = PyInt_GetMax();
start = boxInt(0); sliceIndex(bslice->start, &start);
if (bslice->stop->cls == none_cls) sliceIndex(bslice->stop, &stop);
stop = boxInt(PyInt_GetMax());
Box* boxedStart = boxInt(start);
Box* boxedStop = boxInt(stop);
if (value) { if (value) {
return callattrInternal3(target, slice_str, CLASS_ONLY, rewrite_args, ArgPassSpec(3), start, stop, value); return callattrInternal3(target, slice_str, CLASS_ONLY, rewrite_args, ArgPassSpec(3), boxedStart, boxedStop,
value);
} else { } else {
return callattrInternal2(target, slice_str, CLASS_ONLY, rewrite_args, ArgPassSpec(2), start, stop); return callattrInternal2(target, slice_str, CLASS_ONLY, rewrite_args, ArgPassSpec(2), boxedStart,
boxedStop);
} }
} }
} }
......
...@@ -17,7 +17,6 @@ ...@@ -17,7 +17,6 @@
#include "core/options.h" #include "core/options.h"
#include "core/types.h" #include "core/types.h"
#include "runtime/objmodel.h" #include "runtime/objmodel.h"
#include "runtime/types.h"
namespace pyston { namespace pyston {
...@@ -27,19 +26,8 @@ void parseSlice(BoxedSlice* slice, int size, i64* out_start, i64* out_stop, i64* ...@@ -27,19 +26,8 @@ void parseSlice(BoxedSlice* slice, int size, i64* out_start, i64* out_stop, i64*
throwCAPIException(); throwCAPIException();
} }
void sliceIndex(Box* b, int64_t* out) { bool isSliceIndex(Box* b) {
if (b->cls == none_cls) { return b->cls == none_cls || b->cls == int_cls || PyIndex_Check(b);
// Leave default value in case of None (useful for slices like [2:])
} else if (b->cls == int_cls) {
*out = static_cast<BoxedInt*>(b)->n;
} else if (PyIndex_Check(b)) {
int64_t x = PyNumber_AsSsize_t(b, NULL);
if (!(x == -1 && PyErr_Occurred()))
*out = x;
} else {
raiseExcHelper(TypeError, "slice indices must be integers or "
"None or have an __index__ method");
}
} }
void boundSliceWithLength(i64* start_out, i64* stop_out, i64 start, i64 stop, i64 size) { void boundSliceWithLength(i64* start_out, i64* stop_out, i64 start, i64 stop, i64 size) {
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#define PYSTON_RUNTIME_UTIL_H #define PYSTON_RUNTIME_UTIL_H
#include "core/types.h" #include "core/types.h"
#include "runtime/types.h"
namespace pyston { namespace pyston {
...@@ -24,7 +25,18 @@ class BoxedSlice; ...@@ -24,7 +25,18 @@ class BoxedSlice;
void parseSlice(BoxedSlice* slice, int size, i64* out_start, i64* out_stop, i64* out_end, i64* out_length = nullptr); void parseSlice(BoxedSlice* slice, int size, i64* out_start, i64* out_stop, i64* out_end, i64* out_length = nullptr);
// Analogue of _PyEval_SliceIndex // Analogue of _PyEval_SliceIndex
void sliceIndex(Box* b, int64_t* out); inline void sliceIndex(Box* b, int64_t* out) {
if (b->cls == none_cls) {
// Leave default value in case of None (useful for slices like [2:])
return;
}
int ret = _PyEval_SliceIndex(b, out);
if (ret <= 0)
throwCAPIException();
}
bool isSliceIndex(Box* b);
// Adjust the start and stop bounds of the sequence we are slicing to its size. // Adjust the start and stop bounds of the sequence we are slicing to its size.
// Negative values greater or equal to (-length) become positive values. // Negative values greater or equal to (-length) become positive values.
......
...@@ -18,15 +18,15 @@ def install_and_test_protobuf(): ...@@ -18,15 +18,15 @@ def install_and_test_protobuf():
subprocess.check_call(["./configure", "--prefix="+INSTALL_DIR], cwd=PROTOBUF_DIR) subprocess.check_call(["./configure", "--prefix="+INSTALL_DIR], cwd=PROTOBUF_DIR)
subprocess.check_call(["make", "-j4"], cwd=PROTOBUF_DIR) subprocess.check_call(["make", "-j4"], cwd=PROTOBUF_DIR)
subprocess.check_call(["make", "install"], cwd=PROTOBUF_DIR) subprocess.check_call(["make", "install"], cwd=PROTOBUF_DIR)
PROTOBUF_PY_DIR = os.path.join(PROTOBUF_DIR, "python") PROTOBUF_PY_DIR = os.path.join(PROTOBUF_DIR, "python")
env = os.environ env = os.environ
env["PATH"] = env["PATH"] + ":" + os.path.join(INSTALL_DIR, "bin") env["PATH"] = env["PATH"] + ":" + os.path.join(INSTALL_DIR, "bin")
env["LD_LIBRARY_PATH"] = os.path.join(INSTALL_DIR, "lib") env["LD_LIBRARY_PATH"] = os.path.join(INSTALL_DIR, "lib")
subprocess.check_call([PYTHON_EXE, "setup.py", "build"], cwd=PROTOBUF_PY_DIR, env=env) subprocess.check_call([PYTHON_EXE, "setup.py", "build"], cwd=PROTOBUF_PY_DIR, env=env)
expected = [{"ran": 216, "failures": 0, "errors": 1}] expected = [{"ran": 216}]
run_test([PYTHON_EXE, "setup.py", "test"], cwd=PROTOBUF_PY_DIR, expected=expected, env=env) run_test([PYTHON_EXE, "setup.py", "test"], cwd=PROTOBUF_PY_DIR, expected=expected, env=env)
create_virtenv(ENV_NAME, None, force_create = True) create_virtenv(ENV_NAME, None, force_create = True)
install_and_test_protobuf() install_and_test_protobuf()
...@@ -37,13 +37,26 @@ class Both(object): ...@@ -37,13 +37,26 @@ class Both(object):
def __setslice__(self, start, stop, item): def __setslice__(self, start, stop, item):
print "called setslice on object", start, stop print "called setslice on object", start, stop
class IndexZero(object):
def __index__(self):
return 0
def __repr__(self):
return "0"
class FalseIndex(object):
def __index__(self):
return "troll"
indexable = Indexable() indexable = Indexable()
sliceable = Sliceable() sliceable = Sliceable()
index_zero = IndexZero()
false_index = FalseIndex()
both = Both() both = Both()
numbers = range(10) numbers = range(10)
# Can use index and slice notation for object with only getitem # Can use index and slice notation for object with only getitem
indexable[0] indexable[0]
indexable[index_zero]
indexable[:10] indexable[:10]
indexable[11:] indexable[11:]
indexable[:] indexable[:]
...@@ -61,17 +74,22 @@ del indexable[:] ...@@ -61,17 +74,22 @@ del indexable[:]
del indexable[3:8] del indexable[3:8]
del indexable[slice(1,12,2)] del indexable[slice(1,12,2)]
# Can't use index notation or pass in a slice for objects with only getslice
try: try:
sliceable[0] sliceable[0]
except TypeError: except TypeError:
print "no index notation with index" print "can't use index notation or pass in a slice for objects with only getslice"
try:
sliceable['a':'b']
except TypeError:
print "can't pass in any type into a slice with only getslice"
try: try:
sliceable[1:10:2] sliceable[1:10:2]
except TypeError: except TypeError:
print "need getitem to support variable-sized steps" print "need getitem to support variable-sized steps"
sliceable[index_zero:index_zero]
sliceable[:10] sliceable[:10]
sliceable[11:] sliceable[11:]
sliceable[:] sliceable[:]
...@@ -88,11 +106,21 @@ both[:] = xrange(2) ...@@ -88,11 +106,21 @@ both[:] = xrange(2)
both[3:8] = xrange(2) both[3:8] = xrange(2)
both[::2] = xrange(2) both[::2] = xrange(2)
# Should all call getitem as a fallback
both['a']
both['a':'b']
both['a':'b':'c']
del both[0] del both[0]
del both[:] del both[:]
del both[3:8] del both[3:8]
del both [::2] del both [::2]
try:
both[false_index:false_index]
except TypeError:
print "even if we have getitem, __index__ should not return a non-int"
# Number lists should have the set/get/del|item/slice functions # Number lists should have the set/get/del|item/slice functions
print numbers[0] print numbers[0]
print numbers[:] print numbers[:]
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment