Commit 09df9da7 authored by Rudi Chen

Handle unicode string slicing.

parent 95fface9
......@@ -470,6 +470,8 @@ extern "C" Box* listSetitemSlice(BoxedList* self, BoxedSlice* slice, Box* v) {
sliceIndex(slice->stop, &stop);
sliceIndex(slice->step, &step);
adjustNegativeIndicesOnObject(self, &start, &stop);
if (step != 1) {
int r = list_ass_ext_slice(self, slice, v);
if (r)
......
......@@ -4448,6 +4448,8 @@ Box* callItemOrSliceAttr(Box* target, BoxedString* item_str, BoxedString* slice_
sliceIndex(bslice->start, &start);
sliceIndex(bslice->stop, &stop);
adjustNegativeIndicesOnObject(target, &start, &stop);
Box* boxedStart = boxInt(start);
Box* boxedStop = boxInt(stop);
......
......@@ -30,13 +30,32 @@ bool isSliceIndex(Box* b) {
return b->cls == none_cls || b->cls == int_cls || PyIndex_Check(b);
}
void boundSliceWithLength(i64* start_out, i64* stop_out, i64 start, i64 stop, i64 size) {
// Rewrites negative slice bounds relative to the length of `obj`.
//
// obj       - the object being sliced; its class' tp_as_sequence table is consulted.
// start_out - in/out: slice start; a negative value has the object's length added to it.
// stop_out  - in/out: slice stop; a negative value has the object's length added to it.
//
// The bounds are only touched when the class provides both sq_slice and sq_length and the
// reported length is non-negative; otherwise they are written back unchanged. No clamping
// is done here, so a bound can still be negative or exceed the length afterwards.
void adjustNegativeIndicesOnObject(Box* obj, i64* start_out, i64* stop_out) {
    i64 start = *start_out;
    i64 stop = *stop_out;

    // Logic from PySequence_GetSlice:
    PySequenceMethods* m = obj->cls->tp_as_sequence;
    if (m && m->sq_slice && (start < 0 || stop < 0) && m->sq_length) {
        // The length is only queried when at least one bound actually needs adjusting.
        Py_ssize_t l = (*m->sq_length)(obj);
        // NOTE(review): a negative length presumably signals an error from sq_length; it is
        // ignored here and the bounds are left as they were -- confirm this is intended.
        if (l >= 0) {
            if (start < 0)
                start += l;
            if (stop < 0)
                stop += l;
        }
    }

    *start_out = start;
    *stop_out = stop;
}
void boundSliceWithLength(i64* start_out, i64* stop_out, i64 start, i64 stop, i64 size) {
if (start < 0)
start = 0;
else if (start > size)
......
......@@ -38,10 +38,10 @@ inline void sliceIndex(Box* b, int64_t* out) {
bool isSliceIndex(Box* b);
void adjustNegativeIndicesOnObject(Box* obj, i64* start, i64* stop);
// Adjust the start and stop bounds of the sequence we are slicing to its size.
// Negative indices are not wrapped around here; call adjustNegativeIndicesOnObject first if
// negative values greater or equal to (-length) should become positive values.
// Ensure stop >= start and remain within bounds.
void boundSliceWithLength(i64* start_out, i64* stop_out, i64 start, i64 stop, i64 size);
template <typename T> void copySlice(T* __restrict__ dst, const T* __restrict__ src, i64 start, i64 step, i64 length) {
......
......@@ -53,6 +53,8 @@ index_zero = IndexZero()
false_index = FalseIndex()
both = Both()
numbers = range(10)
letters = "abcde"
unicodestr = unicode("abcde")
# Can use index and slice notation for object with only getitem
indexable[0]
......@@ -109,7 +111,9 @@ both[::2] = xrange(2)
# Should all call getitem as a fallback
both['a']
both['a':'b']
both['a':'b':'c']
both[1:'b']
both['a':2]
both[1:2:'c']
del both[0]
del both[:]
......@@ -139,10 +143,29 @@ print numbers[:-2]
print numbers[-2:]
# Strings support slicing
print "abcd"[2]
print "abcd"[:2]
print "abcd"[2:]
print "abcd"[1:3]
print letters[2]
print letters[:2]
print letters[2:]
print letters[1:3]
print letters[:-2]
print letters[-2:]
# Unicode strings support slicing
# Note that unicode strings are not the same type of object as strings
# (but both have base class basestring).
print unicodestr[2]
print unicodestr[:2]
print unicodestr[2:]
print unicodestr[1:3]
print unicodestr[:-2]
print unicodestr[-2:]
# Calling the slice operator directly does not have the same behavior
# as using the slice notation []. Namely, it will not modify negative
# indices.
print numbers.__getslice__(0, -1);
print letters.__getslice__(0, -1);
print unicodestr.__getslice__(0, -1);
# Other
class C(object):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment