Commit cf9598f3 authored by Mark Florisson's avatar Mark Florisson

Support arrays in structs and implicit struct padding in buffer format strings

parent a8d81e52
......@@ -649,7 +649,6 @@ class GetAndReleaseBufferUtilityCode(object):
"""))
def mangle_dtype_name(dtype):
# Use prefixes to separate user defined types from builtins
# (consider "typedef float unsigned_int")
......@@ -662,7 +661,9 @@ def mangle_dtype_name(dtype):
prefix = "nn_"
else:
prefix = ""
return prefix + dtype.declaration_code("").replace(" ", "_")
type_decl = dtype.declaration_code("")
type_decl = type_decl.replace(" ", "_")
return prefix + type_decl.replace("[", "_").replace("]", "_")
def get_type_information_cname(code, dtype, maxdepth=None):
# Output the run-time type information (__Pyx_TypeInfo) for given dtype,
......@@ -688,6 +689,12 @@ def get_type_information_cname(code, dtype, maxdepth=None):
code.globalstate.utility_codes.add(name)
typecode = code.globalstate['typeinfo']
arraysizes = []
if dtype.is_array:
while dtype.is_array:
arraysizes.append(dtype.size)
dtype = dtype.base_type
complex_possible = dtype.is_struct_or_union and dtype.can_be_complex()
declcode = dtype.declaration_code("")
......@@ -729,7 +736,6 @@ def get_type_information_cname(code, dtype, maxdepth=None):
elif dtype.is_pyobject:
typegroup = 'O'
else:
print dtype
assert False
if dtype.is_int:
......@@ -737,15 +743,13 @@ def get_type_information_cname(code, dtype, maxdepth=None):
else:
is_unsigned = "0"
typecode.putln(('static __Pyx_TypeInfo %s = { "%s", %s, sizeof(%s), \'%s\', %s, %s };'
) % (name,
rep,
structinfo_name,
declcode,
typegroup,
is_unsigned,
flags,
), safe=True)
typeinfo = ('static __Pyx_TypeInfo %s = '
'{ "%s", %s, sizeof(%s), { %s }, %s, \'%s\', %s, %s };')
tup = (name, rep, structinfo_name, declcode,
', '.join([str(x) for x in arraysizes]), len(arraysizes),
typegroup, is_unsigned, flags)
typecode.putln(typeinfo % tup, safe=True)
return name
def load_buffer_utility(util_code_name, context=None, **kwargs):
......
......@@ -2548,6 +2548,7 @@ class IndexNode(ExprNode):
warning(index.pos, "Index should be typed for more "
"efficient access", level=2)
IndexNode.warned_untyped_idx = True
self.memslice_index = True
index = index.coerce_to(index_type, env)
indices[i] = index
......@@ -6935,8 +6936,9 @@ class CythonArrayNode(ExprNode):
"n" * len(shapes),
", ".join(shapes)))
err = "!%s || !%s || !PyBytes_Check(%s)" % (format_temp, shapes_temp,
format_temp)
err = "!%s || !%s || !PyBytes_AsString(%s)" % (format_temp,
shapes_temp,
format_temp)
code.putln(code.error_goto_if(err, self.pos))
code.put_gotref(format_temp)
code.put_gotref(shapes_temp)
......
......@@ -157,7 +157,7 @@ def src_conforms_to_dst(src, dst):
return True
def valid_memslice_dtype(dtype):
def valid_memslice_dtype(dtype, i=0):
"""
Return whether type dtype can be used as the base type of a
memoryview slice.
......@@ -178,6 +178,8 @@ def valid_memslice_dtype(dtype):
dtype.is_error or
# Pointers are not valid (yet)
# (dtype.is_ptr and valid_memslice_dtype(dtype.base_type)) or
(dtype.is_array and i < 8 and
valid_memslice_dtype(dtype.base_type, i + 1)) or
dtype.is_numeric or
dtype.is_pyobject or
dtype.is_fused or # accept this as it will be replaced by specializations later
......
......@@ -48,6 +48,8 @@ typedef struct {
const char* name; /* for error messages only */
struct __Pyx_StructField_* fields;
size_t size; /* sizeof(type) */
size_t arraysize[8]; /* length of array in each dimension */
int ndim;
char typegroup; /* _R_eal, _C_omplex, Signed _I_nt, _U_nsigned int, _S_truct, _P_ointer, _O_bject */
char is_unsigned;
int flags;
......@@ -69,10 +71,12 @@ typedef struct {
__Pyx_BufFmt_StackElem* head;
size_t fmt_offset;
size_t new_count, enc_count;
size_t struct_alignment;
int is_complex;
char enc_type;
char new_packmode;
char enc_packmode;
char is_valid_array;
} __Pyx_BufFmt_Context;
......@@ -118,6 +122,8 @@ static void __Pyx_BufFmt_Init(__Pyx_BufFmt_Context* ctx,
ctx->enc_count = 0;
ctx->enc_type = 0;
ctx->is_complex = 0;
ctx->is_valid_array = 0;
ctx->struct_alignment = 0;
while (type->typegroup == 'S') {
++ctx->head;
ctx->head->field = type->fields;
......@@ -142,6 +148,15 @@ static int __Pyx_BufFmt_ParseNumber(const char** ts) {
return count;
}
/* Wrapper around __Pyx_BufFmt_ParseNumber that reports failure.
 *
 * Forwards *ts to __Pyx_BufFmt_ParseNumber and returns its result.  When that
 * result is -1 (the first character was not a digit), a ValueError naming the
 * character currently at **ts is set before returning -1.
 */
static int __Pyx_BufFmt_ExpectNumber(const char **ts) {
    int parsed = __Pyx_BufFmt_ParseNumber(ts);
    if (parsed != -1)
        return parsed;
    /* First char was not a digit */
    PyErr_Format(PyExc_ValueError,
                 "Does not understand character buffer dtype format string ('%c')", **ts);
    return parsed;
}
static void __Pyx_BufFmt_RaiseUnexpectedChar(char ch) {
PyErr_Format(PyExc_ValueError,
"Unexpected format string character: '%c'", ch);
......@@ -239,6 +254,40 @@ static size_t __Pyx_BufFmt_TypeCharToAlignment(char ch, int is_complex) {
}
}
/* Helper structs for computing the padding at the end of a struct so that it
   is aligned on its first member.  Each one pairs a value of the given type
   with a trailing char; the result will probably be the same as the plain
   alignment of the type, but we don't have any guarantees.
*/
typedef struct { short x; char c; } __Pyx_pad_short;
typedef struct { int x; char c; } __Pyx_pad_int;
typedef struct { long x; char c; } __Pyx_pad_long;
typedef struct { float x; char c; } __Pyx_pad_float;
typedef struct { double x; char c; } __Pyx_pad_double;
typedef struct { long double x; char c; } __Pyx_pad_longdouble;
typedef struct { void *x; char c; } __Pyx_pad_void_p;
#ifdef HAVE_LONG_LONG
typedef struct { PY_LONG_LONG x; char c; } __Pyx_pad_longlong;
#endif

/* Map a format type character to sizeof({T x; char c;}) - sizeof(T).
 *
 * Single-byte types ('?', 'c', 'b', 'B') yield 1.  For an unrecognised
 * character an error is raised through __Pyx_BufFmt_RaiseUnexpectedChar and
 * 0 is returned.  The is_complex argument is accepted but not consulted.
 */
static size_t __Pyx_BufFmt_TypeCharToPadding(char ch, int is_complex) {
    size_t padding;

    switch (ch) {
        case '?': case 'c': case 'b': case 'B':
            padding = 1;
            break;
        case 'h': case 'H':
            padding = sizeof(__Pyx_pad_short) - sizeof(short);
            break;
        case 'i': case 'I':
            padding = sizeof(__Pyx_pad_int) - sizeof(int);
            break;
        case 'l': case 'L':
            padding = sizeof(__Pyx_pad_long) - sizeof(long);
            break;
#ifdef HAVE_LONG_LONG
        case 'q': case 'Q':
            padding = sizeof(__Pyx_pad_longlong) - sizeof(PY_LONG_LONG);
            break;
#endif
        case 'f':
            padding = sizeof(__Pyx_pad_float) - sizeof(float);
            break;
        case 'd':
            padding = sizeof(__Pyx_pad_double) - sizeof(double);
            break;
        case 'g':
            padding = sizeof(__Pyx_pad_longdouble) - sizeof(long double);
            break;
        case 'P': case 'O':
            padding = sizeof(__Pyx_pad_void_p) - sizeof(void*);
            break;
        default:
            /* Unknown type character: set the error, report no padding */
            __Pyx_BufFmt_RaiseUnexpectedChar(ch);
            padding = 0;
            break;
    }
    return padding;
}
static char __Pyx_BufFmt_TypeCharToGroup(char ch, int is_complex) {
switch (ch) {
case 'c': case 'b': case 'h': case 'i': case 'l': case 'q': return 'I';
......@@ -281,8 +330,24 @@ static void __Pyx_BufFmt_RaiseExpected(__Pyx_BufFmt_Context* ctx) {
static int __Pyx_BufFmt_ProcessTypeChunk(__Pyx_BufFmt_Context* ctx) {
char group;
size_t size, offset;
size_t size, offset, arraysize = 1;
if (ctx->enc_type == 0) return 0;
/* Validate array size */
if (ctx->head->field->type->arraysize[0]) {
int i;
if (!ctx->is_valid_array) {
PyErr_Format(PyExc_ValueError, "Expected %d dimensions, got 0",
ctx->head->field->type->ndim);
return -1;
}
for (i = 0; i < ctx->head->field->type->ndim; i++) {
arraysize *= ctx->head->field->type->arraysize[i];
}
ctx->is_valid_array = 0;
}
group = __Pyx_BufFmt_TypeCharToGroup(ctx->enc_type, ctx->is_complex);
do {
__Pyx_StructField* field = ctx->head->field;
......@@ -293,12 +358,17 @@ static int __Pyx_BufFmt_ProcessTypeChunk(__Pyx_BufFmt_Context* ctx) {
} else {
size = __Pyx_BufFmt_TypeCharToStandardSize(ctx->enc_type, ctx->is_complex);
}
if (ctx->enc_packmode == '@') {
size_t align_at = __Pyx_BufFmt_TypeCharToAlignment(ctx->enc_type, ctx->is_complex);
size_t align_mod_offset;
if (align_at == 0) return -1;
align_mod_offset = ctx->fmt_offset % align_at;
if (align_mod_offset > 0) ctx->fmt_offset += align_at - align_mod_offset;
if (ctx->struct_alignment == 0)
ctx->struct_alignment = __Pyx_BufFmt_TypeCharToPadding(ctx->enc_type,
ctx->is_complex);
}
if (type->size != size || type->typegroup != group) {
......@@ -324,6 +394,8 @@ static int __Pyx_BufFmt_ProcessTypeChunk(__Pyx_BufFmt_Context* ctx) {
}
ctx->fmt_offset += size;
if (arraysize)
ctx->fmt_offset += (arraysize - 1) * size;
--ctx->enc_count; /* Consume from buffer string */
......@@ -360,6 +432,61 @@ static int __Pyx_BufFmt_ProcessTypeChunk(__Pyx_BufFmt_Context* ctx) {
return 0;
}
/* Parse an array in the format string (e.g. (1,2,3)) */
/* Parse an array specification in the format string (e.g. "(1,2,3)") and
 * check it against the array dimensions recorded for the current field.
 *
 * ctx  parser context; ctx->head->field->type supplies the expected ndim
 *      and arraysize[] values.
 * tsp  in/out cursor into the format string.  On entry *tsp is expected to
 *      point at the opening '(' (the first character is skipped unchecked);
 *      on success it is advanced past the closing ')'.
 *
 * Returns Py_None on success and NULL with a ValueError set on failure.  The
 * Py_None is returned WITHOUT taking a new reference: it serves purely as a
 * non-NULL success marker, so callers must only test the result against NULL
 * and must not decref it.
 */
static CYTHON_INLINE PyObject *
__pyx_buffmt_parse_array(__Pyx_BufFmt_Context* ctx, const char** tsp)
{
    const char *ts = *tsp;
    int i = 0, number;
    int ndim = ctx->head->field->type->ndim;

    ++ts;  /* step over the first character (presumably the opening '(') */
    if (ctx->new_count != 1) {
        PyErr_SetString(PyExc_ValueError,
                        "Cannot handle repeated arrays in format string");
        return NULL;
    }

    /* Process the previous element */
    if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL;

    /* Parse all numbers in the format string */
    while (*ts && *ts != ')') {
        /* Skip whitespace.  The cursor must be advanced here, otherwise the
           loop never terminates; the cast keeps isspace() well-defined for
           negative char values. */
        if (isspace((unsigned char) *ts)) {
            ++ts;
            continue;
        }

        number = __Pyx_BufFmt_ExpectNumber(&ts);
        if (number == -1) return NULL;

        /* Only compare while i is a valid index into arraysize[]; surplus
           dimensions are counted and rejected after the loop. */
        if (i < ndim && (size_t) number != ctx->head->field->type->arraysize[i])
            return PyErr_Format(PyExc_ValueError,
                                "Expected a dimension of size %zu, got %d",
                                ctx->head->field->type->arraysize[i], number);

        if (*ts != ',' && *ts != ')')
            return PyErr_Format(PyExc_ValueError,
                                "Expected a comma in format string, got '%c'", *ts);

        if (*ts == ',') ts++;
        i++;
    }

    if (i != ndim)
        return PyErr_Format(PyExc_ValueError, "Expected %d dimension(s), got %d",
                            ctx->head->field->type->ndim, i);

    if (!*ts) {
        PyErr_SetString(PyExc_ValueError,
                        "Unexpected end of format string, expected ')'");
        return NULL;
    }

    ctx->is_valid_array = 1;
    ctx->new_count = 1;
    *tsp = ++ts;  /* continue parsing after the closing ')' */
    return Py_None;
}
static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const char* ts) {
int got_Z = 0;
while (1) {
......@@ -374,7 +501,7 @@ static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const cha
__Pyx_BufFmt_RaiseExpected(ctx);
return NULL;
}
return ts;
return ts;
case ' ':
case 10:
case 13:
......@@ -414,6 +541,7 @@ static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const cha
}
++ts;
ts_after_sub = ts;
ctx->struct_alignment = 0;
for (i = 0; i != struct_count; ++i) {
ts_after_sub = __Pyx_BufFmt_CheckString(ctx, ts);
if (!ts_after_sub) return NULL;
......@@ -422,7 +550,16 @@ static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const cha
}
break;
case '}': /* end of substruct; either repeat or move on */
++ts;
{
size_t alignment = ctx->struct_alignment;
++ts;
if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL;
ctx->enc_type = 0; /* Erase processed last struct element */
if (alignment && ctx->fmt_offset % alignment) {
/* Pad struct on size of the first member */
ctx->fmt_offset += alignment - (ctx->fmt_offset % alignment);
}
}
return ts;
case 'x':
if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL;
......@@ -465,14 +602,13 @@ static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const cha
while(*ts != ':') ++ts;
++ts;
break;
case '(':
if (!__pyx_buffmt_parse_array(ctx, &ts)) return NULL;
break;
default:
{
int number = __Pyx_BufFmt_ParseNumber(&ts);
if (number == -1) { /* First char was not a digit */
PyErr_Format(PyExc_ValueError,
"Does not understand character buffer dtype format string ('%c')", *ts);
return NULL;
}
int number = __Pyx_BufFmt_ExpectNumber(&ts);
if (number == -1) return NULL;
ctx->new_count = (size_t)number;
}
}
......
......@@ -394,7 +394,6 @@ cdef class memoryview(object):
info.readonly = 0
info.obj = self
# Some properties that have the same semantics as in NumPy
property T:
@cname('__pyx_memoryview_transpose')
......@@ -920,6 +919,8 @@ cdef extern from *:
char* name
__Pyx_StructField* fields
size_t size
size_t arraysize[8]
int ndim
char typegroup
char is_unsigned
int flags
......@@ -945,6 +946,11 @@ cdef extern from *:
@cname('__pyx_format_from_typeinfo')
cdef format_from_typeinfo(__Pyx_TypeInfo *type):
"""
We want to return bytes, but python 3 doesn't allow you to do anything
useful with bytes. So use str and convert back and forth to/from unicode.
Thank you python 3 for making bytes the most useless thing ever!
"""
cdef __Pyx_StructField *field
cdef __pyx_typeinfo_string fmt
......@@ -960,12 +966,16 @@ cdef format_from_typeinfo(__Pyx_TypeInfo *type):
field = type.fields
while field.type:
parts.append(format_from_typeinfo(field.type))
parts.append(format_from_typeinfo(field.type).decode('ascii'))
field += 1
result = alignment.join(parts) + '}'
else:
fmt = __Pyx_TypeInfoToFormat(type)
result = fmt.string
if type.arraysize[0]:
extents = [str(type.arraysize[i]) for i in range(type.ndim)]
result = "(%s)%s" % (','.join(extents), fmt.string.decode('ascii'))
else:
result = fmt.string.decode('ascii')
return result
return result.encode('ascii')
......@@ -46,6 +46,14 @@ cdef intp[:, :] myarray
cdef int[:] a10 = <int[:10]> object()
cdef int[:] a11 = <int[:5.4]> <int *> 1
cdef struct Valid:
int array[1][2][3][4][5][6][7][8]
cdef struct Invalid:
int array[1][2][3][4][5][6][7][8][9]
cdef Valid[:] validslice
cdef Invalid[:] invalidslice
# These are VALID
cdef int[::view.indirect_contiguous, ::view.contiguous] a9
......@@ -70,4 +78,5 @@ _ERRORS = u'''
44:10: Invalid base type for memoryview slice: intp
46:35: Can only create cython.array from pointer or array
47:24: Cannot assign type 'double' to 'Py_ssize_t'
55:13: Invalid base type for memoryview slice: Invalid
'''
......@@ -1456,6 +1456,108 @@ def test_memslice_prange(arg):
for k in range(src.shape[2]):
assert src[i, j, k] == dst[i, j, k], (src[i, j, k] == dst[i, j, k])
# Test arrays in structs
cdef struct ArrayStruct:
int ints[10]
char chars[3]
cdef packed struct PackedArrayStruct:
int ints[10]
char chars[3]
cdef fused FusedStruct:
ArrayStruct
PackedArrayStruct
@testcase
def test_memslice_struct_with_arrays():
"""
>>> test_memslice_struct_with_arrays()
abc
abc
"""
cdef ArrayStruct a1[10]
cdef PackedArrayStruct a2[10]
test_structs_with_arr(a1)
test_structs_with_arr(a2)
cdef test_structs_with_arr(FusedStruct array[10]):
cdef FusedStruct[:] myslice1, myslice2, myslice3, myslice4
cdef int i, j
myslice1 = <FusedStruct[:10]> array
for i in range(10):
for j in range(10):
myslice1[i].ints[j] = i
for j in range(3):
myslice1[i].chars[j] = 97 + j
if sys.version_info[:2] >= (2, 7):
if sys.version_info[0] < 3:
import __builtin__ as builtins
else:
import builtins
size1 = sizeof(FusedStruct)
size2 = len(builtins.memoryview(myslice1)[0])
assert size1 == size2, (size1, size2, builtins.memoryview(myslice1).format)
myslice2 = builtins.memoryview(myslice1)
for i in range(10):
assert myslice2[i].ints[i] == myslice1[i].ints[i]
assert myslice2[i].chars[i] == myslice1[i].chars[i]
myslice3 = <object> myslice1
myslice4 = myslice1
for i in range(10):
for j in range(10):
assert myslice3[i].ints[j] == myslice4[i].ints[j] == myslice1[i].ints[j]
for j in range(3):
assert myslice3[i].chars[j] == myslice4[i].chars[j] == myslice1[i].chars[j]
print myslice1[0].chars[:3].decode('ascii')
# Test padding at the end of structs in the buffer support
cdef struct PaddedAtEnd:
int a[3]
char b[3]
cdef struct AlignedNested:
PaddedAtEnd a
char chars[1]
cdef struct PaddedAtEndNormal:
int a
char b
char c
char d
cdef struct AlignedNestedNormal:
PaddedAtEndNormal a
char chars
cdef fused FusedPadded:
ArrayStruct
PackedArrayStruct
AlignedNested
AlignedNestedNormal
@testcase
def test_padded_structs():
"""
>>> test_padded_structs()
"""
cdef ArrayStruct a1[10]
_test_padded(a1)
cdef _test_padded(FusedPadded myarray[10]):
# test that the buffer format parser accepts our format string...
cdef FusedPadded[:] myslice = <FusedPadded[:10]> myarray
obj = myslice
cdef FusedPadded[:] myotherslice = obj
@testcase
def test_object_indices():
"""
......
......@@ -391,3 +391,59 @@ def acquire_release_cycle(obj):
del buf
gc.collect()
cdef packed struct StructArray:
int a[4]
char b[5]
@testcase_numpy_1_5
def test_memslice_structarray(data, dtype):
"""
>>> data = [(range(4), 'spam\\0'), (range(4, 8), 'ham\\0\\0'), (range(8, 12), 'eggs\\0')]
>>> dtype = np.dtype([('a', '4i'), ('b', '5b')])
>>> test_memslice_structarray([(L, map(ord, s)) for L, s in data], dtype)
0
1
2
3
spam
4
5
6
7
ham
8
9
10
11
eggs
Todo: test with string format specifier
"""
a = np.empty((3,), dtype=dtype)
a[:] = data
cdef StructArray[:] myslice = a
cdef int i, j
for i in range(3):
for j in range(4):
print myslice[i].a[j]
print myslice[i].b
@testcase_numpy_1_5
def test_structarray_errors(StructArray[:] a):
"""
>>> dtype = np.dtype([('a', '4i'), ('b', '5b')])
>>> test_structarray_errors(np.empty((5,), dtype=dtype))
>>> dtype = np.dtype([('a', '6i'), ('b', '5b')])
>>> test_structarray_errors(np.empty((5,), dtype=dtype))
Traceback (most recent call last):
...
ValueError: Expected a dimension of size 4, got 6
>>> dtype = np.dtype([('a', '(4,4)i'), ('b', '5b')])
>>> test_structarray_errors(np.empty((5,), dtype=dtype))
Traceback (most recent call last):
...
ValueError: Expected 1 dimension(s), got 2
"""
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment