Commit dc914e07 authored by Kevin Modzelewski

Copied over CPython's str.__mod__

parent c3a27c8b
......@@ -271,6 +271,21 @@ typedef ssize_t Py_ssize_t;
#if defined(_MSC_VER)
/* Py_MEMCPY(target, source, length): copy `length` bytes from `source` to
 * `target`.  Copies of 16 bytes or more are handed to memcpy(); shorter
 * copies use an inline byte loop.  Each macro argument is evaluated exactly
 * once.
 *
 * The pointers are converted with explicit char casts rather than through
 * void*, so the macro is valid both in C and when this header is included
 * from C++ (where void* does not convert implicitly to char*). */
#define Py_MEMCPY(target, source, length) do {                          \
        size_t i_, n_ = (length);                                       \
        char *t_ = (char *) (target);                                   \
        const char *s_ = (const char *) (source);                       \
        if (n_ >= 16)                                                   \
            memcpy(t_, s_, n_);                                         \
        else                                                            \
            for (i_ = 0; i_ < n_; i_++)                                 \
                t_[i_] = s_[i_];                                        \
    } while (0)
#define Py_MEMCPY memcpy
#endif /* Py_PYPORT_H */
......@@ -86,8 +86,9 @@ PyAPI_FUNC(void) PyString_ConcatAndDel(PyObject **, PyObject *);
PyAPI_FUNC(int) _PyString_Resize(PyObject **, Py_ssize_t);
PyAPI_FUNC(int) _PyString_Eq(PyObject *, PyObject*);
PyAPI_FUNC(PyObject *) PyString_Format(PyObject *, PyObject *);
// Pyston change: added const
PyAPI_FUNC(PyObject *) _PyString_FormatLong(PyObject*, int, int,
int, char**, int*);
int, const char**, int*);
PyAPI_FUNC(PyObject *) PyString_DecodeEscape(const char *, Py_ssize_t,
const char *, Py_ssize_t,
const char *);
......@@ -725,8 +725,15 @@ extern "C" void PyMem_Free(void* ptr) {
extern "C" int PyNumber_Check(PyObject*) {
// Reports whether obj counts as a number.  Returns true for instances of
// int_cls / long_cls (including subclasses); otherwise falls back to the
// CPython test, which requires the type's tp_as_number table to exist and to
// provide nb_int or nb_float.  obj must be non-NULL and have a class set.
extern "C" int PyNumber_Check(PyObject* obj) {
assert(obj && obj->cls);
// Our check, since we don't currently fill in tp_as_number:
if (isSubclass(obj->cls, int_cls) || isSubclass(obj->cls, long_cls))
return true;
// The CPython check:
return obj->cls->tp_as_number && (obj->cls->tp_as_number->nb_int || obj->cls->tp_as_number->nb_float);
extern "C" PyObject* PyNumber_Add(PyObject* lhs, PyObject* rhs) {
......@@ -62,6 +62,10 @@ extern "C" PyAPI_FUNC(PyObject*) _PyInt_Format(PyIntObject* v, int base, int new
extern "C" int _PyInt_AsInt(PyObject*) {
BoxedInt* interned_ints[NUM_INTERNED_INTS];
// If we don't have fast overflow-checking builtins, provide some slow variants:
......@@ -47,203 +47,621 @@ extern "C" BoxedString* strAdd(BoxedString* lhs, Box* _rhs) {
return new BoxedString(lhs->s + rhs->s);
extern "C" Box* strMod(BoxedString* lhs, Box* rhs) {
assert(lhs->cls == str_cls);
const BoxedTuple::GCVector* elts;
BoxedTuple::GCVector _elts;
if (rhs->cls == tuple_cls) {
elts = &static_cast<BoxedTuple*>(rhs)->elts;
} else {
elts = &_elts;
/* Format codes
* F_LJUST '-'
* F_SIGN '+'
* F_BLANK ' '
* F_ALT '#'
* F_ZERO '0'
#define F_LJUST (1 << 0)
#define F_SIGN (1 << 1)
#define F_BLANK (1 << 2)
#define F_ALT (1 << 3)
#define F_ZERO (1 << 4)
// Fetches the argument for the next format specifier, using the position
// stored in *p_argidx.  A negative arglen means args is a single non-tuple
// value, which is returned as-is; otherwise the tuple item at that index is
// returned via PyTuple_GetItem.  When the index is not below arglen, sets
// TypeError ("not enough arguments for format string") and returns NULL.
Py_LOCAL_INLINE(PyObject*) getnextarg(PyObject* args, Py_ssize_t arglen, Py_ssize_t* p_argidx) {
Py_ssize_t argidx = *p_argidx;
if (argidx < arglen) {
if (arglen < 0)
return args;
return PyTuple_GetItem(args, argidx);
PyErr_SetString(PyExc_TypeError, "not enough arguments for format string");
return NULL;
BoxedDict* dict = NULL;
if (rhs->cls == dict_cls)
dict = static_cast<BoxedDict*>(rhs);
extern "C" PyObject* _PyString_FormatLong(PyObject*, int, int, int, const char**, int*) {
const char* fmt = lhs->s.c_str();
const char* fmt_end = fmt + lhs->s.size();
static PyObject* formatfloat(PyObject* v, int flags, int prec, int type) {
char* p;
PyObject* result;
double x;
int elt_num = 0;
int num_elts = elts->size();
x = PyFloat_AsDouble(v);
if (x == -1.0 && PyErr_Occurred()) {
PyErr_Format(PyExc_TypeError, "float argument required, "
"not %.200s",
return NULL;
std::ostringstream os("");
while (fmt < fmt_end) {
if (*fmt != '%') {
os << (*fmt);
} else {
if (prec < 0)
prec = 6;
int nspace = 0;
int ndot = 0;
int nzero = 0;
int mode = 0;
while (true) {
RELEASE_ASSERT(fmt < fmt_end, "");
p = PyOS_double_to_string(x, type, prec, (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
Box* val_to_use = NULL;
if (*fmt == '(') {
if (dict == NULL)
raiseExcHelper(TypeError, "format requires a mapping");
if (p == NULL)
return NULL;
result = PyString_FromStringAndSize(p, strlen(p));
return result;
int pcount = 1;
const char* keystart = fmt;
while (pcount > 0 && fmt < fmt_end) {
char c = *fmt;
if (c == ')')
else if (c == '(')
Py_LOCAL_INLINE(int) formatint(char* buf, size_t buflen, int flags, int prec, int type, PyObject* v) {
/* fmt = '%#.' + `prec` + 'l' + `type`
worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
+ 1 + 1 = 24 */
char fmt[64]; /* plenty big enough! */
const char* sign;
long x;
x = PyInt_AsLong(v);
if (x == -1 && PyErr_Occurred()) {
PyErr_Format(PyExc_TypeError, "int argument required, not %.200s", Py_TYPE(v)->tp_name);
return -1;
if (x < 0 && type == 'u') {
type = 'd';
if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
sign = "-";
sign = "";
if (prec < 0)
prec = 1;
if ((flags & F_ALT) && (type == 'x' || type == 'X')) {
/* When converting under %#x or %#X, there are a number
* of issues that cause pain:
* - when 0 is being converted, the C standard leaves off
* the '0x' or '0X', which is inconsistent with other
* %#x/%#X conversions and inconsistent with Python's
* hex() function
* - there are platforms that violate the standard and
* convert 0 with the '0x' or '0X'
* (Metrowerks, Compaq Tru64)
* - there are platforms that give '0x' when converting
* under %#X, but convert 0 in accordance with the
* standard (OS/2 EMX)
* We can achieve the desired consistency by inserting our
* own '0x' or '0X' prefix, and substituting %x/%X in place
* of %#x/%#X.
* Note that this is the same approach as used in
* formatint() in unicodeobject.c
PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c", sign, type, prec, type);
} else {
PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c", sign, (flags & F_ALT) ? "#" : "", prec, type);
if (pcount > 0)
raiseExcHelper(ValueError, "incomplete format key");
/* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
* worst case buf = '-0x' + [0-9]*prec, where prec >= 11
if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
PyErr_SetString(PyExc_OverflowError, "formatted integer is too long (precision too large?)");
return -1;
if (sign[0])
PyOS_snprintf(buf, buflen, fmt, -x);
PyOS_snprintf(buf, buflen, fmt, x);
return (int)strlen(buf);
// Formats the argument of a %c conversion into buf.  A string argument is
// parsed with the "c" converter and any other object with the "b" converter,
// both writing into buf[0].  On success buf holds that one character followed
// by a NUL terminator and 1 (the length) is returned; if PyArg_Parse fails,
// returns -1.  buflen is not consulted in the lines shown here.
Py_LOCAL_INLINE(int) formatchar(char* buf, size_t buflen, PyObject* v) {
/* presume that the buffer is at least 2 characters long */
if (PyString_Check(v)) {
if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
return -1;
} else {
if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
return -1;
buf[1] = '\0';
return 1;
BoxedString* key = boxStrConstantSize(keystart, fmt - keystart - 1);
val_to_use = dictGetitem(dict, key);
#define FORMATBUFLEN (size_t)120
extern "C" PyObject* PyString_Format(PyObject* format, PyObject* args) {
char* fmt, *res;
Py_ssize_t arglen, argidx;
Py_ssize_t reslen, rescnt, fmtcnt;
int args_owned = 0;
PyObject* result, *orig_args;
PyObject* v, *w;
PyObject* dict = NULL;
if (format == NULL || !PyString_Check(format) || args == NULL) {
return NULL;
orig_args = args;
fmt = PyString_AS_STRING(format);
fmtcnt = PyString_GET_SIZE(format);
reslen = rescnt = fmtcnt + 100;
result = PyString_FromStringAndSize((char*)NULL, reslen);
if (result == NULL)
return NULL;
res = PyString_AsString(result);
if (PyTuple_Check(args)) {
arglen = PyTuple_GET_SIZE(args);
argidx = 0;
} else {
arglen = -1;
argidx = -2;
if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript && !PyTuple_Check(args)
&& !PyObject_TypeCheck(args, &PyBaseString_Type))
dict = args;
while (--fmtcnt >= 0) {
if (*fmt != '%') {
if (--rescnt < 0) {
rescnt = fmtcnt + 100;
reslen += rescnt;
if (_PyString_Resize(&result, reslen))
return NULL;
res = PyString_AS_STRING(result) + reslen - rescnt;
*res++ = *fmt++;
} else {
/* Got a format specifier */
int flags = 0;
Py_ssize_t width = -1;
int prec = -1;
int c = '\0';
int fill;
int isnumok;
PyObject* v = NULL;
PyObject* temp = NULL;
const char* pbuf;
int sign;
Py_ssize_t len;
char formatbuf[FORMATBUFLEN];
/* For format{int,char}() */
char* fmt_start = fmt;
Py_ssize_t argidx_start = argidx;
char c = *fmt;
if (*fmt == '(') {
char* keystart;
Py_ssize_t keylen;
PyObject* key;
int pcount = 1;
if (c == ' ') {
assert(mode == 0);
mode = 1;
} else if (c == '.') {
assert(mode == 0);
mode = 2;
} else if (mode == 0 && c == '0') {
mode = 3;
} else if ('0' <= c && c <= '9') {
assert(mode == 1 || mode == 2 || mode == 3);
if (mode == 1) {
nspace = nspace * 10 + c - '0';
} else if (mode == 2) {
ndot = ndot * 10 + c - '0';
} else if (mode == 3) {
nzero = nzero * 10 + c - '0';
} else {
if (dict == NULL) {
PyErr_SetString(PyExc_TypeError, "format requires a mapping");
goto error;
keystart = fmt;
/* Skip over balanced parentheses */
while (pcount > 0 && --fmtcnt >= 0) {
if (*fmt == ')')
else if (*fmt == '(')
} else if (c == '%') {
for (int i = 1; i < nspace; i++) {
os << ' ';
keylen = fmt - keystart - 1;
if (fmtcnt < 0 || pcount > 0) {
PyErr_SetString(PyExc_ValueError, "incomplete format key");
goto error;
key = PyString_FromStringAndSize(keystart, keylen);
if (key == NULL)
goto error;
if (args_owned) {
args_owned = 0;
args = PyObject_GetItem(dict, key);
if (args == NULL) {
goto error;
args_owned = 1;
arglen = -1;
argidx = -2;
while (--fmtcnt >= 0) {
switch (c = *fmt++) {
case '-':
flags |= F_LJUST;
case '+':
flags |= F_SIGN;
case ' ':
flags |= F_BLANK;
case '#':
flags |= F_ALT;
case '0':
flags |= F_ZERO;
os << '%';
} else if (c == 's' || c == 'r') {
RELEASE_ASSERT(ndot == 0, "");
RELEASE_ASSERT(nzero == 0, "");
RELEASE_ASSERT(nspace == 0, "");
if (!val_to_use) {
if (elt_num >= num_elts)
raiseExcHelper(TypeError, "not enough arguments for format string");
val_to_use = (*elts)[elt_num];
BoxedString* s;
if (c == 's')
s = str(val_to_use);
s = repr(val_to_use);
os << s->s;
if (c == '*') {
v = getnextarg(args, arglen, &argidx);
if (v == NULL)
goto error;
if (!PyInt_Check(v)) {
PyErr_SetString(PyExc_TypeError, "* wants int");
goto error;
width = PyInt_AsSsize_t(v);
if (width == -1 && PyErr_Occurred())
goto error;
if (width < 0) {
flags |= F_LJUST;
width = -width;
if (--fmtcnt >= 0)
c = *fmt++;
} else if (c >= 0 && isdigit(c)) {
width = c - '0';
while (--fmtcnt >= 0) {
c = Py_CHARMASK(*fmt++);
if (!isdigit(c))
if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
PyErr_SetString(PyExc_ValueError, "width too big");
goto error;
width = width * 10 + (c - '0');
if (c == '.') {
prec = 0;
if (--fmtcnt >= 0)
c = *fmt++;
if (c == '*') {
v = getnextarg(args, arglen, &argidx);
if (v == NULL)
goto error;
if (!PyInt_Check(v)) {
PyErr_SetString(PyExc_TypeError, "* wants int");
goto error;
prec = _PyInt_AsInt(v);
if (prec == -1 && PyErr_Occurred())
goto error;
if (prec < 0)
prec = 0;
if (--fmtcnt >= 0)
c = *fmt++;
} else if (c >= 0 && isdigit(c)) {
prec = c - '0';
while (--fmtcnt >= 0) {
c = Py_CHARMASK(*fmt++);
if (!isdigit(c))
} else if (c == 'c') {
if (!val_to_use) {
if (elt_num >= num_elts)
raiseExcHelper(TypeError, "not enough arguments for format string");
val_to_use = (*elts)[elt_num];
if (prec > (INT_MAX - ((int)c - '0')) / 10) {
PyErr_SetString(PyExc_ValueError, "prec too big");
goto error;
RELEASE_ASSERT(isSubclass(val_to_use->cls, int_cls), "unsupported");
RELEASE_ASSERT(nspace == 0, "unsupported");
RELEASE_ASSERT(ndot == 0, "unsupported");
RELEASE_ASSERT(nzero == 0, "unsupported");
int64_t n = static_cast<BoxedInt*>(val_to_use)->n;
if (n < 0)
raiseExcHelper(OverflowError, "unsigned byte integer is less than minimum");
if (n >= 256)
raiseExcHelper(OverflowError, "unsigned byte integer is greater than maximum");
os << (char)n;
prec = prec * 10 + (c - '0');
} /* prec */
if (fmtcnt >= 0) {
if (c == 'h' || c == 'l' || c == 'L') {
if (--fmtcnt >= 0)
c = *fmt++;
if (fmtcnt < 0) {
PyErr_SetString(PyExc_ValueError, "incomplete format");
goto error;
if (c != '%') {
v = getnextarg(args, arglen, &argidx);
if (v == NULL)
goto error;
sign = 0;
fill = ' ';
switch (c) {
case '%':
pbuf = "%";
len = 1;
} else if (c == 'd' || c == 'i') {
if (!val_to_use) {
if (elt_num >= num_elts)
raiseExcHelper(TypeError, "not enough arguments for format string");
val_to_use = (*elts)[elt_num];
RELEASE_ASSERT(isSubclass(val_to_use->cls, int_cls), "unsupported");
std::ostringstream fmt("");
fmt << '%';
if (nspace)
fmt << ' ' << nspace;
else if (ndot)
fmt << '.' << ndot;
else if (nzero)
fmt << '0' << nzero;
fmt << "ld";
char buf[20];
snprintf(buf, 20, fmt.str().c_str(), static_cast<BoxedInt*>(val_to_use)->n);
os << std::string(buf);
case 's':
if (PyUnicode_Check(v)) {
fmt = fmt_start;
argidx = argidx_start;
goto unicode;
temp = _PyObject_Str(v);
if (temp != NULL && PyUnicode_Check(temp)) {
fmt = fmt_start;
argidx = argidx_start;
goto unicode;
/* Fall through */
case 'r':
if (c == 'r')
temp = PyObject_Repr(v);
if (temp == NULL)
goto error;
if (!PyString_Check(temp)) {
PyErr_SetString(PyExc_TypeError, "%s argument has non-string str()");
goto error;
pbuf = PyString_AS_STRING(temp);
len = PyString_GET_SIZE(temp);
if (prec >= 0 && len > prec)
len = prec;
} else if (c == 'f') {
if (!val_to_use) {
if (elt_num >= num_elts)
raiseExcHelper(TypeError, "not enough arguments for format string");
val_to_use = (*elts)[elt_num];
double d;
if (val_to_use->cls == float_cls) {
d = static_cast<BoxedFloat*>(val_to_use)->d;
} else if (isSubclass(val_to_use->cls, int_cls)) {
d = static_cast<BoxedInt*>(val_to_use)->n;
case 'i':
case 'd':
case 'u':
case 'o':
case 'x':
case 'X':
if (c == 'i')
c = 'd';
isnumok = 0;
if (PyNumber_Check(v)) {
PyObject* iobj = NULL;
if (PyInt_Check(v) || (PyLong_Check(v))) {
iobj = v;
} else {
RELEASE_ASSERT(0, "unsupported");
std::ostringstream fmt("");
fmt << '%';
if (nspace)
fmt << ' ' << nspace;
else if (ndot)
fmt << '.' << ndot;
else if (nzero)
fmt << '0' << nzero;
fmt << "f";
char buf[20];
snprintf(buf, 20, fmt.str().c_str(), d);
os << std::string(buf);
iobj = PyNumber_Int(v);
if (iobj == NULL) {
iobj = PyNumber_Long(v);
if (iobj != NULL) {
if (PyInt_Check(iobj)) {
isnumok = 1;
pbuf = formatbuf;
// Pyston change:
len = formatint(formatbuf /* pbuf */, sizeof(formatbuf), flags, prec, c, iobj);
if (len < 0)
goto error;
sign = 1;
} else if (PyLong_Check(iobj)) {
int ilen;
isnumok = 1;
temp = _PyString_FormatLong(iobj, flags, prec, c, &pbuf, &ilen);
len = ilen;
if (!temp)
goto error;
sign = 1;
} else {
RELEASE_ASSERT(0, "unsupported format character '%c'", c);
if (!isnumok) {
PyErr_Format(PyExc_TypeError, "%%%c format: a number is required, "
"not %.200s",
c, Py_TYPE(v)->tp_name);
goto error;
assert(fmt == fmt_end && "incomplete format");
if (dict == NULL && elt_num < num_elts) {
raiseExcHelper(TypeError, "not all arguments converted during string formatting");
return boxString(os.str());
if (flags & F_ZERO)
fill = '0';
case 'e':
case 'E':
case 'f':
case 'F':
case 'g':
case 'G':
temp = formatfloat(v, flags, prec, c);
if (temp == NULL)
goto error;
pbuf = PyString_AS_STRING(temp);
len = PyString_GET_SIZE(temp);
sign = 1;
if (flags & F_ZERO)
fill = '0';
case 'c':
if (PyUnicode_Check(v)) {
fmt = fmt_start;
argidx = argidx_start;
goto unicode;
pbuf = formatbuf;
// Pyston change:
len = formatchar(formatbuf /* was pbuf */, sizeof(formatbuf), v);
if (len < 0)
goto error;
PyErr_Format(PyExc_ValueError, "unsupported format character '%c' (0x%x) "
"at index %zd",
c, c, (Py_ssize_t)(fmt - 1 - PyString_AsString(format)));
goto error;
if (sign) {
if (*pbuf == '-' || *pbuf == '+') {
sign = *pbuf++;
} else if (flags & F_SIGN)
sign = '+';
else if (flags & F_BLANK)
sign = ' ';
sign = 0;
if (width < len)
width = len;
if (rescnt - (sign != 0) < width) {
reslen -= rescnt;
rescnt = width + fmtcnt + 100;
reslen += rescnt;
if (reslen < 0) {
return PyErr_NoMemory();
if (_PyString_Resize(&result, reslen)) {
return NULL;
res = PyString_AS_STRING(result) + reslen - rescnt;
if (sign) {
if (fill != ' ')
*res++ = sign;
if (width > len)
if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
assert(pbuf[0] == '0');
assert(pbuf[1] == c);
if (fill != ' ') {
*res++ = *pbuf++;
*res++ = *pbuf++;
rescnt -= 2;
width -= 2;
if (width < 0)
width = 0;
len -= 2;
if (width > len && !(flags & F_LJUST)) {
do {
*res++ = fill;
} while (--width > len);
if (fill == ' ') {
if (sign)
*res++ = sign;
if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
assert(pbuf[0] == '0');
assert(pbuf[1] == c);
*res++ = *pbuf++;
*res++ = *pbuf++;
Py_MEMCPY(res, pbuf, len);
res += len;
rescnt -= len;
while (--width >= len) {
*res++ = ' ';
if (dict && (argidx < arglen) && c != '%') {
PyErr_SetString(PyExc_TypeError, "not all arguments converted during string formatting");
goto error;
} /* '%' */
} /* until end */
if (argidx < arglen && !dict) {
PyErr_SetString(PyExc_TypeError, "not all arguments converted during string formatting");
goto error;
if (args_owned) {
if (_PyString_Resize(&result, reslen - rescnt))
return NULL;
return result;
if (args_owned) {
args_owned = 0;
/* Fiddle args right (remove the first argidx arguments) */
if (PyTuple_Check(orig_args) && argidx > 0) {
PyObject* v;
Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
v = PyTuple_New(n);
if (v == NULL)
goto error;
while (--n >= 0) {
PyObject* w = PyTuple_GET_ITEM(orig_args, n + argidx);
PyTuple_SET_ITEM(v, n, w);
args = v;
} else {
args = orig_args;
args_owned = 1;
/* Take what we have of the result and let the Unicode formatting
function format the rest of the input. */
rescnt = res - PyString_AS_STRING(result);
if (_PyString_Resize(&result, rescnt))
goto error;
fmtcnt = PyString_GET_SIZE(format) - (fmt - PyString_AS_STRING(format));
format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
if (format == NULL)
goto error;
v = PyUnicode_Format(format, args);
if (v == NULL)
goto error;
/* Paste what we have (result) to what the Unicode formatting
function returned (v) and return the result (or error) */
w = PyUnicode_Concat(result, v);
return w;
#endif /* Py_USING_UNICODE */
if (args_owned) {
return NULL;
// str.__mod__ entry point: passes the format string (lhs) and its argument
// object (rhs) straight to PyString_Format and returns whatever it produces.
extern "C" Box* strMod(BoxedString* lhs, Box* rhs) {
Box* rtn = PyString_Format(lhs, rhs);
return rtn;
extern "C" Box* strMul(BoxedString* lhs, Box* rhs) {
......@@ -1021,7 +1439,8 @@ BoxedString* createUninitializedString(ssize_t n) {
char* getWriteableStringContents(BoxedString* s) {
ASSERT(s->s.size() > 0, "not sure whether this is valid for strings with zero size");
if (s->s.size() == 0)
return NULL;
// After doing some reading, I think this is ok:
......@@ -80,3 +80,4 @@ print "hello world".partition("o")
print "hello world"[False:True:True]
print "{hello}".format(hello="world")
print "%.3s" % "hello world"
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment