Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
P
Pyston
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Boxiang Sun
Pyston
Commits
a7c10a6e
Commit
a7c10a6e
authored
Feb 19, 2015
by
Kevin Modzelewski
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'undingen-codecs'
parents
2642c04e
03426fc2
Changes
23
Hide whitespace changes
Inline
Side-by-side
Showing
23 changed files
with
1826 additions
and
33 deletions
+1826
-33
Makefile
Makefile
+1
-1
from_cpython/CMakeLists.txt
from_cpython/CMakeLists.txt
+1
-1
from_cpython/Include/Python.h
from_cpython/Include/Python.h
+1
-0
from_cpython/Include/abstract.h
from_cpython/Include/abstract.h
+6
-6
from_cpython/Include/codecs.h
from_cpython/Include/codecs.h
+141
-0
from_cpython/Include/import.h
from_cpython/Include/import.h
+2
-2
from_cpython/Include/unicodeobject.h
from_cpython/Include/unicodeobject.h
+1
-1
from_cpython/Lib/encodings/__init__.py
from_cpython/Lib/encodings/__init__.py
+3
-1
minibenchmarks/pyaes.py
minibenchmarks/pyaes.py
+508
-0
src/capi/abstract.cpp
src/capi/abstract.cpp
+8
-4
src/capi/codecs.cpp
src/capi/codecs.cpp
+790
-0
src/capi/modsupport.cpp
src/capi/modsupport.cpp
+8
-0
src/runtime/builtin_modules/builtins.cpp
src/runtime/builtin_modules/builtins.cpp
+26
-6
src/runtime/builtin_modules/sys.cpp
src/runtime/builtin_modules/sys.cpp
+7
-0
src/runtime/capi.cpp
src/runtime/capi.cpp
+18
-2
src/runtime/import.cpp
src/runtime/import.cpp
+23
-4
src/runtime/str.cpp
src/runtime/str.cpp
+230
-2
src/runtime/tuple.cpp
src/runtime/tuple.cpp
+1
-3
src/runtime/types.cpp
src/runtime/types.cpp
+2
-0
src/runtime/unicode.cpp
src/runtime/unicode.cpp
+36
-0
test/tests/optparse_test.py
test/tests/optparse_test.py
+1
-0
test/tests/str_encode_decode.py
test/tests/str_encode_decode.py
+11
-0
test/tests/sys_test.py
test/tests/sys_test.py
+1
-0
No files found.
Makefile
View file @
a7c10a6e
...
@@ -291,7 +291,7 @@ STDLIB_OBJS := stdlib.bc.o stdlib.stripped.bc.o
...
@@ -291,7 +291,7 @@ STDLIB_OBJS := stdlib.bc.o stdlib.stripped.bc.o
STDLIB_RELEASE_OBJS
:=
stdlib.release.bc.o
STDLIB_RELEASE_OBJS
:=
stdlib.release.bc.o
ASM_SRCS
:=
$(
wildcard
src/runtime/
*
.S
)
ASM_SRCS
:=
$(
wildcard
src/runtime/
*
.S
)
STDMODULE_SRCS
:=
errnomodule.c shamodule.c sha256module.c sha512module.c _math.c mathmodule.c md5.c md5module.c _randommodule.c _sre.c operator.c binascii.c pwdmodule.c posixmodule.c _struct.c datetimemodule.c _functoolsmodule.c _collectionsmodule.c itertoolsmodule.c resource.c signalmodule.c selectmodule.c fcntlmodule.c timemodule.c arraymodule.c zlibmodule.c
$(EXTRA_STDMODULE_SRCS)
STDMODULE_SRCS
:=
errnomodule.c shamodule.c sha256module.c sha512module.c _math.c mathmodule.c md5.c md5module.c _randommodule.c _sre.c operator.c binascii.c pwdmodule.c posixmodule.c _struct.c datetimemodule.c _functoolsmodule.c _collectionsmodule.c itertoolsmodule.c resource.c signalmodule.c selectmodule.c fcntlmodule.c timemodule.c arraymodule.c zlibmodule.c
_codecsmodule.c
$(EXTRA_STDMODULE_SRCS)
STDOBJECT_SRCS
:=
structseq.c capsule.c stringobject.c
$(EXTRA_STDOBJECT_SRCS)
STDOBJECT_SRCS
:=
structseq.c capsule.c stringobject.c
$(EXTRA_STDOBJECT_SRCS)
STDPYTHON_SRCS
:=
pyctype.c getargs.c formatter_string.c pystrtod.c dtoa.c
$(EXTRA_STDPYTHON_SRCS)
STDPYTHON_SRCS
:=
pyctype.c getargs.c formatter_string.c pystrtod.c dtoa.c
$(EXTRA_STDPYTHON_SRCS)
FROM_CPYTHON_SRCS
:=
$(
addprefix
from_cpython/Modules/,
$(STDMODULE_SRCS)
)
$(
addprefix
from_cpython/Objects/,
$(STDOBJECT_SRCS)
)
$(
addprefix
from_cpython/Python/,
$(STDPYTHON_SRCS)
)
FROM_CPYTHON_SRCS
:=
$(
addprefix
from_cpython/Modules/,
$(STDMODULE_SRCS)
)
$(
addprefix
from_cpython/Objects/,
$(STDOBJECT_SRCS)
)
$(
addprefix
from_cpython/Python/,
$(STDPYTHON_SRCS)
)
...
...
from_cpython/CMakeLists.txt
View file @
a7c10a6e
...
@@ -15,7 +15,7 @@ endforeach(STDLIB_FILE)
...
@@ -15,7 +15,7 @@ endforeach(STDLIB_FILE)
add_custom_target
(
copy_stdlib ALL DEPENDS
${
STDLIB_TARGETS
}
)
add_custom_target
(
copy_stdlib ALL DEPENDS
${
STDLIB_TARGETS
}
)
# compile specified files in from_cpython/Modules
# compile specified files in from_cpython/Modules
file
(
GLOB_RECURSE STDMODULE_SRCS Modules errnomodule.c shamodule.c sha256module.c sha512module.c _math.c mathmodule.c md5.c md5module.c _randommodule.c _sre.c operator.c binascii.c pwdmodule.c posixmodule.c _struct.c datetimemodule.c _functoolsmodule.c _collectionsmodule.c itertoolsmodule.c resource.c signalmodule.c selectmodule.c fcntlmodule.c timemodule.c arraymodule.c zlibmodule.c
)
file
(
GLOB_RECURSE STDMODULE_SRCS Modules errnomodule.c shamodule.c sha256module.c sha512module.c _math.c mathmodule.c md5.c md5module.c _randommodule.c _sre.c operator.c binascii.c pwdmodule.c posixmodule.c _struct.c datetimemodule.c _functoolsmodule.c _collectionsmodule.c itertoolsmodule.c resource.c signalmodule.c selectmodule.c fcntlmodule.c timemodule.c arraymodule.c zlibmodule.c
_codecsmodule.c
)
# compile specified files in from_cpython/Objects
# compile specified files in from_cpython/Objects
file
(
GLOB_RECURSE STDOBJECT_SRCS Objects structseq.c capsule.c stringobject.c
)
file
(
GLOB_RECURSE STDOBJECT_SRCS Objects structseq.c capsule.c stringobject.c
)
...
...
from_cpython/Include/Python.h
View file @
a7c10a6e
...
@@ -72,6 +72,7 @@
...
@@ -72,6 +72,7 @@
#include "descrobject.h"
#include "descrobject.h"
#include "warnings.h"
#include "warnings.h"
#include "codecs.h"
#include "pyerrors.h"
#include "pyerrors.h"
#include "pystate.h"
#include "pystate.h"
...
...
from_cpython/Include/abstract.h
View file @
a7c10a6e
...
@@ -326,7 +326,7 @@ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx*/
...
@@ -326,7 +326,7 @@ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx*/
*/
*/
PyAPI_FUNC
(
PyObject
*
)
PyObject_CallFunction
(
PyObject
*
callable_object
,
PyAPI_FUNC
(
PyObject
*
)
PyObject_CallFunction
(
PyObject
*
callable_object
,
char
*
format
,
...)
PYSTON_NOEXCEPT
;
c
onst
c
har
*
format
,
...)
PYSTON_NOEXCEPT
;
/*
/*
Call a callable Python object, callable_object, with a
Call a callable Python object, callable_object, with a
...
@@ -339,8 +339,8 @@ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx*/
...
@@ -339,8 +339,8 @@ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx*/
*/
*/
PyAPI_FUNC
(
PyObject
*
)
PyObject_CallMethod
(
PyObject
*
o
,
char
*
m
,
PyAPI_FUNC
(
PyObject
*
)
PyObject_CallMethod
(
PyObject
*
o
,
c
onst
c
har
*
m
,
char
*
format
,
...)
PYSTON_NOEXCEPT
;
c
onst
c
har
*
format
,
...)
PYSTON_NOEXCEPT
;
/*
/*
Call the method named m of object o with a variable number of
Call the method named m of object o with a variable number of
...
@@ -352,10 +352,10 @@ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx*/
...
@@ -352,10 +352,10 @@ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx*/
*/
*/
PyAPI_FUNC
(
PyObject
*
)
_PyObject_CallFunction_SizeT
(
PyObject
*
callable
,
PyAPI_FUNC
(
PyObject
*
)
_PyObject_CallFunction_SizeT
(
PyObject
*
callable
,
char
*
format
,
...)
PYSTON_NOEXCEPT
;
c
onst
c
har
*
format
,
...)
PYSTON_NOEXCEPT
;
PyAPI_FUNC
(
PyObject
*
)
_PyObject_CallMethod_SizeT
(
PyObject
*
o
,
PyAPI_FUNC
(
PyObject
*
)
_PyObject_CallMethod_SizeT
(
PyObject
*
o
,
char
*
name
,
c
onst
c
har
*
name
,
char
*
format
,
...)
PYSTON_NOEXCEPT
;
c
onst
c
har
*
format
,
...)
PYSTON_NOEXCEPT
;
PyAPI_FUNC
(
PyObject
*
)
PyObject_CallFunctionObjArgs
(
PyObject
*
callable
,
PyAPI_FUNC
(
PyObject
*
)
PyObject_CallFunctionObjArgs
(
PyObject
*
callable
,
...)
PYSTON_NOEXCEPT
;
...)
PYSTON_NOEXCEPT
;
...
...
from_cpython/Include/codecs.h
0 → 100644
View file @
a7c10a6e
// This file is originally from CPython 2.7, with modifications for Pyston
#ifndef Py_CODECREGISTRY_H
#define Py_CODECREGISTRY_H
#ifdef __cplusplus
extern
"C"
{
#endif
/* ------------------------------------------------------------------------
Python Codec Registry and support functions
Written by Marc-Andre Lemburg (mal@lemburg.com).
Copyright (c) Corporation for National Research Initiatives.
------------------------------------------------------------------------ */
/* Register a new codec search function.
As side effect, this tries to load the encodings package, if not
yet done, to make sure that it is always first in the list of
search functions.
The search_function's refcount is incremented by this function. */
PyAPI_FUNC
(
int
)
PyCodec_Register
(
PyObject
*
search_function
)
PYSTON_NOEXCEPT
;
/* Codec register lookup API.
Looks up the given encoding and returns a CodecInfo object with
function attributes which implement the different aspects of
processing the encoding.
The encoding string is looked up converted to all lower-case
characters. This makes encodings looked up through this mechanism
effectively case-insensitive.
If no codec is found, a KeyError is set and NULL returned.
As side effect, this tries to load the encodings package, if not
yet done. This is part of the lazy load strategy for the encodings
package.
*/
PyAPI_FUNC
(
PyObject
*
)
_PyCodec_Lookup
(
const
char
*
encoding
)
PYSTON_NOEXCEPT
;
/* Generic codec based encoding API.
object is passed through the encoder function found for the given
encoding using the error handling method defined by errors. errors
may be NULL to use the default method defined for the codec.
Raises a LookupError in case no encoder can be found.
*/
PyAPI_FUNC
(
PyObject
*
)
PyCodec_Encode
(
PyObject
*
object
,
const
char
*
encoding
,
const
char
*
errors
)
PYSTON_NOEXCEPT
;
/* Generic codec based decoding API.
object is passed through the decoder function found for the given
encoding using the error handling method defined by errors. errors
may be NULL to use the default method defined for the codec.
Raises a LookupError in case no decoder can be found.
*/
PyAPI_FUNC
(
PyObject
*
)
PyCodec_Decode
(
PyObject
*
object
,
const
char
*
encoding
,
const
char
*
errors
)
PYSTON_NOEXCEPT
;
/* --- Codec Lookup APIs --------------------------------------------------
All APIs return a codec object with incremented refcount and are
based on _PyCodec_Lookup(). The same comments w/r to the encoding
name also apply to these APIs.
*/
/* Get an encoder function for the given encoding. */
PyAPI_FUNC
(
PyObject
*
)
PyCodec_Encoder
(
const
char
*
encoding
)
PYSTON_NOEXCEPT
;
/* Get a decoder function for the given encoding. */
PyAPI_FUNC
(
PyObject
*
)
PyCodec_Decoder
(
const
char
*
encoding
)
PYSTON_NOEXCEPT
;
/* Get an IncrementalEncoder object for the given encoding. */
PyAPI_FUNC
(
PyObject
*
)
PyCodec_IncrementalEncoder
(
const
char
*
encoding
,
const
char
*
errors
)
PYSTON_NOEXCEPT
;
/* Get an IncrementalDecoder object for the given encoding. */
PyAPI_FUNC
(
PyObject
*
)
PyCodec_IncrementalDecoder
(
const
char
*
encoding
,
const
char
*
errors
)
PYSTON_NOEXCEPT
;
/* Get a StreamReader factory function for the given encoding. */
PyAPI_FUNC
(
PyObject
*
)
PyCodec_StreamReader
(
const
char
*
encoding
,
PyObject
*
stream
,
const
char
*
errors
)
PYSTON_NOEXCEPT
;
/* Get a StreamWriter factory function for the given encoding. */
PyAPI_FUNC
(
PyObject
*
)
PyCodec_StreamWriter
(
const
char
*
encoding
,
PyObject
*
stream
,
const
char
*
errors
)
PYSTON_NOEXCEPT
;
/* Unicode encoding error handling callback registry API */
/* Register the error handling callback function error under the given
name. This function will be called by the codec when it encounters
unencodable characters/undecodable bytes and doesn't know the
callback name, when name is specified as the error parameter
in the call to the encode/decode function.
Return 0 on success, -1 on error */
PyAPI_FUNC
(
int
)
PyCodec_RegisterError
(
const
char
*
name
,
PyObject
*
error
)
PYSTON_NOEXCEPT
;
/* Lookup the error handling callback function registered under the given
name. As a special case NULL can be passed, in which case
the error handling callback for "strict" will be returned. */
PyAPI_FUNC
(
PyObject
*
)
PyCodec_LookupError
(
const
char
*
name
)
PYSTON_NOEXCEPT
;
/* raise exc as an exception */
PyAPI_FUNC
(
PyObject
*
)
PyCodec_StrictErrors
(
PyObject
*
exc
)
PYSTON_NOEXCEPT
;
/* ignore the unicode error, skipping the faulty input */
PyAPI_FUNC
(
PyObject
*
)
PyCodec_IgnoreErrors
(
PyObject
*
exc
)
PYSTON_NOEXCEPT
;
/* replace the unicode encode error with ? or U+FFFD */
PyAPI_FUNC
(
PyObject
*
)
PyCodec_ReplaceErrors
(
PyObject
*
exc
)
PYSTON_NOEXCEPT
;
/* replace the unicode encode error with XML character references */
PyAPI_FUNC
(
PyObject
*
)
PyCodec_XMLCharRefReplaceErrors
(
PyObject
*
exc
)
PYSTON_NOEXCEPT
;
/* replace the unicode encode error with backslash escapes (\x, \u and \U) */
PyAPI_FUNC
(
PyObject
*
)
PyCodec_BackslashReplaceErrors
(
PyObject
*
exc
)
PYSTON_NOEXCEPT
;
#ifdef __cplusplus
}
#endif
#endif
/* !Py_CODECREGISTRY_H */
from_cpython/Include/import.h
View file @
a7c10a6e
...
@@ -9,14 +9,14 @@ extern "C" {
...
@@ -9,14 +9,14 @@ extern "C" {
#endif
#endif
PyAPI_FUNC
(
long
)
PyImport_GetMagicNumber
(
void
)
PYSTON_NOEXCEPT
;
PyAPI_FUNC
(
long
)
PyImport_GetMagicNumber
(
void
)
PYSTON_NOEXCEPT
;
PyAPI_FUNC
(
PyObject
*
)
PyImport_ExecCodeModule
(
char
*
name
,
PyObject
*
co
)
PYSTON_NOEXCEPT
;
PyAPI_FUNC
(
PyObject
*
)
PyImport_ExecCodeModule
(
c
onst
c
har
*
name
,
PyObject
*
co
)
PYSTON_NOEXCEPT
;
PyAPI_FUNC
(
PyObject
*
)
PyImport_ExecCodeModuleEx
(
PyAPI_FUNC
(
PyObject
*
)
PyImport_ExecCodeModuleEx
(
char
*
name
,
PyObject
*
co
,
char
*
pathname
)
PYSTON_NOEXCEPT
;
char
*
name
,
PyObject
*
co
,
char
*
pathname
)
PYSTON_NOEXCEPT
;
PyAPI_FUNC
(
PyObject
*
)
PyImport_GetModuleDict
(
void
)
PYSTON_NOEXCEPT
;
PyAPI_FUNC
(
PyObject
*
)
PyImport_GetModuleDict
(
void
)
PYSTON_NOEXCEPT
;
PyAPI_FUNC
(
PyObject
*
)
PyImport_AddModule
(
const
char
*
name
)
PYSTON_NOEXCEPT
;
PyAPI_FUNC
(
PyObject
*
)
PyImport_AddModule
(
const
char
*
name
)
PYSTON_NOEXCEPT
;
PyAPI_FUNC
(
PyObject
*
)
PyImport_ImportModule
(
const
char
*
name
)
PYSTON_NOEXCEPT
;
PyAPI_FUNC
(
PyObject
*
)
PyImport_ImportModule
(
const
char
*
name
)
PYSTON_NOEXCEPT
;
PyAPI_FUNC
(
PyObject
*
)
PyImport_ImportModuleNoBlock
(
const
char
*
)
PYSTON_NOEXCEPT
;
PyAPI_FUNC
(
PyObject
*
)
PyImport_ImportModuleNoBlock
(
const
char
*
)
PYSTON_NOEXCEPT
;
PyAPI_FUNC
(
PyObject
*
)
PyImport_ImportModuleLevel
(
char
*
name
,
PyAPI_FUNC
(
PyObject
*
)
PyImport_ImportModuleLevel
(
c
onst
c
har
*
name
,
PyObject
*
globals
,
PyObject
*
locals
,
PyObject
*
fromlist
,
int
level
)
PYSTON_NOEXCEPT
;
PyObject
*
globals
,
PyObject
*
locals
,
PyObject
*
fromlist
,
int
level
)
PYSTON_NOEXCEPT
;
#define PyImport_ImportModuleEx(n, g, l, f) \
#define PyImport_ImportModuleEx(n, g, l, f) \
...
...
from_cpython/Include/unicodeobject.h
View file @
a7c10a6e
...
@@ -986,7 +986,7 @@ PyObject *_PyUnicode_DecodeUnicodeInternal(
...
@@ -986,7 +986,7 @@ PyObject *_PyUnicode_DecodeUnicodeInternal(
const
char
*
string
,
const
char
*
string
,
Py_ssize_t
length
,
Py_ssize_t
length
,
const
char
*
errors
const
char
*
errors
);
)
PYSTON_NOEXCEPT
;
/* --- Latin-1 Codecs -----------------------------------------------------
/* --- Latin-1 Codecs -----------------------------------------------------
...
...
from_cpython/Lib/encodings/__init__.py
View file @
a7c10a6e
...
@@ -43,7 +43,9 @@ _norm_encoding_map = (' . '
...
@@ -43,7 +43,9 @@ _norm_encoding_map = (' . '
' '
)
' '
)
_aliases
=
aliases
.
aliases
_aliases
=
aliases
.
aliases
class
CodecRegistryError
(
LookupError
,
SystemError
):
# Pyston change: we don't support multiple inheritance yet
#class CodecRegistryError(LookupError, SystemError):
class
CodecRegistryError
(
LookupError
):
pass
pass
def
normalize_encoding
(
encoding
):
def
normalize_encoding
(
encoding
):
...
...
minibenchmarks/pyaes.py
0 → 100644
View file @
a7c10a6e
"""Simple AES cipher implementation in pure Python following PEP-272 API
Homepage: https://bitbucket.org/intgr/pyaes/
The goal of this module is to be as fast as reasonable in Python while still
being Pythonic and readable/understandable. It is licensed under the permissive
MIT license.
Hopefully the code is readable and commented enough that it can serve as an
introduction to the AES cipher for Python coders. In fact, it should go along
well with the Stick Figure Guide to AES:
http://www.moserware.com/2009/09/stick-figure-guide-to-advanced.html
Contrary to intuition, this implementation numbers the 4x4 matrices from top to
bottom for efficiency reasons::
0 4 8 12
1 5 9 13
2 6 10 14
3 7 11 15
Effectively it's the transposition of what you'd expect. This actually makes
the code simpler -- except the ShiftRows step, but hopefully the explanation
there clears it up.
"""
####
# Copyright (c) 2010 Marti Raudsepp <marti@juffo.org>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
####
from
array
import
array
# Globals mandated by PEP 272:
# http://www.python.org/dev/peps/pep-0272/
MODE_ECB
=
1
MODE_CBC
=
2
#MODE_CTR = 6
block_size
=
16
key_size
=
None
def new(key, mode, IV=None):
    """Create a cipher-mode object for `key` (PEP 272 style factory).

    mode -- MODE_ECB or MODE_CBC.
    IV   -- initialization vector; required for MODE_CBC, ignored for
            MODE_ECB.

    Returns an ECBMode or CBCMode instance wrapping an AES cipher built
    from `key`.

    Raises ValueError when MODE_CBC is requested without an IV, and
    NotImplementedError for any other mode value.  AES(key) itself raises
    ValueError for an unsupported key length.
    """
    if mode == MODE_ECB:
        return ECBMode(AES(key))

    if mode == MODE_CBC:
        if IV is None:
            # Call form of raise: valid on both Python 2 and Python 3,
            # unlike the old "raise ValueError, msg" statement.
            raise ValueError("CBC mode needs an IV value!")
        return CBCMode(AES(key), IV)

    raise NotImplementedError
#### AES cipher implementation
class AES(object):
    """AES block cipher working in place on 16-element blocks.

    A block is a mutable, integer-indexed sequence of 16 byte values (the
    mode classes in this file pass array('B') slices).  The 4x4 state is
    stored column after column, i.e. transposed with respect to the usual
    presentation; see shift_rows() for the resulting index layout.
    """

    block_size = 16

    def __init__(self, key):
        self.setkey(key)

    def setkey(self, key):
        """Sets the key and performs key expansion.

        Raises ValueError unless len(key) is 16, 24 or 32.  Note that
        self.key and self.key_size are assigned before the length check.
        """
        self.key = key
        self.key_size = len(key)

        if self.key_size == 16:
            self.rounds = 10
        elif self.key_size == 24:
            self.rounds = 12
        elif self.key_size == 32:
            self.rounds = 14
        else:
            # Call form of raise: valid on both Python 2 and Python 3.
            raise ValueError("Key length must be 16, 24 or 32 bytes")

        self.expand_key()

    def expand_key(self):
        """Performs AES key expansion on self.key and stores in self.exkey"""

        # The key schedule specifies how parts of the key are fed into the
        # cipher's round functions. "Key expansion" means performing this
        # schedule in advance. Almost all implementations do this.
        #
        # Here's a description of AES key schedule:
        # http://en.wikipedia.org/wiki/Rijndael_key_schedule

        # The expanded key starts with the actual key itself
        exkey = array('B', self.key)

        # extra key expansion steps
        if self.key_size == 16:
            extra_cnt = 0
        elif self.key_size == 24:
            extra_cnt = 2
        else:
            extra_cnt = 3

        # 4-byte temporary variable for key expansion
        word = exkey[-4:]
        # Each expansion cycle uses 'i' once for Rcon table lookup
        for i in xrange(1, 11):
            #### key schedule core:
            # left-rotate by 1 byte
            word = word[1:4] + word[0:1]

            # apply S-box to all bytes
            for j in xrange(4):
                word[j] = aes_sbox[word[j]]

            # apply the Rcon table to the leftmost byte
            word[0] = word[0] ^ aes_Rcon[i]
            #### end key schedule core

            for z in xrange(4):
                for j in xrange(4):
                    # mix in bytes from the last subkey
                    word[j] ^= exkey[-self.key_size + j]
                exkey.extend(word)

            # Last key expansion cycle always finishes here
            if len(exkey) >= (self.rounds + 1) * self.block_size:
                break

            # Special substitution step for 256-bit key
            if self.key_size == 32:
                for j in xrange(4):
                    # mix in bytes from the last subkey XORed with S-box of
                    # current word bytes
                    word[j] = aes_sbox[word[j]] ^ exkey[-self.key_size + j]
                exkey.extend(word)

            # Twice for 192-bit key, thrice for 256-bit key
            for z in xrange(extra_cnt):
                for j in xrange(4):
                    # mix in bytes from the last subkey
                    word[j] ^= exkey[-self.key_size + j]
                exkey.extend(word)

        self.exkey = exkey

    def add_round_key(self, block, round):
        """AddRoundKey step in AES. This is where the key is mixed into plaintext

        XORs the 16 bytes of self.exkey starting at round * 16 into block.
        """
        offset = round * 16
        exkey = self.exkey

        for i in xrange(16):
            block[i] ^= exkey[offset + i]

    def sub_bytes(self, block, sbox):
        """SubBytes step, apply S-box to all bytes

        Depending on whether encrypting or decrypting, a different sbox array
        is passed in.
        """
        for i in xrange(16):
            block[i] = sbox[block[i]]

    def shift_rows(self, b):
        """ShiftRows step. Shifts 2nd row to left by 1, 3rd row by 2, 4th row by 3

        Since we're performing this on a transposed matrix, cells are numbered
        from top to bottom::

          0  4  8 12   ->    0  4  8 12    -- 1st row doesn't change
          1  5  9 13   ->    5  9 13  1    -- row shifted to left by 1 (wraps around)
          2  6 10 14   ->   10 14  2  6    -- shifted by 2
          3  7 11 15   ->   15  3  7 11    -- shifted by 3
        """
        b[1], b[5], b[9], b[13] = b[5], b[9], b[13], b[1]
        b[2], b[6], b[10], b[14] = b[10], b[14], b[2], b[6]
        b[3], b[7], b[11], b[15] = b[15], b[3], b[7], b[11]

    def shift_rows_inv(self, b):
        """Similar to shift_rows above, but performed in inverse for decryption."""
        b[5], b[9], b[13], b[1] = b[1], b[5], b[9], b[13]
        b[10], b[14], b[2], b[6] = b[2], b[6], b[10], b[14]
        b[15], b[3], b[7], b[11] = b[3], b[7], b[11], b[15]

    def mix_columns(self, block):
        """MixColumns step. Mixes the values in each column"""

        # Cache global multiplication tables (see below)
        mul_by_2 = gf_mul_by_2
        mul_by_3 = gf_mul_by_3

        # Since we're dealing with a transposed matrix, columns are already
        # sequential
        for i in xrange(4):
            col = i * 4

            v0, v1, v2, v3 = (block[col], block[col + 1], block[col + 2],
                              block[col + 3])

            block[col] = mul_by_2[v0] ^ v3 ^ v2 ^ mul_by_3[v1]
            block[col + 1] = mul_by_2[v1] ^ v0 ^ v3 ^ mul_by_3[v2]
            block[col + 2] = mul_by_2[v2] ^ v1 ^ v0 ^ mul_by_3[v3]
            block[col + 3] = mul_by_2[v3] ^ v2 ^ v1 ^ mul_by_3[v0]

    def mix_columns_inv(self, block):
        """Similar to mix_columns above, but performed in inverse for decryption."""

        # Cache global multiplication tables (see below)
        mul_9 = gf_mul_by_9
        mul_11 = gf_mul_by_11
        mul_13 = gf_mul_by_13
        mul_14 = gf_mul_by_14

        # Since we're dealing with a transposed matrix, columns are already
        # sequential
        for i in xrange(4):
            col = i * 4

            v0, v1, v2, v3 = (block[col], block[col + 1], block[col + 2],
                              block[col + 3])

            block[col] = mul_14[v0] ^ mul_9[v3] ^ mul_13[v2] ^ mul_11[v1]
            block[col + 1] = mul_14[v1] ^ mul_9[v0] ^ mul_13[v3] ^ mul_11[v2]
            block[col + 2] = mul_14[v2] ^ mul_9[v1] ^ mul_13[v0] ^ mul_11[v3]
            block[col + 3] = mul_14[v3] ^ mul_9[v2] ^ mul_13[v1] ^ mul_11[v0]

    def encrypt_block(self, block):
        """Encrypts a single block. This is the main AES function"""

        # For efficiency reasons, the state between steps is transmitted via a
        # mutable array, not returned.
        self.add_round_key(block, 0)

        for round_no in xrange(1, self.rounds):
            self.sub_bytes(block, aes_sbox)
            self.shift_rows(block)
            self.mix_columns(block)
            self.add_round_key(block, round_no)

        self.sub_bytes(block, aes_sbox)
        self.shift_rows(block)
        # no mix_columns step in the last round
        self.add_round_key(block, self.rounds)

    def decrypt_block(self, block):
        """Decrypts a single block. This is the main AES decryption function"""

        # For efficiency reasons, the state between steps is transmitted via a
        # mutable array, not returned.
        self.add_round_key(block, self.rounds)

        # count rounds down from self.rounds - 1 to 1
        for round_no in xrange(self.rounds - 1, 0, -1):
            self.shift_rows_inv(block)
            self.sub_bytes(block, aes_inv_sbox)
            self.add_round_key(block, round_no)
            self.mix_columns_inv(block)

        self.shift_rows_inv(block)
        self.sub_bytes(block, aes_inv_sbox)
        self.add_round_key(block, 0)
        # no mix_columns step in the last round
#### ECB mode implementation
class ECBMode(object):
    """Electronic CodeBook (ECB) mode encryption.

    Basically this mode applies the cipher function to each block individually;
    no feedback is done. NB! This is insecure for almost all purposes
    """

    def __init__(self, cipher):
        # cipher must expose block_size, encrypt_block and decrypt_block.
        self.cipher = cipher
        self.block_size = cipher.block_size

    def ecb(self, data, block_func):
        """Perform ECB mode with the given function

        data is copied into an array('B'); block_func is called once per
        block_size-sized slice and is expected to transform it in place.
        Returns the transformed bytes via array.tostring().

        Raises ValueError when len(data) is not a multiple of the block
        size (the message says "Plaintext" for decryption as well).
        """
        if len(data) % self.block_size != 0:
            # Call form of raise: valid on both Python 2 and Python 3.
            raise ValueError("Plaintext length must be multiple of 16")

        block_size = self.block_size
        data = array('B', data)

        for offset in xrange(0, len(data), block_size):
            # Slicing an array copies, so the result is written back below.
            block = data[offset:offset + block_size]
            block_func(block)
            data[offset:offset + block_size] = block

        return data.tostring()

    def encrypt(self, data):
        """Encrypt data in ECB mode"""
        return self.ecb(data, self.cipher.encrypt_block)

    def decrypt(self, data):
        """Decrypt data in ECB mode"""
        return self.ecb(data, self.cipher.decrypt_block)
#### CBC mode
class CBCMode(object):
    """Cipher Block Chaining (CBC) mode encryption. This mode avoids content leaks.

    In CBC encryption, each plaintext block is XORed with the ciphertext block
    preceding it; decryption is simply the inverse.

    The chaining value is kept in self.IV and updated by every encrypt() or
    decrypt() call, so consecutive calls on one object continue the chain.
    """

    # A better explanation of CBC can be found here:
    # http://en.wikipedia.org/wiki/Block_cipher_modes_of_operation#Cipher-block_chaining_.28CBC.29

    def __init__(self, cipher, IV):
        # cipher must expose block_size, encrypt_block and decrypt_block.
        self.cipher = cipher
        self.block_size = cipher.block_size
        self.IV = array('B', IV)

    def encrypt(self, data):
        """Encrypt data in CBC mode

        Returns the ciphertext via array.tostring().  Raises ValueError when
        len(data) is not a multiple of the block size.
        """
        block_size = self.block_size
        if len(data) % block_size != 0:
            # Call form of raise: valid on both Python 2 and Python 3.
            raise ValueError("Plaintext length must be multiple of 16")

        data = array('B', data)
        IV = self.IV

        for offset in xrange(0, len(data), block_size):
            block = data[offset:offset + block_size]

            # Perform CBC chaining
            for i in xrange(block_size):
                block[i] ^= IV[i]

            self.cipher.encrypt_block(block)
            data[offset:offset + block_size] = block
            # The ciphertext block just produced chains into the next one.
            IV = block

        self.IV = IV
        return data.tostring()

    def decrypt(self, data):
        """Decrypt data in CBC mode

        Returns the plaintext via array.tostring().  Raises ValueError when
        len(data) is not a multiple of the block size.
        """
        block_size = self.block_size
        if len(data) % block_size != 0:
            # Call form of raise: valid on both Python 2 and Python 3.
            raise ValueError("Ciphertext length must be multiple of 16")

        data = array('B', data)
        IV = self.IV

        for offset in xrange(0, len(data), block_size):
            ctext = data[offset:offset + block_size]
            # Work on a copy: the untouched ciphertext is the next IV.
            block = ctext[:]
            self.cipher.decrypt_block(block)

            # Perform CBC chaining
            for i in xrange(block_size):
                block[i] ^= IV[i]

            data[offset:offset + block_size] = block
            IV = ctext

        self.IV = IV
        return data.tostring()
####
def galois_multiply(a, b):
    """Galois Field multiplication for AES

    Shift-and-add multiplication: for every set bit of b the current
    multiple of a is XORed into the product, and a is doubled each step
    with 0x1b folded in whenever bit 8 becomes set.  Only the low 8 bits
    of the accumulated product are returned.
    """
    product = 0
    while b != 0:
        lowest_bit_set = b & 1
        if lowest_bit_set:
            product ^= a
        a = a << 1
        if a & 0x100:
            a = a ^ 0x1b
        b = b >> 1

    return product & 0xff
# Precompute the multiplication tables for encryption.
# Each table is a 256-entry array('B') whose element x holds
# galois_multiply(x, k), so the per-byte products become plain lookups.
# AES.mix_columns reads the by-2 and by-3 tables.
gf_mul_by_2 = array('B', [galois_multiply(x, 2) for x in range(256)])
gf_mul_by_3 = array('B', [galois_multiply(x, 3) for x in range(256)])
# ... for decryption
# AES.mix_columns_inv reads the by-9, by-11, by-13 and by-14 tables.
gf_mul_by_9 = array('B', [galois_multiply(x, 9) for x in range(256)])
gf_mul_by_11 = array('B', [galois_multiply(x, 11) for x in range(256)])
gf_mul_by_13 = array('B', [galois_multiply(x, 13) for x in range(256)])
gf_mul_by_14 = array('B', [galois_multiply(x, 14) for x in range(256)])
####
# The S-box is a 256-element array, that maps a single byte value to another
# byte value. Since it's designed to be reversible, each value occurs only once
# in the S-box
#
# More information: http://en.wikipedia.org/wiki/Rijndael_S-box
aes_sbox
=
array
(
'B'
,
'637c777bf26b6fc53001672bfed7ab76'
'ca82c97dfa5947f0add4a2af9ca472c0'
'b7fd9326363ff7cc34a5e5f171d83115'
'04c723c31896059a071280e2eb27b275'
'09832c1a1b6e5aa0523bd6b329e32f84'
'53d100ed20fcb15b6acbbe394a4c58cf'
'd0efaafb434d338545f9027f503c9fa8'
'51a3408f929d38f5bcb6da2110fff3d2'
'cd0c13ec5f974417c4a77e3d645d1973'
'60814fdc222a908846eeb814de5e0bdb'
'e0323a0a4906245cc2d3ac629195e479'
'e7c8376d8dd54ea96c56f4ea657aae08'
'ba78252e1ca6b4c6e8dd741f4bbd8b8a'
'703eb5664803f60e613557b986c11d9e'
'e1f8981169d98e949b1e87e9ce5528df'
'8ca1890dbfe6426841992d0fb054bb16'
.
decode
(
'hex'
)
)
# This is the inverse of the above. In other words:
# aes_inv_sbox[aes_sbox[val]] == val
aes_inv_sbox
=
array
(
'B'
,
'52096ad53036a538bf40a39e81f3d7fb'
'7ce339829b2fff87348e4344c4dee9cb'
'547b9432a6c2233dee4c950b42fac34e'
'082ea16628d924b2765ba2496d8bd125'
'72f8f66486689816d4a45ccc5d65b692'
'6c704850fdedb9da5e154657a78d9d84'
'90d8ab008cbcd30af7e45805b8b34506'
'd02c1e8fca3f0f02c1afbd0301138a6b'
'3a9111414f67dcea97f2cfcef0b4e673'
'96ac7422e7ad3585e2f937e81c75df6e'
'47f11a711d29c5896fb7620eaa18be1b'
'fc563e4bc6d279209adbc0fe78cd5af4'
'1fdda8338807c731b11210592780ec5f'
'60517fa919b54a0d2de57a9f93c99cef'
'a0e03b4dae2af5b0c8ebbb3c83539961'
'172b047eba77d626e169146355210c7d'
.
decode
(
'hex'
)
)
# The Rcon table is used in AES's key schedule (key expansion)
# It's a pre-computed table of exponentiation of 2 in AES's finite field
#
# More information: http://en.wikipedia.org/wiki/Rijndael_key_schedule
aes_Rcon
=
array
(
'B'
,
'8d01020408102040801b366cd8ab4d9a'
'2f5ebc63c697356ad4b37dfaefc59139'
'72e4d3bd61c29f254a943366cc831d3a'
'74e8cb8d01020408102040801b366cd8'
'ab4d9a2f5ebc63c697356ad4b37dfaef'
'c5913972e4d3bd61c29f254a943366cc'
'831d3a74e8cb8d01020408102040801b'
'366cd8ab4d9a2f5ebc63c697356ad4b3'
'7dfaefc5913972e4d3bd61c29f254a94'
'3366cc831d3a74e8cb8d010204081020'
'40801b366cd8ab4d9a2f5ebc63c69735'
'6ad4b37dfaefc5913972e4d3bd61c29f'
'254a943366cc831d3a74e8cb8d010204'
'08102040801b366cd8ab4d9a2f5ebc63'
'c697356ad4b37dfaefc5913972e4d3bd'
'61c29f254a943366cc831d3a74e8cb'
.
decode
(
'hex'
)
)
# pyston change
# Driver that runs at import time: encrypt one block in CBC mode, print the
# ciphertext as hex, then decrypt it and print the recovered text.
# ljust(16) pads each string to 16 characters, which satisfies the key
# length check in AES.setkey and the block-multiple check in CBCMode.
key = 'Very_secret'.ljust(16)
iv = 'very random'.ljust(16)
ciphertext = new(key, MODE_CBC, IV=iv).encrypt('pyston rocks!'.ljust(16))
print ciphertext.encode("hex")
# A fresh cipher object is built for decryption because encrypt() replaces
# the object's stored IV with the last ciphertext block.
print new(key, MODE_CBC, IV=iv).decrypt(ciphertext)
src/capi/abstract.cpp
View file @
a7c10a6e
...
@@ -300,6 +300,10 @@ extern "C" PyObject* PyObject_CallObject(PyObject* obj, PyObject* args) noexcept
...
@@ -300,6 +300,10 @@ extern "C" PyObject* PyObject_CallObject(PyObject* obj, PyObject* args) noexcept
}
}
}
}
extern
"C"
int
PyObject_AsReadBuffer
(
PyObject
*
obj
,
const
void
**
buffer
,
Py_ssize_t
*
buffer_len
)
noexcept
{
Py_FatalError
(
"unimplemented"
);
}
static
PyObject
*
call_function_tail
(
PyObject
*
callable
,
PyObject
*
args
)
{
static
PyObject
*
call_function_tail
(
PyObject
*
callable
,
PyObject
*
args
)
{
PyObject
*
retval
;
PyObject
*
retval
;
...
@@ -324,11 +328,11 @@ static PyObject* call_function_tail(PyObject* callable, PyObject* args) {
...
@@ -324,11 +328,11 @@ static PyObject* call_function_tail(PyObject* callable, PyObject* args) {
return
retval
;
return
retval
;
}
}
extern
"C"
PyObject
*
PyObject_CallMethod
(
PyObject
*
o
,
c
har
*
name
,
char
*
format
,
...)
noexcept
{
extern
"C"
PyObject
*
PyObject_CallMethod
(
PyObject
*
o
,
c
onst
char
*
name
,
const
char
*
format
,
...)
noexcept
{
Py_FatalError
(
"unimplemented"
);
Py_FatalError
(
"unimplemented"
);
}
}
extern
"C"
PyObject
*
_PyObject_CallMethod_SizeT
(
PyObject
*
o
,
c
har
*
name
,
char
*
format
,
...)
noexcept
{
extern
"C"
PyObject
*
_PyObject_CallMethod_SizeT
(
PyObject
*
o
,
c
onst
char
*
name
,
const
char
*
format
,
...)
noexcept
{
// TODO it looks like this could be made much more efficient by calling our callattr(), but
// TODO it looks like this could be made much more efficient by calling our callattr(), but
// I haven't taken the time to verify that that has the same behavior
// I haven't taken the time to verify that that has the same behavior
...
@@ -464,7 +468,7 @@ extern "C" int PyObject_IsSubclass(PyObject* derived, PyObject* cls) noexcept {
...
@@ -464,7 +468,7 @@ extern "C" int PyObject_IsSubclass(PyObject* derived, PyObject* cls) noexcept {
return
recursive_issubclass
(
derived
,
cls
);
return
recursive_issubclass
(
derived
,
cls
);
}
}
extern
"C"
PyObject
*
_PyObject_CallFunction_SizeT
(
PyObject
*
callable
,
char
*
format
,
...)
noexcept
{
extern
"C"
PyObject
*
_PyObject_CallFunction_SizeT
(
PyObject
*
callable
,
c
onst
c
har
*
format
,
...)
noexcept
{
Py_FatalError
(
"unimplemented"
);
Py_FatalError
(
"unimplemented"
);
}
}
...
@@ -615,7 +619,7 @@ extern "C" PyObject* PySequence_List(PyObject* v) noexcept {
...
@@ -615,7 +619,7 @@ extern "C" PyObject* PySequence_List(PyObject* v) noexcept {
return
result
;
return
result
;
}
}
extern
"C"
PyObject
*
PyObject_CallFunction
(
PyObject
*
callable
,
char
*
format
,
...)
noexcept
{
extern
"C"
PyObject
*
PyObject_CallFunction
(
PyObject
*
callable
,
c
onst
c
har
*
format
,
...)
noexcept
{
Py_FatalError
(
"unimplemented"
);
Py_FatalError
(
"unimplemented"
);
}
}
...
...
src/capi/codecs.cpp
0 → 100644
View file @
a7c10a6e
// Copyright (c) 2014-2015 Dropbox, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This file is originally from CPython 2.7, with modifications for Pyston
#include "Python.h"
#include "core/types.h"
#include "runtime/types.h"
namespace
pyston
{
// Pyston change: we don't provide a PyInterpreterState yet; instead we create a custom PyInterpreterStateCodec
struct
PyInterpreterStateCodec
{
Box
*
codec_search_path
;
Box
*
codec_search_cache
;
Box
*
codec_error_registry
;
};
static
PyInterpreterStateCodec
_inter
;
static
PyInterpreterStateCodec
*
interp
=
&
_inter
;
extern
"C"
{
/* --- Codec Registry ----------------------------------------------------- */
/* Import the standard encodings package which will register the first
codec search function.
This is done in a lazy way so that the Unicode implementation does
not downgrade startup time of scripts not needing it.
ImportErrors are silently ignored by this function. Only one try is
made.
*/
static
int
_PyCodecRegistry_Init
(
void
);
/* Forward */
/* Register a codec search function.

   Initializes the codec registry on first use, then appends search_function
   to the search path.  Returns the result of PyList_Append on success paths;
   returns -1 if the registry could not be initialized, if search_function is
   NULL (PyErr_BadArgument) or if it is not callable (TypeError). */
int PyCodec_Register(PyObject* search_function) noexcept {
    // Pyston change
    // PyInterpreterState *interp = PyThreadState_GET()->interp;
    const bool registry_missing = (interp->codec_search_path == NULL);
    if (registry_missing && _PyCodecRegistry_Init())
        return -1;

    if (search_function == NULL) {
        PyErr_BadArgument();
        return -1;
    }

    if (!PyCallable_Check(search_function)) {
        PyErr_SetString(PyExc_TypeError, "argument must be callable");
        return -1;
    }

    return PyList_Append(interp->codec_search_path, search_function);
}
/* Convert a C string to a normalized Python string: every space becomes a
   hyphen ('-') and every other character is lower-cased with Py_TOLOWER.

   Returns a new string object, or NULL with an exception set: OverflowError
   when the input is longer than PY_SSIZE_T_MAX, otherwise whatever
   PyString_FromStringAndSize raised. */
static PyObject* normalizestring(const char* string) {
    const size_t length = strlen(string);
    if (length > PY_SSIZE_T_MAX) {
        PyErr_SetString(PyExc_OverflowError, "string is too large");
        return NULL;
    }

    /* Allocate the result uninitialized and fill it in place. */
    PyObject* normalized = PyString_FromStringAndSize(NULL, length);
    if (normalized == NULL)
        return NULL;

    char* out = PyString_AS_STRING(normalized);
    for (size_t pos = 0; pos < length; ++pos) {
        char ch = string[pos];
        if (ch == ' ') {
            ch = '-';
        } else {
            ch = Py_TOLOWER(Py_CHARMASK(ch));
        }
        out[pos] = ch;
    }
    return normalized;
}
/* Look up the given encoding and return a tuple providing the codec
   facilities.

   The encoding name is normalized first (lower-cased, spaces turned into
   hyphens), which makes lookups through this mechanism effectively
   case-insensitive.  The normalized name is tried against the search cache;
   on a miss every registered search function is called in registration
   order until one returns something other than None.  That value must be a
   4-tuple; it is stored in the cache and returned as a new reference.

   Returns NULL with an exception set on failure:
     - LookupError if no search function is registered or none of them
       recognizes the encoding,
     - TypeError if a search function returns neither None nor a 4-tuple,
     - any error raised by a callee, including a failure to store the result
       in the cache.

   As side effect, this tries to load the encodings package, if not
   yet done. This is part of the lazy load strategy for the encodings
   package. */
PyObject* _PyCodec_Lookup(const char* encoding) noexcept {
    // PyInterpreterState *interp; Pyston change
    /* Every local is declared before the first goto so that no jump crosses
       an initialization. */
    PyObject* result, *args = NULL, *v;
    Py_ssize_t i, len;

    if (encoding == NULL) {
        PyErr_BadArgument();
        goto onError;
    }

    // Pyston change
    // interp = PyThreadState_GET()->interp;
    if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
        goto onError;

    /* Convert the encoding to a normalized Python string: all
       characters are converted to lower case, spaces are replaced
       with hyphens. */
    v = normalizestring(encoding);
    if (v == NULL)
        goto onError;
    // Pyston change
    // PyString_InternInPlace(&v);

    /* First, try to lookup the name in the registry dictionary */
    result = PyDict_GetItem(interp->codec_search_cache, v);
    if (result != NULL) {
        Py_INCREF(result);
        Py_DECREF(v);
        return result;
    }

    /* Next, scan the search functions in order of registration */
    args = PyTuple_New(1);
    if (args == NULL) {
        /* The tuple never took ownership of v, so it must be released here. */
        Py_DECREF(v);
        goto onError;
    }
    /* From here on args owns v; dropping args also drops v. */
    PyTuple_SET_ITEM(args, 0, v);

    len = PyList_Size(interp->codec_search_path);
    if (len < 0)
        goto onError;
    if (len == 0) {
        PyErr_SetString(PyExc_LookupError, "no codec search functions registered: "
                                           "can't find encoding");
        goto onError;
    }

    for (i = 0; i < len; i++) {
        PyObject* func;

        func = PyList_GetItem(interp->codec_search_path, i);
        if (func == NULL)
            goto onError;
        result = PyEval_CallObject(func, args);
        if (result == NULL)
            goto onError;
        if (result == Py_None) {
            /* This search function does not know the encoding; try the next. */
            Py_DECREF(result);
            continue;
        }
        if (!PyTuple_Check(result) || PyTuple_GET_SIZE(result) != 4) {
            PyErr_SetString(PyExc_TypeError, "codec search functions must return 4-tuples");
            Py_DECREF(result);
            goto onError;
        }
        break;
    }
    if (i == len) {
        /* XXX Perhaps we should cache misses too ? */
        PyErr_Format(PyExc_LookupError, "unknown encoding: %s", encoding);
        goto onError;
    }

    /* Cache and return the result.  A failed store leaves an exception
       pending, so it has to be reported instead of returning a value. */
    if (PyDict_SetItem(interp->codec_search_cache, v, result) < 0) {
        Py_DECREF(result);
        goto onError;
    }
    Py_DECREF(args);
    return result;

onError:
    Py_XDECREF(args);
    return NULL;
}
/* Build the positional-argument tuple for a codec call: (object,) when
   errors is NULL, otherwise (object, errors) with errors converted to a
   Python string.  Returns a new tuple, or NULL if an allocation failed. */
static PyObject* args_tuple(PyObject* object, const char* errors) {
    PyObject* tuple = PyTuple_New(1 + (errors != NULL));
    if (tuple == NULL)
        return NULL;

    /* The tuple slot takes over a reference, so give it one of its own. */
    Py_INCREF(object);
    PyTuple_SET_ITEM(tuple, 0, object);

    if (!errors)
        return tuple;

    PyObject* errors_str = PyString_FromString(errors);
    if (errors_str == NULL) {
        Py_DECREF(tuple);
        return NULL;
    }
    PyTuple_SET_ITEM(tuple, 1, errors_str);
    return tuple;
}
/* Helper function to get a codec item.

   Looks up the codec tuple for `encoding` and returns the element at
   `index` as a new reference, or NULL with an exception set if the lookup
   failed.  `index` is not range-checked; callers pass 0 or 1. */
static PyObject* codec_getitem(const char* encoding, int index) {
    PyObject* codecs = _PyCodec_Lookup(encoding);
    if (codecs == NULL)
        return NULL;

    PyObject* item = PyTuple_GET_ITEM(codecs, index);
    /* Take our own reference to the item BEFORE releasing the tuple: if ours
       was the last reference to the tuple, releasing it first would free the
       item we are about to return. */
    Py_INCREF(item);
    Py_DECREF(codecs);
    return item;
}
/* Helper function to create an incremental codec.

   Fetches attribute `attrname` from the codec object registered for
   `encoding` and calls it: with `errors` as its single string argument when
   errors is non-NULL, with no arguments otherwise.  Returns the call result
   (new reference) or NULL with an exception set. */
static PyObject* codec_getincrementalcodec(const char* encoding, const char* errors, const char* attrname) {
    PyObject* codec_info = _PyCodec_Lookup(encoding);
    if (codec_info == NULL)
        return NULL;

    PyObject* factory = PyObject_GetAttrString(codec_info, attrname);
    Py_DECREF(codec_info);
    if (factory == NULL)
        return NULL;

    PyObject* instance;
    if (errors) {
        instance = PyObject_CallFunction(factory, "s", errors);
    } else {
        instance = PyObject_CallFunction(factory, NULL);
    }
    Py_DECREF(factory);
    return instance;
}
/* Helper function to create a stream codec.

   Takes element `index` of the codec tuple registered for `encoding` and
   calls it as factory(stream) or, when errors is non-NULL,
   factory(stream, errors).  Returns the call result (new reference) or NULL
   with an exception set. */
static PyObject* codec_getstreamcodec(const char* encoding, PyObject* stream, const char* errors, const int index) {
    PyObject* codec_info = _PyCodec_Lookup(encoding);
    if (codec_info == NULL)
        return NULL;

    /* Borrowed from codec_info, which stays alive until after the call. */
    PyObject* factory = PyTuple_GET_ITEM(codec_info, index);

    PyObject* stream_codec;
    if (errors == NULL) {
        stream_codec = PyObject_CallFunction(factory, "O", stream);
    } else {
        stream_codec = PyObject_CallFunction(factory, "Os", stream, errors);
    }
    Py_DECREF(codec_info);
    return stream_codec;
}
/* Convenience APIs to query the Codec registry.
All APIs return a codec object with incremented refcount.
*/
/* Return item 0 of the codec tuple for `encoding` (the encoder), or NULL
   with an exception set if the lookup fails. */
PyObject* PyCodec_Encoder(const char* encoding) noexcept {
    PyObject* encoder = codec_getitem(encoding, 0);
    return encoder;
}
/* Return item 1 of the codec tuple for `encoding` (the decoder), or NULL
   with an exception set if the lookup fails. */
PyObject* PyCodec_Decoder(const char* encoding) noexcept {
    PyObject* decoder = codec_getitem(encoding, 1);
    return decoder;
}
/* Create an incremental encoder by calling the codec's "incrementalencoder"
   attribute; `errors` is forwarded when non-NULL. */
PyObject* PyCodec_IncrementalEncoder(const char* encoding, const char* errors) noexcept {
    PyObject* encoder = codec_getincrementalcodec(encoding, errors, "incrementalencoder");
    return encoder;
}
/* Create an incremental decoder by calling the codec's "incrementaldecoder"
   attribute; `errors` is forwarded when non-NULL. */
PyObject* PyCodec_IncrementalDecoder(const char* encoding, const char* errors) noexcept {
    PyObject* decoder = codec_getincrementalcodec(encoding, errors, "incrementaldecoder");
    return decoder;
}
/* Create a stream reader around `stream` using item 2 of the codec tuple
   for `encoding`; `errors` is forwarded when non-NULL. */
PyObject* PyCodec_StreamReader(const char* encoding, PyObject* stream, const char* errors) noexcept {
    PyObject* reader = codec_getstreamcodec(encoding, stream, errors, 2);
    return reader;
}
/* Create a stream writer around `stream` using item 3 of the codec tuple
   for `encoding`; `errors` is forwarded when non-NULL. */
PyObject* PyCodec_StreamWriter(const char* encoding, PyObject* stream, const char* errors) noexcept {
    PyObject* writer = codec_getstreamcodec(encoding, stream, errors, 3);
    return writer;
}
/* Encode an object (e.g. a Unicode object) using the given encoding and
   return the resulting encoded object (usually a Python string).

   The encoder is called as encoder(object) or, when errors is non-NULL,
   encoder(object, errors).  It must return a 2-tuple: item 0 is returned as
   a new reference, item 1 is ignored.  Returns NULL with an exception set on
   failure (TypeError if the encoder did not return a 2-tuple). */
PyObject* PyCodec_Encode(PyObject* object, const char* encoding, const char* errors) noexcept {
    /* Declared (and NULL-initialized where cleanup needs it) before the
       first goto so no jump crosses an initialization. */
    PyObject* encoder = NULL;
    PyObject* call_args = NULL;
    PyObject* outcome = NULL;
    PyObject* encoded;

    encoder = PyCodec_Encoder(encoding);
    if (encoder == NULL)
        goto fail;

    call_args = args_tuple(object, errors);
    if (call_args == NULL)
        goto fail;

    outcome = PyEval_CallObject(encoder, call_args);
    if (outcome == NULL)
        goto fail;

    if (!PyTuple_Check(outcome) || PyTuple_GET_SIZE(outcome) != 2) {
        PyErr_SetString(PyExc_TypeError, "encoder must return a tuple (object,integer)");
        goto fail;
    }

    encoded = PyTuple_GET_ITEM(outcome, 0);
    Py_INCREF(encoded);
    /* We don't check or use the second (integer) entry. */

    Py_DECREF(call_args);
    Py_DECREF(encoder);
    Py_DECREF(outcome);
    return encoded;

fail:
    Py_XDECREF(outcome);
    Py_XDECREF(call_args);
    Py_XDECREF(encoder);
    return NULL;
}
/* Decode an object (usually a Python string) using the given encoding and
   return an equivalent object (e.g. a Unicode object).

   The decoder is called as decoder(object) or, when errors is non-NULL,
   decoder(object, errors).  It must return a 2-tuple: item 0 is returned as
   a new reference, item 1 is ignored.  Returns NULL with an exception set on
   failure (TypeError if the decoder did not return a 2-tuple). */
PyObject* PyCodec_Decode(PyObject* object, const char* encoding, const char* errors) noexcept {
    /* Declared (and NULL-initialized where cleanup needs it) before the
       first goto so no jump crosses an initialization. */
    PyObject* decoder = NULL;
    PyObject* call_args = NULL;
    PyObject* outcome = NULL;
    PyObject* decoded;

    decoder = PyCodec_Decoder(encoding);
    if (decoder == NULL)
        goto fail;

    call_args = args_tuple(object, errors);
    if (call_args == NULL)
        goto fail;

    outcome = PyEval_CallObject(decoder, call_args);
    if (outcome == NULL)
        goto fail;

    if (!PyTuple_Check(outcome) || PyTuple_GET_SIZE(outcome) != 2) {
        PyErr_SetString(PyExc_TypeError, "decoder must return a tuple (object,integer)");
        goto fail;
    }

    decoded = PyTuple_GET_ITEM(outcome, 0);
    Py_INCREF(decoded);
    /* We don't check or use the second (integer) entry. */

    Py_DECREF(call_args);
    Py_DECREF(decoder);
    Py_DECREF(outcome);
    return decoded;

fail:
    Py_XDECREF(call_args);
    Py_XDECREF(decoder);
    Py_XDECREF(outcome);
    return NULL;
}
/* Register the error handling callback function error under the name
name. This function will be called by the codec when it encounters
an unencodable characters/undecodable bytes and doesn't know the
callback name, when name is specified as the error parameter
in the call to the encode/decode function.
Return 0 on success, -1 on error */
int
PyCodec_RegisterError
(
const
char
*
name
,
PyObject
*
error
)
noexcept
{
// PyInterpreterState *interp = PyThreadState_GET()->interp; pyston change
if
(
interp
->
codec_search_path
==
NULL
&&
_PyCodecRegistry_Init
())
return
-
1
;
if
(
!
PyCallable_Check
(
error
))
{
PyErr_SetString
(
PyExc_TypeError
,
"handler must be callable"
);
return
-
1
;
}
return
PyDict_SetItemString
(
interp
->
codec_error_registry
,
name
,
error
);
}
/* Look up the error handling callback registered under `name`.

   As a special case NULL can be passed, in which case the handler
   registered as "strict" is looked up.  Returns a new reference, or NULL
   with LookupError set if no handler has that name (or with the
   initialization error if the registry could not be set up). */
PyObject* PyCodec_LookupError(const char* name) noexcept {
    // Pyston change
    // PyInterpreterState *interp = PyThreadState_GET()->interp;
    if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
        return NULL;

    const char* key = (name == NULL) ? "strict" : name;

    /* PyDict_GetItemString hands back a borrowed reference. */
    PyObject* handler = PyDict_GetItemString(interp->codec_error_registry, key);
    if (handler) {
        Py_INCREF(handler);
    } else {
        PyErr_Format(PyExc_LookupError, "unknown error handler name '%.400s'", key);
    }
    return handler;
}
/* Set a TypeError naming the class of `exc`, for error callbacks that were
   handed an exception type they cannot process.  If fetching or stringifying
   the class name fails, the function returns early and whatever exception
   that step raised is left in place. */
static void wrong_exception_type(PyObject* exc) {
    PyObject* cls = PyObject_GetAttrString(exc, "__class__");
    if (cls == NULL)
        return;

    PyObject* cls_name = PyObject_GetAttrString(cls, "__name__");
    Py_DECREF(cls);
    if (cls_name == NULL)
        return;

    PyObject* text = PyObject_Str(cls_name);
    Py_DECREF(cls_name);
    if (text == NULL)
        return;

    PyErr_Format(PyExc_TypeError, "don't know how to handle %.400s in error callback", PyString_AS_STRING(text));
    Py_DECREF(text);
}
/* 'strict' error handler: re-raise `exc` as the current exception.  If `exc`
   is not an exception instance a TypeError is set instead.  Always returns
   NULL. */
PyObject* PyCodec_StrictErrors(PyObject* exc) noexcept {
    if (!PyExceptionInstance_Check(exc)) {
        PyErr_SetString(PyExc_TypeError, "codec must pass exception instance");
    } else {
        PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
    }
    return NULL;
}
#ifdef Py_USING_UNICODE
/* 'ignore' error handler: skip the offending input.

   Reads the `end` position from a UnicodeEncodeError, UnicodeDecodeError or
   UnicodeTranslateError and returns the tuple (u'', end).  Any other
   exception type yields NULL via wrong_exception_type(). */
PyObject* PyCodec_IgnoreErrors(PyObject* exc) noexcept {
    Py_ssize_t end;
    int getter_failed;

    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
        getter_failed = PyUnicodeEncodeError_GetEnd(exc, &end);
    } else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
        getter_failed = PyUnicodeDecodeError_GetEnd(exc, &end);
    } else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
        getter_failed = PyUnicodeTranslateError_GetEnd(exc, &end);
    } else {
        wrong_exception_type(exc);
        return NULL;
    }

    if (getter_failed)
        return NULL;

    /* ouch: passing NULL, 0, pos gives None instead of u'' */
    /* so a non-NULL dummy pointer is passed with length 0 */
    return Py_BuildValue("(u#n)", &end, 0, end);
}
/* 'replace' error handler.

   - UnicodeEncodeError: returns (u'?' * (end - start), end).
   - UnicodeDecodeError: returns (Py_UNICODE_REPLACEMENT_CHARACTER, end).
   - UnicodeTranslateError: returns
     (Py_UNICODE_REPLACEMENT_CHARACTER * (end - start), end).
   Any other exception type yields NULL via wrong_exception_type(). */
PyObject* PyCodec_ReplaceErrors(PyObject* exc) noexcept {
    Py_ssize_t start;
    Py_ssize_t end;

    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
        if (PyUnicodeEncodeError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeEncodeError_GetEnd(exc, &end))
            return NULL;

        PyObject* filler = PyUnicode_FromUnicode(NULL, end - start);
        if (filler == NULL)
            return NULL;

        /* One '?' per input position in [start, end). */
        Py_UNICODE* out = PyUnicode_AS_UNICODE(filler);
        for (Py_ssize_t pos = start; pos < end; ++pos)
            *out++ = '?';

        PyObject* pair = Py_BuildValue("(On)", filler, end);
        Py_DECREF(filler);
        return pair;
    }

    if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
        Py_UNICODE marker = Py_UNICODE_REPLACEMENT_CHARACTER;
        if (PyUnicodeDecodeError_GetEnd(exc, &end))
            return NULL;
        return Py_BuildValue("(u#n)", &marker, (Py_ssize_t)1, end);
    }

    if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
        if (PyUnicodeTranslateError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeTranslateError_GetEnd(exc, &end))
            return NULL;

        PyObject* filler = PyUnicode_FromUnicode(NULL, end - start);
        if (filler == NULL)
            return NULL;

        /* One replacement character per input position in [start, end). */
        Py_UNICODE* out = PyUnicode_AS_UNICODE(filler);
        for (Py_ssize_t pos = start; pos < end; ++pos)
            *out++ = Py_UNICODE_REPLACEMENT_CHARACTER;

        PyObject* pair = Py_BuildValue("(On)", filler, end);
        Py_DECREF(filler);
        return pair;
    }

    wrong_exception_type(exc);
    return NULL;
}
/* 'xmlcharrefreplace' error handler.

   For a UnicodeEncodeError, replaces every character in [start, end) of the
   exception's object with the decimal XML character reference "&#N;" and
   returns (replacement, end).  On narrow (non Py_UNICODE_WIDE) builds a
   surrogate pair is combined into one code point first.  Any other
   exception type yields NULL via wrong_exception_type().

   The output size is accumulated in a Py_ssize_t, and the handled range is
   clamped so that the size cannot overflow; when clamping happens the
   returned `end` is smaller than the exception's end, so the caller resumes
   from there. */
PyObject* PyCodec_XMLCharRefReplaceErrors(PyObject* exc) noexcept {
    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
        PyObject* restuple;
        PyObject* object;
        Py_ssize_t start;
        Py_ssize_t end;
        PyObject* res;
        Py_UNICODE* p;
        Py_UNICODE* startp;
        Py_UNICODE* e;
        Py_UNICODE* outp;
        Py_ssize_t ressize; /* was int: could overflow for very long ranges */

        if (PyUnicodeEncodeError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeEncodeError_GetEnd(exc, &end))
            return NULL;
        if (!(object = PyUnicodeEncodeError_GetObject(exc)))
            return NULL;
        startp = PyUnicode_AS_UNICODE(object);

        /* Each input unit expands to at most 2+7+1 output units ("&#" +
           seven digits + ";").  Limit the range so ressize stays
           representable. */
        if (end - start > PY_SSIZE_T_MAX / (2 + 7 + 1)) {
            end = start + PY_SSIZE_T_MAX / (2 + 7 + 1);
#ifndef Py_UNICODE_WIDE
            /* Do not cut a surrogate pair in half at the new end. */
            if (0xD800 <= startp[end - 1] && startp[end - 1] <= 0xDBFF)
                end--;
#endif
        }
        e = startp + end;

        /* First pass: compute the size of the replacement. */
        for (p = startp + start, ressize = 0; p < e;) {
            Py_UCS4 ch = *p++;
#ifndef Py_UNICODE_WIDE
            if ((0xD800 <= ch && ch <= 0xDBFF) && (p < e) && (0xDC00 <= *p && *p <= 0xDFFF)) {
                ch = ((((ch & 0x03FF) << 10) | ((Py_UCS4)*p++ & 0x03FF)) + 0x10000);
            }
#endif
            if (ch < 10)
                ressize += 2 + 1 + 1;
            else if (ch < 100)
                ressize += 2 + 2 + 1;
            else if (ch < 1000)
                ressize += 2 + 3 + 1;
            else if (ch < 10000)
                ressize += 2 + 4 + 1;
            else if (ch < 100000)
                ressize += 2 + 5 + 1;
            else if (ch < 1000000)
                ressize += 2 + 6 + 1;
            else
                ressize += 2 + 7 + 1;
        }

        /* allocate replacement */
        res = PyUnicode_FromUnicode(NULL, ressize);
        if (res == NULL) {
            Py_DECREF(object);
            return NULL;
        }

        /* generate replacement */
        for (p = startp + start, outp = PyUnicode_AS_UNICODE(res); p < e;) {
            int digits;
            int base;
            Py_UCS4 ch = *p++;
#ifndef Py_UNICODE_WIDE
            if ((0xD800 <= ch && ch <= 0xDBFF) && (p < startp + end) && (0xDC00 <= *p && *p <= 0xDFFF)) {
                ch = ((((ch & 0x03FF) << 10) | ((Py_UCS4)*p++ & 0x03FF)) + 0x10000);
            }
#endif
            *outp++ = '&';
            *outp++ = '#';
            /* `base` is the place value of the most significant digit. */
            if (ch < 10) {
                digits = 1;
                base = 1;
            } else if (ch < 100) {
                digits = 2;
                base = 10;
            } else if (ch < 1000) {
                digits = 3;
                base = 100;
            } else if (ch < 10000) {
                digits = 4;
                base = 1000;
            } else if (ch < 100000) {
                digits = 5;
                base = 10000;
            } else if (ch < 1000000) {
                digits = 6;
                base = 100000;
            } else {
                digits = 7;
                base = 1000000;
            }
            while (digits-- > 0) {
                *outp++ = '0' + ch / base;
                ch %= base;
                base /= 10;
            }
            *outp++ = ';';
        }

        restuple = Py_BuildValue("(On)", res, end);
        Py_DECREF(res);
        Py_DECREF(object);
        return restuple;
    } else {
        wrong_exception_type(exc);
        return NULL;
    }
}
/* Lower-case hexadecimal digits as Py_UNICODE units, indexed by nibble value. */
static Py_UNICODE hexdigits[] = {
    '0', '1', '2', '3', '4', '5', '6', '7',
    '8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
};
/* 'backslashreplace' error handler.

   For a UnicodeEncodeError, replaces every unit in [start, end) of the
   exception's object with a backslash escape: \xNN below 0x100, \uNNNN
   below 0x10000 and (Py_UNICODE_WIDE builds only) \UNNNNNNNN above that.
   Returns (replacement, end).  Any other exception type yields NULL via
   wrong_exception_type().

   The output size is accumulated in a Py_ssize_t, and the handled range is
   clamped so that the size cannot overflow; when clamping happens the
   returned `end` is smaller than the exception's end, so the caller resumes
   from there. */
PyObject* PyCodec_BackslashReplaceErrors(PyObject* exc) noexcept {
    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
        PyObject* restuple;
        PyObject* object;
        Py_ssize_t start;
        Py_ssize_t end;
        PyObject* res;
        Py_UNICODE* p;
        Py_UNICODE* startp;
        Py_UNICODE* outp;
        Py_ssize_t ressize; /* was int: could overflow for very long ranges */

        if (PyUnicodeEncodeError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeEncodeError_GetEnd(exc, &end))
            return NULL;
        if (!(object = PyUnicodeEncodeError_GetObject(exc)))
            return NULL;

        /* Each input unit expands to at most 1+1+8 output units; limit the
           range so ressize stays representable. */
        if (end - start > PY_SSIZE_T_MAX / (1 + 1 + 8))
            end = start + PY_SSIZE_T_MAX / (1 + 1 + 8);

        startp = PyUnicode_AS_UNICODE(object);

        /* First pass: compute the size of the replacement. */
        for (p = startp + start, ressize = 0; p < startp + end; ++p) {
#ifdef Py_UNICODE_WIDE
            if (*p >= 0x00010000)
                ressize += 1 + 1 + 8;
            else
#endif
                if (*p >= 0x100) {
                ressize += 1 + 1 + 4;
            } else
                ressize += 1 + 1 + 2;
        }

        res = PyUnicode_FromUnicode(NULL, ressize);
        if (res == NULL) {
            /* Release the reference obtained from GetObject; the original
               code leaked it on this path. */
            Py_DECREF(object);
            return NULL;
        }

        /* Second pass: write the escapes.  Each branch emits the prefix
           letter and the high-order digits; the two lowest hex digits are
           shared by all forms and written after the branches. */
        for (p = startp + start, outp = PyUnicode_AS_UNICODE(res); p < startp + end; ++p) {
            Py_UNICODE c = *p;
            *outp++ = '\\';
#ifdef Py_UNICODE_WIDE
            if (c >= 0x00010000) {
                *outp++ = 'U';
                *outp++ = hexdigits[(c >> 28) & 0xf];
                *outp++ = hexdigits[(c >> 24) & 0xf];
                *outp++ = hexdigits[(c >> 20) & 0xf];
                *outp++ = hexdigits[(c >> 16) & 0xf];
                *outp++ = hexdigits[(c >> 12) & 0xf];
                *outp++ = hexdigits[(c >> 8) & 0xf];
            } else
#endif
                if (c >= 0x100) {
                *outp++ = 'u';
                *outp++ = hexdigits[(c >> 12) & 0xf];
                *outp++ = hexdigits[(c >> 8) & 0xf];
            } else {
                *outp++ = 'x';
            }
            *outp++ = hexdigits[(c >> 4) & 0xf];
            *outp++ = hexdigits[c & 0xf];
        }

        restuple = Py_BuildValue("(On)", res, end);
        Py_DECREF(res);
        Py_DECREF(object);
        return restuple;
    } else {
        wrong_exception_type(exc);
        return NULL;
    }
}
#endif
/* Method-table adapter for PyCodec_StrictErrors; `self` is not used. */
static PyObject* strict_errors(PyObject* self, PyObject* exc) {
    PyObject* outcome = PyCodec_StrictErrors(exc);
    return outcome;
}
#ifdef Py_USING_UNICODE
/* Method-table adapter for PyCodec_IgnoreErrors; `self` is not used. */
static PyObject* ignore_errors(PyObject* self, PyObject* exc) {
    PyObject* outcome = PyCodec_IgnoreErrors(exc);
    return outcome;
}
/* Method-table adapter for PyCodec_ReplaceErrors; `self` is not used. */
static PyObject* replace_errors(PyObject* self, PyObject* exc) {
    PyObject* outcome = PyCodec_ReplaceErrors(exc);
    return outcome;
}
/* Method-table adapter for PyCodec_XMLCharRefReplaceErrors; `self` is not used. */
static PyObject* xmlcharrefreplace_errors(PyObject* self, PyObject* exc) {
    PyObject* outcome = PyCodec_XMLCharRefReplaceErrors(exc);
    return outcome;
}
/* Method-table adapter for PyCodec_BackslashReplaceErrors; `self` is not used. */
static PyObject* backslashreplace_errors(PyObject* self, PyObject* exc) {
    PyObject* outcome = PyCodec_BackslashReplaceErrors(exc);
    return outcome;
}
#endif
static
int
_PyCodecRegistry_Init
(
void
)
{
static
struct
{
const
char
*
name
;
PyMethodDef
def
;
}
methods
[]
=
{
{
"strict"
,
{
"strict_errors"
,
strict_errors
,
METH_O
,
PyDoc_STR
(
"Implements the 'strict' error handling, which "
"raises a UnicodeError on coding errors."
)
}
},
#ifdef Py_USING_UNICODE
{
"ignore"
,
{
"ignore_errors"
,
ignore_errors
,
METH_O
,
PyDoc_STR
(
"Implements the 'ignore' error handling, which "
"ignores malformed data and continues."
)
}
},
{
"replace"
,
{
"replace_errors"
,
replace_errors
,
METH_O
,
PyDoc_STR
(
"Implements the 'replace' error handling, which "
"replaces malformed data with a replacement marker."
)
}
},
{
"xmlcharrefreplace"
,
{
"xmlcharrefreplace_errors"
,
xmlcharrefreplace_errors
,
METH_O
,
PyDoc_STR
(
"Implements the 'xmlcharrefreplace' error handling, "
"which replaces an unencodable character with the "
"appropriate XML character reference."
)
}
},
{
"backslashreplace"
,
{
"backslashreplace_errors"
,
backslashreplace_errors
,
METH_O
,
PyDoc_STR
(
"Implements the 'backslashreplace' error handling, "
"which replaces an unencodable character with a "
"backslashed escape sequence."
)
}
}
#endif
};
// Pyston change
// PyInterpreterState *interp = PyThreadState_GET()->interp;
PyObject
*
mod
;
unsigned
i
;
if
(
interp
->
codec_search_path
!=
NULL
)
return
0
;
interp
->
codec_search_path
=
PyList_New
(
0
);
interp
->
codec_search_cache
=
PyDict_New
();
interp
->
codec_error_registry
=
PyDict_New
();
// Pyston change: register roots
gc
::
registerPermanentRoot
(
interp
->
codec_search_path
);
gc
::
registerPermanentRoot
(
interp
->
codec_search_cache
);
gc
::
registerPermanentRoot
(
interp
->
codec_error_registry
);
if
(
interp
->
codec_error_registry
)
{
for
(
i
=
0
;
i
<
sizeof
(
methods
)
/
sizeof
(
methods
[
0
]);
++
i
)
{
PyObject
*
func
=
PyCFunction_New
(
&
methods
[
i
].
def
,
NULL
);
int
res
;
if
(
!
func
)
Py_FatalError
(
"can't initialize codec error registry"
);
res
=
PyCodec_RegisterError
(
methods
[
i
].
name
,
func
);
Py_DECREF
(
func
);
if
(
res
)
Py_FatalError
(
"can't initialize codec error registry"
);
}
}
if
(
interp
->
codec_search_path
==
NULL
||
interp
->
codec_search_cache
==
NULL
||
interp
->
codec_error_registry
==
NULL
)
Py_FatalError
(
"can't initialize codec registry"
);
mod
=
PyImport_ImportModuleLevel
(
"encodings"
,
NULL
,
NULL
,
NULL
,
0
);
if
(
mod
==
NULL
)
{
if
(
PyErr_ExceptionMatches
(
PyExc_ImportError
))
{
/* Ignore ImportErrors... this is done so that
distributions can disable the encodings package. Note
that other errors are not masked, e.g. SystemErrors
raised to inform the user of an error in the Python
configuration are still reported back to the user. */
PyErr_Clear
();
return
0
;
}
return
-
1
;
}
Py_DECREF
(
mod
);
return
0
;
}
}
}
// namespace pyston
src/capi/modsupport.cpp
View file @
a7c10a6e
...
@@ -97,6 +97,14 @@ static PyObject* do_mkvalue(const char** p_format, va_list* p_va, int flags) noe
...
@@ -97,6 +97,14 @@ static PyObject* do_mkvalue(const char** p_format, va_list* p_va, int flags) noe
case
'H'
:
case
'H'
:
return
PyInt_FromLong
((
long
)
va_arg
(
*
p_va
,
unsigned
int
));
return
PyInt_FromLong
((
long
)
va_arg
(
*
p_va
,
unsigned
int
));
case
'n'
:
#if SIZEOF_SIZE_T != SIZEOF_LONG
return
PyInt_FromSsize_t
(
va_arg
(
*
p_va
,
Py_ssize_t
));
#endif
/* Fall through from 'n' to 'l' if Py_ssize_t is long */
case
'l'
:
return
PyInt_FromLong
(
va_arg
(
*
p_va
,
long
));
case
'N'
:
case
'N'
:
case
'S'
:
case
'S'
:
case
'O'
:
case
'O'
:
...
...
src/runtime/builtin_modules/builtins.cpp
View file @
a7c10a6e
...
@@ -442,12 +442,22 @@ Box* issubclass_func(Box* child, Box* parent) {
...
@@ -442,12 +442,22 @@ Box* issubclass_func(Box* child, Box* parent) {
return
boxBool
(
isSubclass
(
static_cast
<
BoxedClass
*>
(
child
),
static_cast
<
BoxedClass
*>
(
parent
)));
return
boxBool
(
isSubclass
(
static_cast
<
BoxedClass
*>
(
child
),
static_cast
<
BoxedClass
*>
(
parent
)));
}
}
Box
*
bltinImport
(
Box
*
arg
)
{
Box
*
bltinImport
(
Box
*
name
,
Box
*
globals
,
Box
*
locals
,
Box
**
args
)
{
if
(
arg
->
cls
!=
str_cls
)
{
Box
*
fromlist
=
args
[
0
];
raiseExcHelper
(
TypeError
,
"__import__() argument 1 must be string, not %s"
,
getTypeName
(
arg
));
Box
*
level
=
args
[
1
];
RELEASE_ASSERT
(
globals
==
None
,
"not implemented"
);
RELEASE_ASSERT
(
locals
==
None
,
"not implemented"
);
if
(
name
->
cls
!=
str_cls
)
{
raiseExcHelper
(
TypeError
,
"__import__() argument 1 must be string, not %s"
,
getTypeName
(
name
));
}
if
(
level
->
cls
!=
int_cls
)
{
raiseExcHelper
(
TypeError
,
"an integer is required"
);
}
}
return
import
(
-
1
,
new
BoxedTuple
({}),
&
static_cast
<
BoxedString
*>
(
arg
)
->
s
);
return
import
(
((
BoxedInt
*
)
level
)
->
n
,
fromlist
,
&
static_cast
<
BoxedString
*>
(
name
)
->
s
);
}
}
Box
*
getattrFunc
(
Box
*
obj
,
Box
*
_str
,
Box
*
default_value
)
{
Box
*
getattrFunc
(
Box
*
obj
,
Box
*
_str
,
Box
*
default_value
)
{
...
@@ -575,7 +585,8 @@ BoxedClass* BaseException, *Exception, *StandardError, *AssertionError, *Attribu
...
@@ -575,7 +585,8 @@ BoxedClass* BaseException, *Exception, *StandardError, *AssertionError, *Attribu
*
NameError
,
*
KeyError
,
*
IndexError
,
*
IOError
,
*
OSError
,
*
ZeroDivisionError
,
*
ValueError
,
*
UnboundLocalError
,
*
NameError
,
*
KeyError
,
*
IndexError
,
*
IOError
,
*
OSError
,
*
ZeroDivisionError
,
*
ValueError
,
*
UnboundLocalError
,
*
RuntimeError
,
*
ImportError
,
*
StopIteration
,
*
Warning
,
*
SyntaxError
,
*
OverflowError
,
*
DeprecationWarning
,
*
RuntimeError
,
*
ImportError
,
*
StopIteration
,
*
Warning
,
*
SyntaxError
,
*
OverflowError
,
*
DeprecationWarning
,
*
MemoryError
,
*
LookupError
,
*
EnvironmentError
,
*
ArithmeticError
,
*
BufferError
,
*
KeyboardInterrupt
,
*
SystemExit
,
*
MemoryError
,
*
LookupError
,
*
EnvironmentError
,
*
ArithmeticError
,
*
BufferError
,
*
KeyboardInterrupt
,
*
SystemExit
,
*
SystemError
,
*
NotImplementedError
,
*
PendingDeprecationWarning
,
*
EOFError
;
*
SystemError
,
*
NotImplementedError
,
*
PendingDeprecationWarning
,
*
EOFError
,
*
UnicodeError
,
*
UnicodeEncodeError
,
*
UnicodeDecodeError
,
*
UnicodeTranslateError
;
Box
*
PyExc_RecursionErrorInst
;
Box
*
PyExc_RecursionErrorInst
;
Box
*
PyExc_MemoryErrorInst
;
Box
*
PyExc_MemoryErrorInst
;
...
@@ -1028,6 +1039,13 @@ void setupBuiltins() {
...
@@ -1028,6 +1039,13 @@ void setupBuiltins() {
PendingDeprecationWarning
=
makeBuiltinException
(
Warning
,
"PendingDeprecationWarning"
);
PendingDeprecationWarning
=
makeBuiltinException
(
Warning
,
"PendingDeprecationWarning"
);
EOFError
=
makeBuiltinException
(
StandardError
,
"EOFError"
);
EOFError
=
makeBuiltinException
(
StandardError
,
"EOFError"
);
// Unicode errors
UnicodeError
=
makeBuiltinException
(
ValueError
,
"UnicodeError"
);
UnicodeEncodeError
=
makeBuiltinException
(
UnicodeError
,
"UnicodeEncodeError"
);
UnicodeDecodeError
=
makeBuiltinException
(
UnicodeError
,
"UnicodeDecodeError"
);
UnicodeTranslateError
=
makeBuiltinException
(
UnicodeError
,
"UnicodeTranslateError"
);
BaseException
->
giveAttr
(
"__reduce__"
,
BaseException
->
giveAttr
(
"__reduce__"
,
new
BoxedFunction
(
boxRTFunction
((
void
*
)
BoxedException
::
__reduce__
,
UNKNOWN
,
1
)));
new
BoxedFunction
(
boxRTFunction
((
void
*
)
BoxedException
::
__reduce__
,
UNKNOWN
,
1
)));
EnvironmentError
->
giveAttr
(
"__reduce__"
,
EnvironmentError
->
giveAttr
(
"__reduce__"
,
...
@@ -1096,8 +1114,10 @@ void setupBuiltins() {
...
@@ -1096,8 +1114,10 @@ void setupBuiltins() {
Box
*
issubclass_obj
=
new
BoxedBuiltinFunctionOrMethod
(
boxRTFunction
((
void
*
)
issubclass_func
,
BOXED_BOOL
,
2
));
Box
*
issubclass_obj
=
new
BoxedBuiltinFunctionOrMethod
(
boxRTFunction
((
void
*
)
issubclass_func
,
BOXED_BOOL
,
2
));
builtins_module
->
giveAttr
(
"issubclass"
,
issubclass_obj
);
builtins_module
->
giveAttr
(
"issubclass"
,
issubclass_obj
);
CLFunction
*
import_func
=
boxRTFunction
((
void
*
)
bltinImport
,
UNKNOWN
,
5
,
4
,
false
,
false
,
ParamNames
({
"name"
,
"globals"
,
"locals"
,
"fromlist"
,
"level"
},
""
,
""
));
builtins_module
->
giveAttr
(
"__import__"
,
builtins_module
->
giveAttr
(
"__import__"
,
new
BoxedBuiltinFunctionOrMethod
(
boxRTFunction
((
void
*
)
bltinImport
,
UNKNOWN
,
1
)
));
new
BoxedBuiltinFunctionOrMethod
(
import_func
,
{
None
,
None
,
None
,
new
BoxedInt
(
-
1
)
}
));
enumerate_cls
enumerate_cls
=
new
BoxedHeapClass
(
object_cls
,
&
BoxedEnumerate
::
gcHandler
,
0
,
sizeof
(
BoxedEnumerate
),
false
,
"enumerate"
);
=
new
BoxedHeapClass
(
object_cls
,
&
BoxedEnumerate
::
gcHandler
,
0
,
sizeof
(
BoxedEnumerate
),
false
,
"enumerate"
);
...
...
src/runtime/builtin_modules/sys.cpp
View file @
a7c10a6e
...
@@ -209,6 +209,12 @@ static std::string generateVersionString() {
...
@@ -209,6 +209,12 @@ static std::string generateVersionString() {
return
oss
.
str
();
return
oss
.
str
();
}
}
// Reports whether the byte at the lowest address of an unsigned long holding
// the value 1 is non-zero, i.e. whether the least significant byte is stored
// first.
static bool isLittleEndian() {
    const unsigned long probe = 1;
    const char* first_byte = reinterpret_cast<const char*>(&probe);
    return *first_byte != 0;
}
void
setupSys
()
{
void
setupSys
()
{
sys_modules_dict
=
new
BoxedDict
();
sys_modules_dict
=
new
BoxedDict
();
gc
::
registerPermanentRoot
(
sys_modules_dict
);
gc
::
registerPermanentRoot
(
sys_modules_dict
);
...
@@ -235,6 +241,7 @@ void setupSys() {
...
@@ -235,6 +241,7 @@ void setupSys() {
sys_module
->
giveAttr
(
"warnoptions"
,
new
BoxedList
());
sys_module
->
giveAttr
(
"warnoptions"
,
new
BoxedList
());
sys_module
->
giveAttr
(
"py3kwarning"
,
False
);
sys_module
->
giveAttr
(
"py3kwarning"
,
False
);
sys_module
->
giveAttr
(
"byteorder"
,
new
BoxedString
(
isLittleEndian
()
?
"little"
:
"big"
));
sys_module
->
giveAttr
(
"platform"
,
boxStrConstant
(
"unknown"
));
// seems like a reasonable, if poor, default
sys_module
->
giveAttr
(
"platform"
,
boxStrConstant
(
"unknown"
));
// seems like a reasonable, if poor, default
...
...
src/runtime/capi.cpp
View file @
a7c10a6e
...
@@ -426,7 +426,8 @@ extern "C" PyObject* PyObject_Call(PyObject* callable_object, PyObject* args, Py
...
@@ -426,7 +426,8 @@ extern "C" PyObject* PyObject_Call(PyObject* callable_object, PyObject* args, Py
else
else
return
runtimeCall
(
callable_object
,
ArgPassSpec
(
0
,
0
,
true
,
false
),
args
,
NULL
,
NULL
,
NULL
,
NULL
);
return
runtimeCall
(
callable_object
,
ArgPassSpec
(
0
,
0
,
true
,
false
),
args
,
NULL
,
NULL
,
NULL
,
NULL
);
}
catch
(
ExcInfo
e
)
{
}
catch
(
ExcInfo
e
)
{
Py_FatalError
(
"unimplemented"
);
setCAPIException
(
e
);
return
NULL
;
}
}
}
}
...
@@ -712,7 +713,20 @@ extern "C" void PyErr_SetObject(PyObject* exception, PyObject* value) noexcept {
...
@@ -712,7 +713,20 @@ extern "C" void PyErr_SetObject(PyObject* exception, PyObject* value) noexcept {
}
}
extern
"C"
PyObject
*
PyErr_Format
(
PyObject
*
exception
,
const
char
*
format
,
...)
noexcept
{
extern
"C"
PyObject
*
PyErr_Format
(
PyObject
*
exception
,
const
char
*
format
,
...)
noexcept
{
Py_FatalError
(
"unimplemented"
);
va_list
vargs
;
PyObject
*
string
;
#ifdef HAVE_STDARG_PROTOTYPES
va_start
(
vargs
,
format
);
#else
va_start
(
vargs
);
#endif
string
=
PyString_FromFormatV
(
format
,
vargs
);
PyErr_SetObject
(
exception
,
string
);
Py_XDECREF
(
string
);
va_end
(
vargs
);
return
NULL
;
}
}
extern
"C"
int
PyErr_BadArgument
()
noexcept
{
extern
"C"
int
PyErr_BadArgument
()
noexcept
{
...
@@ -1433,6 +1447,8 @@ extern "C" PyObject* Py_FindMethod(PyMethodDef* methods, PyObject* self, const c
...
@@ -1433,6 +1447,8 @@ extern "C" PyObject* Py_FindMethod(PyMethodDef* methods, PyObject* self, const c
}
}
extern
"C"
PyObject
*
PyCFunction_NewEx
(
PyMethodDef
*
ml
,
PyObject
*
self
,
PyObject
*
module
)
noexcept
{
extern
"C"
PyObject
*
PyCFunction_NewEx
(
PyMethodDef
*
ml
,
PyObject
*
self
,
PyObject
*
module
)
noexcept
{
RELEASE_ASSERT
(
module
==
NULL
,
"not implemented"
);
assert
((
ml
->
ml_flags
&
(
~
(
METH_VARARGS
|
METH_KEYWORDS
|
METH_NOARGS
|
METH_O
)))
==
0
);
return
new
BoxedCApiFunction
(
ml
->
ml_flags
,
self
,
ml
->
ml_name
,
ml
->
ml_meth
);
return
new
BoxedCApiFunction
(
ml
->
ml_flags
,
self
,
ml
->
ml_name
,
ml
->
ml_meth
);
}
}
...
...
src/runtime/import.cpp
View file @
a7c10a6e
...
@@ -159,14 +159,16 @@ static Box* importSub(const std::string& name, const std::string& full_name, Box
...
@@ -159,14 +159,16 @@ static Box* importSub(const std::string& name, const std::string& full_name, Box
return
NULL
;
return
NULL
;
}
}
static
Box
*
import
(
const
std
::
string
*
name
,
bool
return_first
)
{
static
Box
*
import
(
const
std
::
string
*
name
,
bool
return_first
,
int
level
)
{
assert
(
name
);
assert
(
name
);
assert
(
name
->
size
()
>
0
);
assert
(
name
->
size
()
>
0
);
static
StatCounter
slowpath_import
(
"slowpath_import"
);
static
StatCounter
slowpath_import
(
"slowpath_import"
);
slowpath_import
.
log
();
slowpath_import
.
log
();
BoxedDict
*
sys_modules
=
getSysModulesDict
();
RELEASE_ASSERT
(
level
==
-
1
||
level
==
0
,
"not implemented"
);
if
(
level
==
0
)
printf
(
"Warning: import level 0 will be treated as -1!
\n
"
);
size_t
l
=
0
,
r
;
size_t
l
=
0
,
r
;
Box
*
last_module
=
NULL
;
Box
*
last_module
=
NULL
;
...
@@ -210,6 +212,23 @@ extern "C" PyObject* PyImport_ImportModuleNoBlock(const char* name) noexcept {
...
@@ -210,6 +212,23 @@ extern "C" PyObject* PyImport_ImportModuleNoBlock(const char* name) noexcept {
Py_FatalError
(
"unimplemented"
);
Py_FatalError
(
"unimplemented"
);
}
}
// This function has the same behaviour as __import__()
extern
"C"
PyObject
*
PyImport_ImportModuleLevel
(
const
char
*
name
,
PyObject
*
globals
,
PyObject
*
locals
,
PyObject
*
fromlist
,
int
level
)
noexcept
{
RELEASE_ASSERT
(
globals
==
NULL
,
"not implemented"
);
RELEASE_ASSERT
(
locals
==
NULL
,
"not implemented"
);
RELEASE_ASSERT
(
fromlist
==
NULL
,
"not implemented"
);
RELEASE_ASSERT
(
level
==
0
,
"not implemented"
);
try
{
std
::
string
module_name
=
name
;
return
import
(
level
,
fromlist
?
fromlist
:
None
,
&
module_name
);
}
catch
(
ExcInfo
e
)
{
setCAPIException
(
e
);
return
NULL
;
}
}
// Named the same thing as the CPython method:
// Named the same thing as the CPython method:
static
void
ensure_fromlist
(
Box
*
module
,
Box
*
fromlist
,
const
std
::
string
&
module_name
,
bool
recursive
)
{
static
void
ensure_fromlist
(
Box
*
module
,
Box
*
fromlist
,
const
std
::
string
&
module_name
,
bool
recursive
)
{
if
(
module
->
getattr
(
"__path__"
)
==
NULL
)
{
if
(
module
->
getattr
(
"__path__"
)
==
NULL
)
{
...
@@ -243,9 +262,9 @@ static void ensure_fromlist(Box* module, Box* fromlist, const std::string& modul
...
@@ -243,9 +262,9 @@ static void ensure_fromlist(Box* module, Box* fromlist, const std::string& modul
}
}
extern
"C"
Box
*
import
(
int
level
,
Box
*
from_imports
,
const
std
::
string
*
module_name
)
{
extern
"C"
Box
*
import
(
int
level
,
Box
*
from_imports
,
const
std
::
string
*
module_name
)
{
RELEASE_ASSERT
(
level
==
-
1
,
"
"
);
RELEASE_ASSERT
(
level
==
-
1
||
level
==
0
,
"not implemented
"
);
Box
*
module
=
import
(
module_name
,
from_imports
==
None
);
Box
*
module
=
import
(
module_name
,
from_imports
==
None
,
level
);
assert
(
module
);
assert
(
module
);
if
(
from_imports
!=
None
)
{
if
(
from_imports
!=
None
)
{
...
...
src/runtime/str.cpp
View file @
a7c10a6e
...
@@ -21,6 +21,8 @@
...
@@ -21,6 +21,8 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/raw_ostream.h"
#include "Python.h"
#include "core/common.h"
#include "core/common.h"
#include "core/types.h"
#include "core/types.h"
#include "core/util.h"
#include "core/util.h"
...
@@ -1072,14 +1074,16 @@ static bool _needs_escaping[256]
...
@@ -1072,14 +1074,16 @@ static bool _needs_escaping[256]
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
};
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
,
true
};
static
char
_hex
[
17
]
=
"0123456789abcdef"
;
// really only needs to be 16 but clang will complain
static
char
_hex
[
17
]
=
"0123456789abcdef"
;
// really only needs to be 16 but clang will complain
extern
"C"
Box
*
strRepr
(
BoxedString
*
self
)
{
extern
"C"
PyObject
*
PyString_Repr
(
PyObject
*
obj
,
int
smartquotes
)
noexcept
{
BoxedString
*
self
=
(
BoxedString
*
)
obj
;
assert
(
self
->
cls
==
str_cls
);
assert
(
self
->
cls
==
str_cls
);
std
::
ostringstream
os
(
""
);
std
::
ostringstream
os
(
""
);
const
std
::
string
&
s
=
self
->
s
;
const
std
::
string
&
s
=
self
->
s
;
char
quote
=
'\''
;
char
quote
=
'\''
;
if
(
s
.
find
(
'\''
,
0
)
!=
std
::
string
::
npos
&&
s
.
find
(
'\"'
,
0
)
==
std
::
string
::
npos
)
{
if
(
s
martquotes
&&
s
.
find
(
'\''
,
0
)
!=
std
::
string
::
npos
&&
s
.
find
(
'\"'
,
0
)
==
std
::
string
::
npos
)
{
quote
=
'\"'
;
quote
=
'\"'
;
}
}
os
<<
quote
;
os
<<
quote
;
...
@@ -1125,6 +1129,187 @@ extern "C" Box* strRepr(BoxedString* self) {
...
@@ -1125,6 +1129,187 @@ extern "C" Box* strRepr(BoxedString* self) {
return
boxString
(
os
.
str
());
return
boxString
(
os
.
str
());
}
}
extern
"C"
Box
*
strRepr
(
BoxedString
*
self
)
{
return
PyString_Repr
(
self
,
1
/* smartquotes */
);
}
/* Unescape a backslash-escaped string. If unicode is non-zero,
the string is a u-literal. If recode_encoding is non-zero,
the string is UTF-8 encoded and should be re-encoded in the
specified encoding. */
extern
"C"
PyObject
*
PyString_DecodeEscape
(
const
char
*
s
,
Py_ssize_t
len
,
const
char
*
errors
,
Py_ssize_t
unicode
,
const
char
*
recode_encoding
)
noexcept
{
int
c
;
char
*
p
,
*
buf
;
const
char
*
end
;
PyObject
*
v
;
Py_ssize_t
newlen
=
recode_encoding
?
4
*
len
:
len
;
v
=
PyString_FromStringAndSize
((
char
*
)
NULL
,
newlen
);
if
(
v
==
NULL
)
return
NULL
;
p
=
buf
=
PyString_AsString
(
v
);
end
=
s
+
len
;
while
(
s
<
end
)
{
if
(
*
s
!=
'\\'
)
{
non_esc:
#ifdef Py_USING_UNICODE
if
(
recode_encoding
&&
(
*
s
&
0x80
))
{
PyObject
*
u
,
*
w
;
char
*
r
;
const
char
*
t
;
Py_ssize_t
rn
;
t
=
s
;
/* Decode non-ASCII bytes as UTF-8. */
while
(
t
<
end
&&
(
*
t
&
0x80
))
t
++
;
u
=
PyUnicode_DecodeUTF8
(
s
,
t
-
s
,
errors
);
if
(
!
u
)
goto
failed
;
/* Recode them in target encoding. */
w
=
PyUnicode_AsEncodedString
(
u
,
recode_encoding
,
errors
);
Py_DECREF
(
u
);
if
(
!
w
)
goto
failed
;
/* Append bytes to output buffer. */
assert
(
PyString_Check
(
w
));
r
=
PyString_AS_STRING
(
w
);
rn
=
PyString_GET_SIZE
(
w
);
Py_MEMCPY
(
p
,
r
,
rn
);
p
+=
rn
;
Py_DECREF
(
w
);
s
=
t
;
}
else
{
*
p
++
=
*
s
++
;
}
#else
*
p
++
=
*
s
++
;
#endif
continue
;
}
s
++
;
if
(
s
==
end
)
{
PyErr_SetString
(
PyExc_ValueError
,
"Trailing
\\
in string"
);
goto
failed
;
}
switch
(
*
s
++
)
{
/* XXX This assumes ASCII! */
case
'\n'
:
break
;
case
'\\'
:
*
p
++
=
'\\'
;
break
;
case
'\''
:
*
p
++
=
'\''
;
break
;
case
'\"'
:
*
p
++
=
'\"'
;
break
;
case
'b'
:
*
p
++
=
'\b'
;
break
;
case
'f'
:
*
p
++
=
'\014'
;
break
;
/* FF */
case
't'
:
*
p
++
=
'\t'
;
break
;
case
'n'
:
*
p
++
=
'\n'
;
break
;
case
'r'
:
*
p
++
=
'\r'
;
break
;
case
'v'
:
*
p
++
=
'\013'
;
break
;
/* VT */
case
'a'
:
*
p
++
=
'\007'
;
break
;
/* BEL, not classic C */
case
'0'
:
case
'1'
:
case
'2'
:
case
'3'
:
case
'4'
:
case
'5'
:
case
'6'
:
case
'7'
:
c
=
s
[
-
1
]
-
'0'
;
if
(
s
<
end
&&
'0'
<=
*
s
&&
*
s
<=
'7'
)
{
c
=
(
c
<<
3
)
+
*
s
++
-
'0'
;
if
(
s
<
end
&&
'0'
<=
*
s
&&
*
s
<=
'7'
)
c
=
(
c
<<
3
)
+
*
s
++
-
'0'
;
}
*
p
++
=
c
;
break
;
case
'x'
:
if
(
s
+
1
<
end
&&
isxdigit
(
Py_CHARMASK
(
s
[
0
]))
&&
isxdigit
(
Py_CHARMASK
(
s
[
1
])))
{
unsigned
int
x
=
0
;
c
=
Py_CHARMASK
(
*
s
);
s
++
;
if
(
isdigit
(
c
))
x
=
c
-
'0'
;
else
if
(
islower
(
c
))
x
=
10
+
c
-
'a'
;
else
x
=
10
+
c
-
'A'
;
x
=
x
<<
4
;
c
=
Py_CHARMASK
(
*
s
);
s
++
;
if
(
isdigit
(
c
))
x
+=
c
-
'0'
;
else
if
(
islower
(
c
))
x
+=
10
+
c
-
'a'
;
else
x
+=
10
+
c
-
'A'
;
*
p
++
=
x
;
break
;
}
if
(
!
errors
||
strcmp
(
errors
,
"strict"
)
==
0
)
{
PyErr_SetString
(
PyExc_ValueError
,
"invalid
\\
x escape"
);
goto
failed
;
}
if
(
strcmp
(
errors
,
"replace"
)
==
0
)
{
*
p
++
=
'?'
;
}
else
if
(
strcmp
(
errors
,
"ignore"
)
==
0
)
/* do nothing */
;
else
{
PyErr_Format
(
PyExc_ValueError
,
"decoding error; "
"unknown error handling code: %.400s"
,
errors
);
goto
failed
;
}
/* skip \x */
if
(
s
<
end
&&
isxdigit
(
Py_CHARMASK
(
s
[
0
])))
s
++
;
/* and a hexdigit */
break
;
#ifndef Py_USING_UNICODE
case
'u'
:
case
'U'
:
case
'N'
:
if
(
unicode
)
{
PyErr_SetString
(
PyExc_ValueError
,
"Unicode escapes not legal "
"when Unicode disabled"
);
goto
failed
;
}
#endif
default:
*
p
++
=
'\\'
;
s
--
;
goto
non_esc
;
/* an arbitrary number of unescaped
UTF-8 bytes may follow. */
}
}
if
(
p
-
buf
<
newlen
)
_PyString_Resize
(
&
v
,
p
-
buf
);
/* v is cleared on error */
return
v
;
failed:
Py_DECREF
(
v
);
return
NULL
;
}
extern
"C"
Box
*
strHash
(
BoxedString
*
self
)
{
extern
"C"
Box
*
strHash
(
BoxedString
*
self
)
{
assert
(
self
->
cls
==
str_cls
);
assert
(
self
->
cls
==
str_cls
);
...
@@ -1650,6 +1835,44 @@ Box* strEndswith(BoxedString* self, Box* elt, Box* start, Box** _args) {
...
@@ -1650,6 +1835,44 @@ Box* strEndswith(BoxedString* self, Box* elt, Box* start, Box** _args) {
return
boxBool
(
self
->
s
.
compare
(
istart
,
sub
->
s
.
size
(),
sub
->
s
)
==
0
);
return
boxBool
(
self
->
s
.
compare
(
istart
,
sub
->
s
.
size
(),
sub
->
s
)
==
0
);
}
}
Box
*
strDecode
(
BoxedString
*
self
,
Box
*
encoding
,
Box
*
error
)
{
if
(
self
->
cls
!=
str_cls
)
raiseExcHelper
(
TypeError
,
"descriptor 'decode' requires a 'str' object but received a '%s'"
,
getTypeName
(
self
));
BoxedString
*
encoding_str
=
(
BoxedString
*
)
encoding
;
BoxedString
*
error_str
=
(
BoxedString
*
)
error
;
if
(
encoding_str
&&
encoding_str
->
cls
!=
str_cls
)
raiseExcHelper
(
TypeError
,
"decode() argument 1 must be string, not '%s'"
,
getTypeName
(
encoding_str
));
if
(
error_str
&&
error_str
->
cls
!=
str_cls
)
raiseExcHelper
(
TypeError
,
"decode() argument 2 must be string, not '%s'"
,
getTypeName
(
error_str
));
Box
*
result
=
PyCodec_Decode
(
self
,
encoding_str
?
encoding_str
->
s
.
c_str
()
:
NULL
,
error_str
?
error_str
->
s
.
c_str
()
:
NULL
);
checkAndThrowCAPIException
();
return
result
;
}
Box
*
strEncode
(
BoxedString
*
self
,
Box
*
encoding
,
Box
*
error
)
{
if
(
self
->
cls
!=
str_cls
)
raiseExcHelper
(
TypeError
,
"descriptor 'encode' requires a 'str' object but received a '%s'"
,
getTypeName
(
self
));
BoxedString
*
encoding_str
=
(
BoxedString
*
)
encoding
;
BoxedString
*
error_str
=
(
BoxedString
*
)
error
;
if
(
encoding_str
&&
encoding_str
->
cls
!=
str_cls
)
raiseExcHelper
(
TypeError
,
"encode() argument 1 must be string, not '%s'"
,
getTypeName
(
encoding_str
));
if
(
error_str
&&
error_str
->
cls
!=
str_cls
)
raiseExcHelper
(
TypeError
,
"encode() argument 2 must be string, not '%s'"
,
getTypeName
(
error_str
));
Box
*
result
=
PyCodec_Encode
(
self
,
encoding_str
?
encoding_str
->
s
.
c_str
()
:
NULL
,
error_str
?
error_str
->
s
.
c_str
()
:
NULL
);
checkAndThrowCAPIException
();
return
result
;
}
Box
*
strFind
(
BoxedString
*
self
,
Box
*
elt
,
Box
*
_start
)
{
Box
*
strFind
(
BoxedString
*
self
,
Box
*
elt
,
Box
*
_start
)
{
if
(
self
->
cls
!=
str_cls
)
if
(
self
->
cls
!=
str_cls
)
raiseExcHelper
(
TypeError
,
"descriptor 'find' requires a 'str' object but received a '%s'"
,
getTypeName
(
self
));
raiseExcHelper
(
TypeError
,
"descriptor 'find' requires a 'str' object but received a '%s'"
,
getTypeName
(
self
));
...
@@ -1932,6 +2155,11 @@ void setupStr() {
...
@@ -1932,6 +2155,11 @@ void setupStr() {
str_cls
->
giveAttr
(
"istitle"
,
new
BoxedFunction
(
boxRTFunction
((
void
*
)
strIsTitle
,
BOXED_BOOL
,
1
)));
str_cls
->
giveAttr
(
"istitle"
,
new
BoxedFunction
(
boxRTFunction
((
void
*
)
strIsTitle
,
BOXED_BOOL
,
1
)));
str_cls
->
giveAttr
(
"isupper"
,
new
BoxedFunction
(
boxRTFunction
((
void
*
)
strIsUpper
,
BOXED_BOOL
,
1
)));
str_cls
->
giveAttr
(
"isupper"
,
new
BoxedFunction
(
boxRTFunction
((
void
*
)
strIsUpper
,
BOXED_BOOL
,
1
)));
str_cls
->
giveAttr
(
"decode"
,
new
BoxedFunction
(
boxRTFunction
((
void
*
)
strDecode
,
UNKNOWN
,
3
,
2
,
false
,
false
),
{
0
,
0
}));
str_cls
->
giveAttr
(
"encode"
,
new
BoxedFunction
(
boxRTFunction
((
void
*
)
strEncode
,
UNKNOWN
,
3
,
2
,
false
,
false
),
{
0
,
0
}));
str_cls
->
giveAttr
(
"lower"
,
new
BoxedFunction
(
boxRTFunction
((
void
*
)
strLower
,
STR
,
1
)));
str_cls
->
giveAttr
(
"lower"
,
new
BoxedFunction
(
boxRTFunction
((
void
*
)
strLower
,
STR
,
1
)));
str_cls
->
giveAttr
(
"swapcase"
,
new
BoxedFunction
(
boxRTFunction
((
void
*
)
strSwapcase
,
STR
,
1
)));
str_cls
->
giveAttr
(
"swapcase"
,
new
BoxedFunction
(
boxRTFunction
((
void
*
)
strSwapcase
,
STR
,
1
)));
str_cls
->
giveAttr
(
"upper"
,
new
BoxedFunction
(
boxRTFunction
((
void
*
)
strUpper
,
STR
,
1
)));
str_cls
->
giveAttr
(
"upper"
,
new
BoxedFunction
(
boxRTFunction
((
void
*
)
strUpper
,
STR
,
1
)));
...
...
src/runtime/tuple.cpp
View file @
a7c10a6e
...
@@ -305,8 +305,6 @@ extern "C" Box* tupleNew(Box* _cls, BoxedTuple* args, BoxedDict* kwargs) {
...
@@ -305,8 +305,6 @@ extern "C" Box* tupleNew(Box* _cls, BoxedTuple* args, BoxedDict* kwargs) {
raiseExcHelper
(
TypeError
,
"tuple.__new__(%s): %s is not a subtype of tuple"
,
getNameOfClass
(
cls
),
raiseExcHelper
(
TypeError
,
"tuple.__new__(%s): %s is not a subtype of tuple"
,
getNameOfClass
(
cls
),
getNameOfClass
(
cls
));
getNameOfClass
(
cls
));
RELEASE_ASSERT
(
cls
==
tuple_cls
,
""
);
int
args_sz
=
args
->
elts
.
size
();
int
args_sz
=
args
->
elts
.
size
();
int
kwargs_sz
=
kwargs
->
d
.
size
();
int
kwargs_sz
=
kwargs
->
d
.
size
();
...
@@ -335,7 +333,7 @@ extern "C" Box* tupleNew(Box* _cls, BoxedTuple* args, BoxedDict* kwargs) {
...
@@ -335,7 +333,7 @@ extern "C" Box* tupleNew(Box* _cls, BoxedTuple* args, BoxedDict* kwargs) {
velts
.
push_back
(
e
);
velts
.
push_back
(
e
);
}
}
return
new
BoxedTuple
(
std
::
move
(
velts
));
return
new
(
cls
)
BoxedTuple
(
std
::
move
(
velts
));
}
}
extern
"C"
int
PyTuple_SetItem
(
PyObject
*
op
,
Py_ssize_t
i
,
PyObject
*
newitem
)
noexcept
{
extern
"C"
int
PyTuple_SetItem
(
PyObject
*
op
,
Py_ssize_t
i
,
PyObject
*
newitem
)
noexcept
{
...
...
src/runtime/types.cpp
View file @
a7c10a6e
...
@@ -62,6 +62,7 @@ extern "C" void initfcntl();
...
@@ -62,6 +62,7 @@ extern "C" void initfcntl();
extern
"C"
void
inittime
();
extern
"C"
void
inittime
();
extern
"C"
void
initarray
();
extern
"C"
void
initarray
();
extern
"C"
void
initzlib
();
extern
"C"
void
initzlib
();
extern
"C"
void
init_codecs
();
namespace
pyston
{
namespace
pyston
{
...
@@ -1373,6 +1374,7 @@ void setupRuntime() {
...
@@ -1373,6 +1374,7 @@ void setupRuntime() {
inittime
();
inittime
();
initarray
();
initarray
();
initzlib
();
initzlib
();
init_codecs
();
setupSysEnd
();
setupSysEnd
();
...
...
src/runtime/unicode.cpp
View file @
a7c10a6e
...
@@ -373,6 +373,42 @@ extern "C" Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch) noexcept {
...
@@ -373,6 +373,42 @@ extern "C" Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch) noexcept {
Py_FatalError
(
"unimplemented"
);
Py_FatalError
(
"unimplemented"
);
}
}
extern
"C"
int
PyUnicodeEncodeError_GetStart
(
PyObject
*
,
Py_ssize_t
*
)
noexcept
{
Py_FatalError
(
"unimplemented"
);
}
extern
"C"
int
PyUnicodeDecodeError_GetStart
(
PyObject
*
,
Py_ssize_t
*
)
noexcept
{
Py_FatalError
(
"unimplemented"
);
}
extern
"C"
int
PyUnicodeTranslateError_GetStart
(
PyObject
*
,
Py_ssize_t
*
)
noexcept
{
Py_FatalError
(
"unimplemented"
);
}
extern
"C"
int
PyUnicodeEncodeError_GetEnd
(
PyObject
*
,
Py_ssize_t
*
)
noexcept
{
Py_FatalError
(
"unimplemented"
);
}
extern
"C"
int
PyUnicodeDecodeError_GetEnd
(
PyObject
*
,
Py_ssize_t
*
)
noexcept
{
Py_FatalError
(
"unimplemented"
);
}
extern
"C"
int
PyUnicodeTranslateError_GetEnd
(
PyObject
*
,
Py_ssize_t
*
)
noexcept
{
Py_FatalError
(
"unimplemented"
);
}
extern
"C"
PyObject
*
PyUnicodeEncodeError_GetObject
(
PyObject
*
)
noexcept
{
Py_FatalError
(
"unimplemented"
);
}
extern
"C"
PyObject
*
_PyUnicode_DecodeUnicodeInternal
(
const
char
*
s
,
Py_ssize_t
size
,
const
char
*
errors
)
noexcept
{
Py_FatalError
(
"unimplemented"
);
}
extern
"C"
PyObject
*
PyUnicode_BuildEncodingMap
(
PyObject
*
string
)
noexcept
{
Py_FatalError
(
"unimplemented"
);
}
// From CPython, unicodeobject.c
// From CPython, unicodeobject.c
// Used by Py_UNICODE_ISSPACE in unicodeobject.h
// Used by Py_UNICODE_ISSPACE in unicodeobject.h
/* Fast detection of the most frequent whitespace characters */
/* Fast detection of the most frequent whitespace characters */
...
...
test/tests/optparse_test.py
View file @
a7c10a6e
# allow-warning: converting unicode literal to str
# allow-warning: converting unicode literal to str
# allow-warning: import level 0 will be treated as -1
# Simple optparse test, taken from the optparse.py docstring:
# Simple optparse test, taken from the optparse.py docstring:
from
optparse
import
OptionParser
from
optparse
import
OptionParser
...
...
test/tests/str_encode_decode.py
0 → 100644
View file @
a7c10a6e
# allow-warning: converting unicode literal to str
# allow-warning: import level 0 will be treated as -1!
def
test
(
string
,
encoding
):
s
=
string
.
encode
(
encoding
)
print
encoding
,
s
assert
string
==
s
.
decode
(
encoding
)
test
(
"hello world"
,
"hex"
)
test
(
"hello world"
,
"base64"
)
test
(
"
\
r
\
n
\
\
"
,
"string-escape"
)
test/tests/sys_test.py
View file @
a7c10a6e
...
@@ -7,3 +7,4 @@ print sys.version[:3]
...
@@ -7,3 +7,4 @@ print sys.version[:3]
print
os
.
path
.
exists
(
sys
.
executable
)
print
os
.
path
.
exists
(
sys
.
executable
)
print
sys
.
prefix
,
sys
.
exec_prefix
print
sys
.
prefix
,
sys
.
exec_prefix
print
sys
.
copyright
[
-
200
:]
print
sys
.
copyright
[
-
200
:]
print
sys
.
byteorder
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment