Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
P
Pyston
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Boxiang Sun
Pyston
Commits
cea982c5
Commit
cea982c5
authored
Apr 22, 2015
by
Marius Wachtler
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add support for specifying the encoding inside source files.
parent
5c1e2802
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
263 additions
and
16 deletions
+263
-16
from_cpython/Include/fileobject.h
from_cpython/Include/fileobject.h
+1
-1
libpypa
libpypa
+1
-1
src/codegen/parser.cpp
src/codegen/parser.cpp
+1
-0
src/codegen/pypa-parser.cpp
src/codegen/pypa-parser.cpp
+241
-13
src/gc/collector.cpp
src/gc/collector.cpp
+6
-0
src/gc/collector.h
src/gc/collector.h
+2
-0
src/runtime/file.cpp
src/runtime/file.cpp
+1
-1
test/tests/coding_cp1252.py
test/tests/coding_cp1252.py
+3
-0
test/tests/coding_koi8.py
test/tests/coding_koi8.py
+7
-0
No files found.
from_cpython/Include/fileobject.h
View file @
cea982c5
...
...
@@ -51,7 +51,7 @@ PyAPI_FUNC(PyObject *) PyFile_FromString(char *, char *) PYSTON_NOEXCEPT;
PyAPI_FUNC
(
void
)
PyFile_SetBufSize
(
PyObject
*
,
int
)
PYSTON_NOEXCEPT
;
PyAPI_FUNC
(
int
)
PyFile_SetEncoding
(
PyObject
*
,
const
char
*
)
PYSTON_NOEXCEPT
;
PyAPI_FUNC
(
int
)
PyFile_SetEncodingAndErrors
(
PyObject
*
,
const
char
*
,
char
*
errors
)
PYSTON_NOEXCEPT
;
PyAPI_FUNC
(
PyObject
*
)
PyFile_FromFile
(
FILE
*
,
c
har
*
,
char
*
,
PyAPI_FUNC
(
PyObject
*
)
PyFile_FromFile
(
FILE
*
,
c
onst
char
*
,
const
char
*
,
int
(
*
)(
FILE
*
))
PYSTON_NOEXCEPT
;
PyAPI_FUNC
(
FILE
*
)
PyFile_AsFile
(
PyObject
*
)
PYSTON_NOEXCEPT
;
PyAPI_FUNC
(
void
)
PyFile_IncUseCount
(
PyFileObject
*
)
PYSTON_NOEXCEPT
;
...
...
libpypa
@
91428d4e
Subproject commit
51f55dd4068f116ca287369cedddce624de73da6
Subproject commit
91428d4e7a72c53a058682f3a8d1993a90efdccd
src/codegen/parser.cpp
View file @
cea982c5
...
...
@@ -973,6 +973,7 @@ AST_Module* parse_string(const char* code) {
FILE
*
f
=
fopen
(
tmp
.
c_str
(),
"w"
);
fwrite
(
code
,
1
,
size
,
f
);
fputc
(
'\n'
,
f
);
fclose
(
f
);
AST_Module
*
m
=
parse_file
(
tmp
.
c_str
());
...
...
src/codegen/pypa-parser.cpp
View file @
cea982c5
...
...
@@ -32,6 +32,7 @@
#include "core/stats.h"
#include "core/types.h"
#include "core/util.h"
#include "gc/collector.h"
#include "runtime/capi.h"
#include "runtime/objmodel.h"
#include "runtime/types.h"
...
...
@@ -839,29 +840,256 @@ void pypaErrorHandler(pypa::Error e) {
}
}
pypa
::
String
pypaUnicodeEscapeDecoder
(
pypa
::
String
s
,
bool
raw_prefix
,
bool
&
error
)
{
static
PyObject
*
decode_utf8
(
const
char
**
sPtr
,
const
char
*
end
,
const
char
*
encoding
)
noexcept
{
#ifndef Py_USING_UNICODE
Py_FatalError
(
"decode_utf8 should not be called in this build."
);
return
NULL
;
#else
PyObject
*
u
,
*
v
;
const
char
*
s
,
*
t
;
t
=
s
=
(
const
char
*
)
*
sPtr
;
/* while (s < end && *s != '\\') s++; */
/* inefficient for u".." */
while
(
s
<
end
&&
(
*
s
&
0x80
))
s
++
;
*
sPtr
=
s
;
u
=
PyUnicode_DecodeUTF8
(
t
,
s
-
t
,
NULL
);
if
(
u
==
NULL
)
return
NULL
;
v
=
PyUnicode_AsEncodedString
(
u
,
encoding
,
NULL
);
Py_DECREF
(
u
);
return
v
;
#endif
}
#ifdef Py_USING_UNICODE
static
PyObject
*
decode_unicode
(
const
char
*
s
,
size_t
len
,
int
rawmode
,
const
char
*
encoding
)
noexcept
{
PyObject
*
v
;
PyObject
*
u
=
NULL
;
char
*
buf
;
char
*
p
;
const
char
*
end
;
if
(
encoding
!=
NULL
&&
strcmp
(
encoding
,
"iso-8859-1"
))
{
/* check for integer overflow */
if
(
len
>
PY_SIZE_MAX
/
6
)
return
NULL
;
/* "<C3><A4>" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
"\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */
u
=
PyString_FromStringAndSize
((
char
*
)
NULL
,
len
*
6
);
if
(
u
==
NULL
)
return
NULL
;
p
=
buf
=
PyString_AsString
(
u
);
end
=
s
+
len
;
while
(
s
<
end
)
{
if
(
*
s
==
'\\'
)
{
*
p
++
=
*
s
++
;
if
(
*
s
&
0x80
)
{
strcpy
(
p
,
"u005c"
);
p
+=
5
;
}
}
if
(
*
s
&
0x80
)
{
/* XXX inefficient */
PyObject
*
w
;
char
*
r
;
Py_ssize_t
rn
,
i
;
w
=
decode_utf8
(
&
s
,
end
,
"utf-32-be"
);
if
(
w
==
NULL
)
{
Py_DECREF
(
u
);
return
NULL
;
}
r
=
PyString_AsString
(
w
);
rn
=
PyString_Size
(
w
);
assert
(
rn
%
4
==
0
);
for
(
i
=
0
;
i
<
rn
;
i
+=
4
)
{
sprintf
(
p
,
"
\\
U%02x%02x%02x%02x"
,
r
[
i
+
0
]
&
0xFF
,
r
[
i
+
1
]
&
0xFF
,
r
[
i
+
2
]
&
0xFF
,
r
[
i
+
3
]
&
0xFF
);
p
+=
10
;
}
Py_DECREF
(
w
);
}
else
{
*
p
++
=
*
s
++
;
}
}
len
=
p
-
buf
;
s
=
buf
;
}
if
(
rawmode
)
v
=
PyUnicode_DecodeRawUnicodeEscape
(
s
,
len
,
NULL
);
else
v
=
PyUnicode_DecodeUnicodeEscape
(
s
,
len
,
NULL
);
Py_XDECREF
(
u
);
return
v
;
}
#endif
pypa
::
String
pypaEscapeDecoder
(
const
pypa
::
String
&
s
,
const
pypa
::
String
&
encoding
,
bool
unicode
,
bool
raw_prefix
,
bool
&
error
)
{
try
{
error
=
false
;
Box
*
unicode
=
NULL
;
if
(
raw_prefix
)
unicode
=
PyUnicode_DecodeRawUnicodeEscape
(
s
.
c_str
(),
s
.
size
(),
"strict"
);
else
unicode
=
PyUnicode_DecodeUnicodeEscape
(
s
.
c_str
(),
s
.
size
(),
"strict"
);
checkAndThrowCAPIException
();
BoxedString
*
str_utf8
=
(
BoxedString
*
)
PyUnicode_AsUTF8String
(
unicode
);
checkAndThrowCAPIException
();
return
std
::
string
(
str_utf8
->
s
);
if
(
unicode
)
{
PyObject
*
str
=
decode_unicode
(
s
.
c_str
(),
s
.
size
(),
raw_prefix
,
encoding
.
c_str
());
if
(
!
str
)
throwCAPIException
();
BoxedString
*
str_utf8
=
(
BoxedString
*
)
PyUnicode_AsUTF8String
(
str
);
assert
(
str_utf8
->
cls
==
str_cls
);
checkAndThrowCAPIException
();
return
str_utf8
->
s
.
str
();
}
bool
need_encoding
=
encoding
!=
"utf-8"
&&
encoding
!=
"iso-8859-1"
;
if
(
raw_prefix
||
s
.
find
(
'\\'
)
==
pypa
::
String
::
npos
)
{
if
(
need_encoding
)
{
PyObject
*
u
=
PyUnicode_DecodeUTF8
(
s
.
c_str
(),
s
.
size
(),
NULL
);
if
(
!
u
)
throwCAPIException
();
BoxedString
*
str
=
(
BoxedString
*
)
PyUnicode_AsEncodedString
(
u
,
encoding
.
c_str
(),
NULL
);
assert
(
str
->
cls
==
str_cls
);
return
str
->
s
.
str
();
}
else
{
return
s
;
}
}
BoxedString
*
decoded
=
(
BoxedString
*
)
PyString_DecodeEscape
(
s
.
c_str
(),
s
.
size
(),
NULL
,
false
,
need_encoding
?
encoding
.
c_str
()
:
NULL
);
if
(
!
decoded
)
throwCAPIException
();
assert
(
decoded
->
cls
==
str_cls
);
return
decoded
->
s
.
str
();
}
catch
(
ExcInfo
e
)
{
error
=
true
;
BoxedString
*
error_message
=
str
(
e
.
value
);
if
(
error_message
&&
error_message
->
cls
==
str_cls
)
return
std
::
string
(
error_message
->
s
);
return
"Encountered an unknown error inside pypa
Unicode
EscapeDecoder"
;
return
"Encountered an unknown error inside pypaEscapeDecoder"
;
}
}
class
PystonSourceReader
:
public
pypa
::
Reader
{
public:
PystonSourceReader
();
~
PystonSourceReader
()
override
;
bool
open_file
(
const
std
::
string
&
file_path
);
void
close
();
bool
set_encoding
(
const
std
::
string
&
coding
)
override
;
std
::
string
get_encoding
()
const
{
return
encoding
;
}
std
::
string
get_line
()
override
;
unsigned
get_line_number
()
const
override
{
return
line_number
;
}
std
::
string
get_filename
()
const
override
{
return
file_path
;
}
bool
eof
()
const
override
{
return
is_eof
;
}
private:
char
next
();
std
::
string
file_path
;
bool
is_eof
;
FILE
*
file
;
unsigned
line_number
;
PyObject
*
readline
;
std
::
string
encoding
;
};
PystonSourceReader
::
PystonSourceReader
()
:
file
(
nullptr
),
readline
(
nullptr
)
{
close
();
}
PystonSourceReader
::~
PystonSourceReader
()
{
close
();
}
bool
PystonSourceReader
::
open_file
(
const
std
::
string
&
_file_path
)
{
file
=
fopen
(
_file_path
.
c_str
(),
"r"
);
if
(
!
file
)
return
false
;
file_path
=
_file_path
;
is_eof
=
false
;
line_number
=
0
;
readline
=
nullptr
;
return
true
;
}
void
PystonSourceReader
::
close
()
{
if
(
file
)
fclose
(
file
);
file
=
nullptr
;
file_path
.
clear
();
is_eof
=
true
;
if
(
readline
)
gc
::
deregisterPermanentRoot
(
readline
);
readline
=
nullptr
;
line_number
=
0
;
}
bool
PystonSourceReader
::
set_encoding
(
const
std
::
string
&
coding
)
{
PyObject
*
stream
=
PyFile_FromFile
(
file
,
file_path
.
c_str
(),
"rb"
,
NULL
);
if
(
stream
==
NULL
)
return
false
;
PyObject
*
reader
=
PyCodec_StreamReader
(
coding
.
c_str
(),
stream
,
NULL
);
if
(
reader
==
NULL
)
return
false
;
readline
=
PyObject_GetAttrString
(
reader
,
"readline"
);
if
(
readline
==
NULL
)
return
false
;
gc
::
registerPermanentRoot
(
readline
);
return
true
;
}
char
PystonSourceReader
::
next
()
{
if
(
is_eof
)
return
0
;
int
c
=
fgetc
(
file
);
if
(
c
==
EOF
)
{
is_eof
=
true
;
return
0
;
}
return
c
;
}
std
::
string
PystonSourceReader
::
get_line
()
{
if
(
eof
())
return
std
::
string
();
if
(
!
readline
)
{
std
::
string
line
;
char
c
;
do
{
c
=
next
();
if
(
eof
())
break
;
line
.
push_back
(
c
);
}
while
(
c
!=
'\n'
&&
c
!=
'\x0c'
);
if
(
!
eof
())
++
line_number
;
return
line
;
}
BoxedString
*
line
=
(
BoxedString
*
)
runtimeCall
(
readline
,
ArgPassSpec
(
0
),
0
,
0
,
0
,
0
,
0
);
if
(
line
->
cls
==
unicode_cls
)
{
line
=
(
BoxedString
*
)
PyUnicode_AsUTF8String
(
line
);
if
(
line
==
NULL
)
{
is_eof
=
true
;
return
std
::
string
();
}
}
assert
(
line
->
cls
==
str_cls
);
if
(
!
line
->
size
())
is_eof
=
true
;
++
line_number
;
return
line
->
s
;
}
AST_Module
*
pypa_parse
(
char
const
*
file_path
)
{
pypa
::
Lexer
lexer
(
file_path
);
auto
reader
=
llvm
::
make_unique
<
PystonSourceReader
>
();
if
(
!
reader
->
open_file
(
file_path
))
return
nullptr
;
pypa
::
Lexer
lexer
(
std
::
move
(
reader
));
pypa
::
SymbolTablePtr
symbols
;
pypa
::
AstModulePtr
module
;
pypa
::
ParserOptions
options
;
...
...
@@ -871,7 +1099,7 @@ AST_Module* pypa_parse(char const* file_path) {
options
.
python3only
=
false
;
options
.
handle_future_errors
=
false
;
options
.
error_handler
=
pypaErrorHandler
;
options
.
unicode_escape_handler
=
pypaUnicode
EscapeDecoder
;
options
.
escape_handler
=
pypa
EscapeDecoder
;
if
(
pypa
::
parse
(
lexer
,
module
,
symbols
,
options
)
&&
module
)
{
return
readModule
(
*
module
);
...
...
src/gc/collector.cpp
View file @
cea982c5
...
...
@@ -136,6 +136,12 @@ void registerPermanentRoot(void* obj, bool allow_duplicates) {
roots
.
insert
(
obj
);
}
void
deregisterPermanentRoot
(
void
*
obj
)
{
assert
(
global_heap
.
getAllocationFromInteriorPointer
(
obj
));
ASSERT
(
roots
.
count
(
obj
),
""
);
roots
.
erase
(
obj
);
}
extern
"C"
PyObject
*
PyGC_AddRoot
(
PyObject
*
obj
)
noexcept
{
if
(
obj
)
{
// Allow duplicates from CAPI code since they shouldn't have to know
...
...
src/gc/collector.h
View file @
cea982c5
...
...
@@ -26,6 +26,8 @@ namespace gc {
// (Note: this marks the gc allocation itself, not the pointer that points to one. For that, use
// a GCRootHandle)
void
registerPermanentRoot
(
void
*
root_obj
,
bool
allow_duplicates
=
false
);
void
deregisterPermanentRoot
(
void
*
root_obj
);
// Register an object that was not allocated through this collector, as a root for this collector.
// The motivating usecase is statically-allocated PyTypeObject objects, which are full Python objects
// even if they are not heap allocated.
...
...
src/runtime/file.cpp
View file @
cea982c5
...
...
@@ -1134,7 +1134,7 @@ extern "C" void PyFile_SetFP(PyObject* _f, FILE* fp) noexcept {
f
->
f_fp
=
fp
;
}
extern
"C"
PyObject
*
PyFile_FromFile
(
FILE
*
fp
,
c
har
*
name
,
char
*
mode
,
int
(
*
close
)(
FILE
*
))
noexcept
{
extern
"C"
PyObject
*
PyFile_FromFile
(
FILE
*
fp
,
c
onst
char
*
name
,
const
char
*
mode
,
int
(
*
close
)(
FILE
*
))
noexcept
{
return
new
BoxedFile
(
fp
,
name
,
mode
,
close
);
}
...
...
test/tests/coding_cp1252.py
0 → 100644
View file @
cea982c5
# coding: cp1252
s
=
u""
print
ord
(
s
),
s
test/tests/coding_koi8.py
0 → 100644
View file @
cea982c5
# -*- coding: koi8-r -*-
def
test
(
s
):
print
s
,
"len:"
,
len
(
s
)
for
c
in
s
:
print
hex
(
ord
(
c
)),
print
""
test
(
u"ðÉÔÏÎ"
.
encode
(
"utf8"
))
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment