Commit bfe06380 authored by Boxiang Sun

Remove the "CPython executable parser"

Pyston used to call the CPython executable to generate the CPython AST. Remove
this "parser" and use the CPython Parser module by default. Pass "-x" to enable
the libpypa parser.
parent 5f562c46
......@@ -320,9 +320,6 @@ add_executable(pyston $<TARGET_OBJECTS:PYSTON_MAIN_OBJECT> $<TARGET_OBJECTS:PYST
target_link_libraries(pyston -Wl,--whole-archive stdlib -Wl,--no-whole-archive pthread m z readline sqlite3 gmp mpfr ssl crypto unwind pypa liblz4 double-conversion util ${LLVM_LIBS} ${LIBLZMA_LIBRARIES} ${OPTIONAL_LIBRARIES} ${CMAKE_BINARY_DIR}/jemalloc/lib/libjemalloc.a)
add_dependencies(pyston libjemalloc)
# copy src/codegen/parse_ast.py to the build directory
add_custom_command(TARGET pyston POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_SOURCE_DIR}/src/codegen/parse_ast.py ${CMAKE_BINARY_DIR}/src/codegen/parse_ast.py)
add_custom_target(astcompare COMMAND ${CMAKE_SOURCE_DIR}/tools/astprint_test.sh
DEPENDS astprint
COMMENT "Running libpypa vs CPython AST result comparison test")
......@@ -342,9 +339,9 @@ endmacro()
# tests testname directory arguments
add_pyston_test(defaults tests --order-by-mtime -t50)
add_pyston_test(force_llvm tests -a=-n -a=-X -t90)
add_pyston_test(force_llvm tests -a=-n -a=-x -t90)
if(${CMAKE_BUILD_TYPE} STREQUAL "Release")
add_pyston_test(max_compilation_tier tests -a=-O -a=-X -t50)
add_pyston_test(max_compilation_tier tests -a=-O -a=-x -t50)
endif()
add_pyston_test(defaults cpython --exit-code-only --skip-failing -t100)
add_pyston_test(defaults integration --exit-code-only --skip-failing -t900)
......
......@@ -777,7 +777,7 @@ check$1 test$1: $(PYTHON_EXE_DEPS) pyston$1
@# we pass -I to cpython tests and skip failing ones because they are sloooow otherwise
$(PYTHON) $(TOOLS_DIR)/tester.py -R pyston$1 -j$(TEST_THREADS) -a=-S -k --exit-code-only --skip-failing -t50 $(TEST_DIR)/cpython $(ARGS)
$(PYTHON) $(TOOLS_DIR)/tester.py -R pyston$1 -j$(TEST_THREADS) -k -a=-S --exit-code-only --skip-failing -t600 $(TEST_DIR)/integration $(ARGS)
$(PYTHON) $(TOOLS_DIR)/tester.py -a=-X -R pyston$1 -j$(TEST_THREADS) -a=-n -a=-S -t50 -k $(TESTS_DIR) $(ARGS)
$(PYTHON) $(TOOLS_DIR)/tester.py -a=-x -R pyston$1 -j$(TEST_THREADS) -a=-n -a=-S -t50 -k $(TESTS_DIR) $(ARGS)
$(PYTHON) $(TOOLS_DIR)/tester.py -R pyston$1 -j$(TEST_THREADS) -a=-O -a=-S -k $(TESTS_DIR) $(ARGS)
.PHONY: run$1 dbg$1
......
......@@ -132,7 +132,7 @@ Pyston-specific flags:
<dd>Use a stripped stdlib. When running pyston_dbg, the default is to use a stdlib with full debugging symbols enabled. Passing -r changes this behavior to load a slimmer, stripped stdlib.</dd>
<dt>-x</dt>
<dd>Disable the pypa parser.</dd>
<dd>Enable the pypa parser.</dd>
Standard Python flags:
<dt>-i</dt>
......
import _ast
import struct
import sys
from types import NoneType
def _print_str(s, f):
assert len(s) < 2**32
f.write(struct.pack(">L", len(s)))
f.write(s)
# Maps each _ast node class to the numeric tag that convert() writes as a
# single unsigned byte in front of the node's payload.
# Tag 0 does not appear here: convert() emits it for a None node.
# Tags 15 and 43 are not part of this literal; they are registered by the
# version-guarded statements that follow it.
# NOTE(review): tag 36 is unassigned -- presumably reserved on the reader
# side; confirm before reusing it.
TYPE_MAP = {
_ast.alias: 1,
_ast.arguments: 2,
_ast.Assert: 3,
_ast.Assign: 4,
_ast.Attribute: 5,
_ast.AugAssign: 6,
_ast.BinOp: 7,
_ast.BoolOp: 8,
_ast.Call: 9,
_ast.ClassDef: 10,
_ast.Compare: 11,
_ast.comprehension: 12,
_ast.Delete: 13,
_ast.Dict: 14,
_ast.Exec: 16,
_ast.ExceptHandler: 17,
_ast.ExtSlice: 18,
_ast.Expr: 19,
_ast.For: 20,
_ast.FunctionDef: 21,
_ast.GeneratorExp: 22,
_ast.Global: 23,
_ast.If: 24,
_ast.IfExp: 25,
_ast.Import: 26,
_ast.ImportFrom: 27,
_ast.Index: 28,
_ast.keyword: 29,
_ast.Lambda: 30,
_ast.List: 31,
_ast.ListComp: 32,
_ast.Module: 33,
_ast.Num: 34,
_ast.Name: 35,
_ast.Pass: 37,
_ast.Pow: 38,
_ast.Print: 39,
_ast.Raise: 40,
_ast.Repr: 41,
_ast.Return: 42,
_ast.Slice: 44,
_ast.Str: 45,
_ast.Subscript: 46,
_ast.TryExcept: 47,
_ast.TryFinally: 48,
_ast.Tuple: 49,
_ast.UnaryOp: 50,
_ast.With: 51,
_ast.While: 52,
_ast.Yield: 53,
_ast.Store: 54,
_ast.Load: 55,
_ast.Param: 56,
_ast.Not: 57,
_ast.In: 58,
_ast.Is: 59,
_ast.IsNot: 60,
_ast.Or: 61,
_ast.And: 62,
_ast.Eq: 63,
_ast.NotEq: 64,
_ast.NotIn: 65,
_ast.GtE: 66,
_ast.Gt: 67,
_ast.Mod: 68,
_ast.Add: 69,
_ast.Continue: 70,
_ast.Lt: 71,
_ast.LtE: 72,
_ast.Break: 73,
_ast.Sub: 74,
_ast.Del: 75,
_ast.Mult: 76,
_ast.Div: 77,
_ast.USub: 78,
_ast.BitAnd: 79,
_ast.BitOr: 80,
_ast.BitXor: 81,
_ast.RShift: 82,
_ast.LShift: 83,
_ast.Invert: 84,
_ast.UAdd: 85,
_ast.FloorDiv: 86,
_ast.Ellipsis: 87,
_ast.Expression: 88,
_ast.SetComp: 89,
}
# DictComp and Set are registered only when running on Python 2.7 or newer
# (presumably because older _ast modules do not define them -- confirm).
# NOTE(review): SetComp is registered unconditionally in the TYPE_MAP literal
# although it looks like a 2.7-era node as well -- confirm whether pre-2.7
# interpreters are still meant to be supported.
if sys.version_info >= (2, 7):
    TYPE_MAP[_ast.DictComp] = 15
    TYPE_MAP[_ast.Set] = 43
def convert(n, f):
    """Serialize the AST node `n` (or None) to the binary stream `f`.

    Layout written for one node:
      * one unsigned byte holding the TYPE_MAP tag (0 when `n` is None);
      * nothing more for None and for operator / expr_context / boolop /
        cmpop / unaryop nodes, which are fully described by their tag;
      * otherwise the marker byte 0xae;
      * for Num, one byte for the literal kind
        (0x10 int, 0x30 long, 0x20 float, 0x40 complex);
      * for Str, one byte for the string kind (0x10 str, 0x20 unicode);
      * every attribute in the node's __dict__ whose name does not start
        with '_', in sorted name order, encoded by value type (see the
        comments in the loop below).

    n -- an _ast.AST instance or None.
    f -- writable binary file-like object.

    Raises AssertionError for a non-AST `n`, a list of 2**16 or more
    elements, or a complex literal with a non-zero real part; KeyError for
    a node class missing from TYPE_MAP; Exception for an attribute value of
    an unsupported type.
    """
    assert n is None or isinstance(n, _ast.AST), repr(n)
    type_idx = TYPE_MAP[type(n)] if n else 0
    f.write(struct.pack(">B", type_idx))
    if n is None:
        return
    if isinstance(n, (_ast.operator, _ast.expr_context, _ast.boolop, _ast.cmpop, _ast.unaryop)):
        # These nodes carry no payload: the tag alone identifies them.
        return

    # Marker byte that follows the tag of every node with a payload.
    f.write('\xae')

    if isinstance(n, _ast.Num):
        # Tell the reader which numeric encoding the 'n' field will use.
        if isinstance(n.n, int):
            f.write('\x10')
        elif isinstance(n.n, long):
            f.write('\x30')
        elif isinstance(n.n, float):
            f.write('\x20')
        elif isinstance(n.n, complex):
            f.write('\x40')
        else:
            raise Exception(type(n.n))

    if isinstance(n, _ast.Str):
        # Tell the reader whether the 's' field was a byte or unicode string.
        if isinstance(n.s, str):
            f.write('\x10')
        elif isinstance(n.s, unicode):
            f.write('\x20')
        else:
            raise Exception(type(n.s))

    # print >>sys.stderr, n, sorted(n.__dict__.items())
    for k, v in sorted(n.__dict__.items()):
        if k.startswith('_'):
            continue

        # These optional name fields are written as an empty string rather
        # than as a None node.
        if k in ("vararg", "kwarg", "asname", "module") and v is None:
            v = ""
        # elif k in ('col_offset', 'lineno'):
        #     continue

        if isinstance(v, list):
            # 2-byte big-endian element count, then the elements.
            assert len(v) < 2**16
            f.write(struct.pack(">H", len(v)))
            if isinstance(n, _ast.Global):
                # Global.names holds plain strings, not AST nodes.
                assert k == "names"
                for el in v:
                    _print_str(el, f)
            else:
                for el in v:
                    convert(el, f)
        elif isinstance(v, str):
            _print_str(v, f)
        elif isinstance(v, unicode):
            _print_str(v.encode("utf8"), f)
        elif isinstance(v, bool):
            # Must be tested before int: bool is a subclass of int.
            f.write(struct.pack("B", v))
        elif isinstance(v, int):
            # 8-byte big-endian signed integer.
            f.write(struct.pack(">q", v))
        elif isinstance(v, long):
            # Arbitrary-precision values travel as their decimal string.
            _print_str(str(v), f)
        elif isinstance(v, float):
            f.write(struct.pack(">d", v))
        elif isinstance(v, complex):
            # Complex constants can only be pure imaginary
            # (e.g., in 1+0j, 1 and 0j are separate literals)
            assert v.real == 0.0
            f.write(struct.pack(">d", v.imag))
        elif v is None or isinstance(v, _ast.AST):
            convert(v, f)
        else:
            raise Exception((n, k, repr(v)))
# Script entry point: parse the Python source file named by argv[1] into an
# AST and write its serialized form to stdout.
if __name__ == "__main__":
    fn = sys.argv[1]
    # Close the source file deterministically instead of relying on GC.
    with open(fn) as src:
        s = src.read()
    # PyCF_ONLY_AST makes compile() return the AST instead of a code object.
    m = compile(s, fn, "exec", _ast.PyCF_ONLY_AST)
    convert(m, sys.stdout)
......@@ -998,119 +998,53 @@ AST* readASTMisc(BufferedReader* reader) {
}
}
static std::string getParserCommandLine(const char* fn) {
llvm::SmallString<128> parse_ast_fn;
// TODO supposed to pass argv0, main_addr to this function:
parse_ast_fn = llvm::sys::fs::getMainExecutable(NULL, NULL);
assert(parse_ast_fn.size() && "could not find the path to the pyston src dir");
// Start by removing the binary name, because the "pyston" binary will break the logic below
llvm::sys::path::remove_filename(parse_ast_fn);
llvm::sys::path::append(parse_ast_fn, "src/codegen/parse_ast.py");
// We may be running in an environment where "python" resolves to pyston (ex in
// a virtualenv), so try to hard code the path to CPython.
// This should probably be a configure-time check?
return std::string("/usr/bin/python -S ") + parse_ast_fn.str().str() + " " + fn;
}
AST_Module* parse_string(const char* code, FutureFlags inherited_flags) {
inherited_flags &= ~(CO_NESTED | CO_FUTURE_DIVISION);
if (ENABLE_CPYTHON_PARSER) {
PyCompilerFlags cf;
cf.cf_flags = inherited_flags;
ArenaWrapper arena;
assert(arena);
const char* fn = "<string>";
mod_ty mod = PyParser_ASTFromString(code, fn, Py_file_input, &cf, arena);
if (!mod)
throwCAPIException();
assert(mod->kind != Interactive_kind);
auto rtn = static_cast<AST_Module*>(cpythonToPystonAST(mod, fn));
return rtn;
}
if (ENABLE_PYPA_PARSER || inherited_flags) {
if (ENABLE_PYPA_PARSER) {
AST_Module* rtn = pypa_parse_string(code, inherited_flags);
RELEASE_ASSERT(rtn, "unknown parse error (possibly: '%s'?)", strerror(errno));
return rtn;
}
RELEASE_ASSERT(!inherited_flags, "the old cpython parser doesn't support specifying initial future flags");
int size = strlen(code);
char buf[] = "pystontmp_XXXXXX";
char* tmpdir = mkdtemp(buf);
assert(tmpdir);
std::string tmp = std::string(tmpdir) + "/in.py";
if (VERBOSITY() >= 3) {
printf("writing %d bytes to %s\n", size, tmp.c_str());
}
{
FileHandle f(tmp.c_str(), "w");
fwrite(code, 1, size, f);
fputc('\n', f);
}
AST_Module* m = parse_file(tmp.c_str(), inherited_flags);
removeDirectoryIfExists(tmpdir);
return m;
PyCompilerFlags cf;
cf.cf_flags = inherited_flags;
ArenaWrapper arena;
assert(arena);
const char* fn = "<string>";
mod_ty mod = PyParser_ASTFromString(code, fn, Py_file_input, &cf, arena);
if (!mod)
throwCAPIException();
assert(mod->kind != Interactive_kind);
auto rtn = static_cast<AST_Module*>(cpythonToPystonAST(mod, fn));
return rtn;
}
AST_Module* parse_file(const char* fn, FutureFlags inherited_flags) {
Timer _t("parsing");
if (ENABLE_CPYTHON_PARSER) {
FileHandle fp(fn, "r");
PyCompilerFlags cf;
cf.cf_flags = inherited_flags;
ArenaWrapper arena;
assert(arena);
mod_ty mod = PyParser_ASTFromFile(fp, fn, Py_file_input, 0, 0, &cf, NULL, arena);
if (!mod)
throwCAPIException();
assert(mod->kind != Interactive_kind);
auto rtn = static_cast<AST_Module*>(cpythonToPystonAST(mod, fn));
return rtn;
}
if (ENABLE_PYPA_PARSER) {
AST_Module* rtn = pypa_parse(fn, inherited_flags);
RELEASE_ASSERT(rtn, "unknown parse error (possibly: '%s'?)", strerror(errno));
return rtn;
}
FILE* fp = popen(getParserCommandLine(fn).c_str(), "r");
BufferedReader* reader = new BufferedReader(fp);
AST* rtn = readASTMisc(reader);
reader->fill();
ASSERT(reader->bytesBuffered() == 0, "%d", reader->bytesBuffered());
delete reader;
int code = pclose(fp);
assert(code == 0);
assert(rtn->type == AST_TYPE::Module);
long us = _t.end();
static StatCounter us_parsing("us_parsing");
us_parsing.log(us);
return ast_cast<AST_Module>(rtn);
FileHandle fp(fn, "r");
PyCompilerFlags cf;
cf.cf_flags = inherited_flags;
ArenaWrapper arena;
assert(arena);
mod_ty mod = PyParser_ASTFromFile(fp, fn, Py_file_input, 0, 0, &cf, NULL, arena);
if (!mod)
throwCAPIException();
assert(mod->kind != Interactive_kind);
auto rtn = static_cast<AST_Module*>(cpythonToPystonAST(mod, fn));
return rtn;
}
const char* getMagic() {
if (ENABLE_CPYTHON_PARSER)
return "a\nCQ";
else if (ENABLE_PYPA_PARSER)
if (ENABLE_PYPA_PARSER)
return "a\ncQ";
else
return "a\ncq";
return "a\nCQ";
}
#define MAGIC_STRING_LENGTH 4
......@@ -1150,51 +1084,29 @@ static std::vector<char> _reparse(const char* fn, const std::string& cache_fn, A
file_data.insert(file_data.end(), (char*)&checksum, (char*)&checksum + CHECKSUM_LENGTH);
checksum = 0;
if (ENABLE_CPYTHON_PARSER || ENABLE_PYPA_PARSER || inherited_flags) {
if (ENABLE_CPYTHON_PARSER) {
FileHandle fp(fn, "r");
PyCompilerFlags cf;
cf.cf_flags = inherited_flags;
ArenaWrapper arena;
assert(arena);
mod_ty mod = PyParser_ASTFromFile(fp, fn, Py_file_input, 0, 0, &cf, NULL, arena);
if (!mod)
throwCAPIException();
assert(mod->kind != Interactive_kind);
module = static_cast<AST_Module*>(cpythonToPystonAST(mod, fn));
} else {
module = pypa_parse(fn, inherited_flags);
RELEASE_ASSERT(module, "unknown parse error");
}
if (!cache_fp)
return std::vector<char>();
auto p = serializeAST(module, cache_fp);
checksum = p.second;
bytes_written += p.first;
if (ENABLE_PYPA_PARSER) {
module = pypa_parse(fn, inherited_flags);
RELEASE_ASSERT(module, "unknown parse error");
} else {
RELEASE_ASSERT(!inherited_flags, "the old cpython parser doesn't support specifying initial future flags");
FILE* parser = popen(getParserCommandLine(fn).c_str(), "r");
char buf[80];
while (true) {
int nread = fread(buf, 1, 80, parser);
if (nread == 0)
break;
bytes_written += nread;
if (cache_fp)
fwrite(buf, 1, nread, cache_fp);
file_data.insert(file_data.end(), buf, buf + nread);
for (int i = 0; i < nread; i++) {
checksum ^= buf[i];
}
}
int code = pclose(parser);
assert(code == 0);
FileHandle fp(fn, "r");
PyCompilerFlags cf;
cf.cf_flags = inherited_flags;
ArenaWrapper arena;
assert(arena);
mod_ty mod = PyParser_ASTFromFile(fp, fn, Py_file_input, 0, 0, &cf, NULL, arena);
if (!mod)
throwCAPIException();
assert(mod->kind != Interactive_kind);
module = static_cast<AST_Module*>(cpythonToPystonAST(mod, fn));
}
if (!cache_fp)
return std::vector<char>();
auto p = serializeAST(module, cache_fp);
checksum = p.second;
bytes_written += p.first;
fseek(cache_fp, checksum_start, SEEK_SET);
if (cache_fp)
fwrite(&bytes_written, 1, LENGTH_LENGTH, cache_fp);
......
......@@ -38,8 +38,7 @@ bool PROFILE = false;
bool DUMPJIT = false;
bool TRAP = false;
bool USE_STRIPPED_STDLIB = true; // always true
bool ENABLE_PYPA_PARSER = true;
bool ENABLE_CPYTHON_PARSER = true;
bool ENABLE_PYPA_PARSER = false;
bool USE_REGALLOC_BASIC = false;
bool PAUSE_AT_ABORT = false;
bool ENABLE_TRACEBACKS = true;
......
......@@ -36,8 +36,8 @@ extern int SPECULATION_THRESHOLD;
extern int MAX_OBJECT_CACHE_ENTRIES;
extern bool SHOW_DISASM, FORCE_INTERPRETER, FORCE_OPTIMIZE, PROFILE, DUMPJIT, TRAP, USE_STRIPPED_STDLIB,
CONTINUE_AFTER_FATAL, ENABLE_INTERPRETER, ENABLE_BASELINEJIT, ENABLE_PYPA_PARSER, ENABLE_CPYTHON_PARSER,
USE_REGALLOC_BASIC, PAUSE_AT_ABORT, ENABLE_TRACEBACKS, FORCE_LLVM_CAPI_CALLS, FORCE_LLVM_CAPI_THROWS;
CONTINUE_AFTER_FATAL, ENABLE_INTERPRETER, ENABLE_BASELINEJIT, ENABLE_PYPA_PARSER, USE_REGALLOC_BASIC,
PAUSE_AT_ABORT, ENABLE_TRACEBACKS, FORCE_LLVM_CAPI_CALLS, FORCE_LLVM_CAPI_THROWS;
extern bool LOG_IC_ASSEMBLY, LOG_BJIT_ASSEMBLY;
......
......@@ -220,9 +220,7 @@ int handleArg(char code) {
} else if (code == 'b') {
USE_REGALLOC_BASIC = false;
} else if (code == 'x') {
ENABLE_PYPA_PARSER = false;
} else if (code == 'X') {
ENABLE_CPYTHON_PARSER = false;
ENABLE_PYPA_PARSER = true;
} else if (code == 'E') {
Py_IgnoreEnvironmentFlag = 1;
} else if (code == 'P') {
......
# fail-if: '-x' in EXTRA_JIT_ARGS
# - we don't get syntax errors through the old parser correctly
try:
exec ";"
print "worked?"
except SyntaxError:
pass
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment