Commit 46d6bba3 authored by Kevin Modzelewski

Merge pull request #619 from kmod/sre_compile4

Rewrite "a in b" expressions
parents c46e70bf a8163914
import sre_compile
# Every regular expression created during Django's startup; each one is compiled by the loop after the list:
patterns = [
'',
'\n \\$(?:\n (?P<escaped>\\$) | # Escape sequence of two delimiters\n (?P<named>[_a-z][_a-z0-9]*) | # delimiter and a Python identifier\n {(?P<braced>[_a-z][_a-z0-9]*)} | # delimiter and a braced identifier\n (?P<invalid>) # Other ill-formed delimiter exprs\n )\n ',
'^[a-zA-Z](?:[a-zA-Z0-9-]*)$',
'^([a-zA-Z](?:[a-zA-Z0-9-]*))=!([a-zA-Z](?:[a-zA-Z0-9-]*))$',
'^[a-zA-Z]([a-zA-Z0-9_]*)$',
'\\[(?P<header>[^]]+)\\]',
'(?P<option>[^:=\\s][^:=]*)\\s*(?P<vi>[:=])\\s*(?P<value>.*)$',
'(?P<option>[^:=\\s][^:=]*)\\s*(?:(?P<vi>[:=])\\s*(?P<value>.*))?$',
'%\\(([^)]*)\\)s|.',
'%\\(([^)]+)\\)s',
'([a-zA-Z][a-zA-Z0-9_]+)\\s*=\\s*(.*)',
'\\$\\(([A-Za-z][A-Za-z0-9_]*)\\)',
'\\${([A-Za-z][A-Za-z0-9_]*)}',
'^[a-zA-Z_][a-zA-Z_0-9]*(\\.[a-zA-Z_][a-zA-Z_0-9]*)*$',
'cygwin.*',
'os2emx',
'posix',
'[^\\\\\\\'\\"\t\n\x0b\x0c\r ]*',
"'(?:[^'\\\\]|\\\\.)*'",
'"(?:[^"\\\\]|\\\\.)*"',
'',
'sys.exc_clear',
'threading',
' # A numeric string consists of:\n# \\s*\n (?P<sign>[-+])? # an optional sign, followed by either...\n (\n (?=\\d|\\.\\d) # ...a number (with at least one digit)\n (?P<int>\\d*) # having a (possibly empty) integer part\n (\\.(?P<frac>\\d*))? # followed by an optional fractional part\n (E(?P<exp>[-+]?\\d+))? # followed by an optional exponent, or...\n |\n Inf(inity)? # ...an infinity, or...\n |\n (?P<signal>s)? # ...an (optionally signaling)\n NaN # NaN\n (?P<diag>\\d*) # with (possibly empty) diagnostic info.\n )\n# \\s*\n \\Z\n',
'0*$',
'50*$',
'\\A\n(?:\n (?P<fill>.)?\n (?P<align>[<>=^])\n)?\n(?P<sign>[-+ ])?\n(?P<zeropad>0)?\n(?P<minimumwidth>(?!0)\\d+)?\n(?P<thousands_sep>,)?\n(?:\\.(?P<precision>0|(?!0)\\d+))?\n(?P<type>[eEfFgGn%])?\n\\Z\n',
'%(?:\\((?P<key>.*?)\\))?(?P<modifiers>[-#0-9 +*.hlL]*?)[eEfFgGdiouxXcrs%]',
'\n \\$(?:\n (?P<escaped>\\$) | # Escape sequence of two delimiters\n (?P<named>[_a-z][_a-z0-9]*) | # delimiter and a Python identifier\n {(?P<braced>[_a-z][_a-z0-9]*)} | # delimiter and a braced identifier\n (?P<invalid>) # Other ill-formed delimiter exprs\n )\n ',
'[ \\f\\t]*(\\\\\\r?\\n[ \\f\\t]*)*(#[^\\r\\n]*)?(((\\d+[jJ]|((\\d+\\.\\d*|\\.\\d+)([eE][-+]?\\d+)?|\\d+[eE][-+]?\\d+)[jJ])|((\\d+\\.\\d*|\\.\\d+)([eE][-+]?\\d+)?|\\d+[eE][-+]?\\d+)|(0[xX][\\da-fA-F]+[lL]?|0[bB][01]+[lL]?|(0[oO][0-7]+)|(0[0-7]*)[lL]?|[1-9]\\d*[lL]?))|((\\*\\*=?|>>=?|<<=?|<>|!=|//=?|[+\\-*/%&|^=<>]=?|~)|[][(){}]|(\\r?\\n|[:;.,`@]))|([uUbB]?[rR]?\'[^\\n\'\\\\]*(?:\\\\.[^\\n\'\\\\]*)*\'|[uUbB]?[rR]?"[^\\n"\\\\]*(?:\\\\.[^\\n"\\\\]*)*")|[a-zA-Z_]\\w*)',
'[ \\f\\t]*((\\\\\\r?\\n|\\Z|#[^\\r\\n]*|([uUbB]?[rR]?\'\'\'|[uUbB]?[rR]?"""))|((\\d+[jJ]|((\\d+\\.\\d*|\\.\\d+)([eE][-+]?\\d+)?|\\d+[eE][-+]?\\d+)[jJ])|((\\d+\\.\\d*|\\.\\d+)([eE][-+]?\\d+)?|\\d+[eE][-+]?\\d+)|(0[xX][\\da-fA-F]+[lL]?|0[bB][01]+[lL]?|(0[oO][0-7]+)|(0[0-7]*)[lL]?|[1-9]\\d*[lL]?))|((\\*\\*=?|>>=?|<<=?|<>|!=|//=?|[+\\-*/%&|^=<>]=?|~)|[][(){}]|(\\r?\\n|[:;.,`@]))|([uUbB]?[rR]?\'[^\\n\'\\\\]*(?:\\\\.[^\\n\'\\\\]*)*(\'|\\\\\\r?\\n)|[uUbB]?[rR]?"[^\\n"\\\\]*(?:\\\\.[^\\n"\\\\]*)*("|\\\\\\r?\\n))|[a-zA-Z_]\\w*)',
"[^'\\\\]*(?:(?:\\\\.|'(?!''))[^'\\\\]*)*'''",
'[^"\\\\]*(?:(?:\\\\.|"(?!""))[^"\\\\]*)*"""',
"[^'\\\\]*(?:\\\\.[^'\\\\]*)*'",
'[^"\\\\]*(?:\\\\.[^"\\\\]*)*"',
'([\x00-\x7f]+)',
'(\\s+|[^\\s\\w]*\\w+[^0-9\\W]-(?=\\w+[^0-9\\W])|(?<=[\\w\\!\\"\\\'\\&\\.\\,\\?])-{2,}(?=\\w))',
'(\\s+)',
'[abcdefghijklmnopqrstuvwxyz][\\.\\!\\?][\\"\\\']?\\Z',
'^[ \t]+$',
'(^[ \t]*)(?:[^ \t\n])',
'[A-Z][A-Z0-9_]+$',
'\\\\[0-3][0-7][0-7]',
'[\\\\].',
'(?x)(?P<key>[\\w\\d!#%&\'~_`><@,:/\\$\\*\\+\\-\\.\\^\\|\\)\\(\\?\\}\\{\\=]+?)\\s*=\\s*(?P<val>"(?:[^\\\\"]|\\\\.)*"|\\w{3},\\s[\\s\\w\\d-]{9,11}\\s[\\d:]{8}\\sGMT|[\\w\\d!#%&\'~_`><@,:/\\$\\*\\+\\-\\.\\^\\|\\)\\(\\?\\}\\{\\=]*)\\s*;?',
'(-?(?:0|[1-9]\\d*))(\\.\\d+)?([eE][-+]?\\d+)?',
'(.*?)(["\\\\\\x00-\\x1f])',
'[ \\t\\n\\r]*',
'[\\x00-\\x1f\\\\"\\b\\f\\n\\r\\t]',
'([\\\\"]|[^\\ -~])',
'[\\x80-\\xff]',
u'\\s*\n\\s*',
u'<.*?>|((?:\\w[-\\w]*|&.*?;)+)',
u'<.*?>|(.)',
u'<(/)?([^ ]+?)(?:(\\s*/)| .*?)?>',
u'\\r\\n|\\r',
u'(((?<=[a-z])[A-Z])|([A-Z](?![A-Z]|$)))',
u'([\x80-\uffff])',
u'\n ((?:\n [^\\s\'"]*\n (?:\n (?:"(?:[^"\\\\]|\\\\.)*" | \'(?:[^\'\\\\]|\\\\.)*\')\n [^\\s\'"]*\n )+\n ) | \\S+)\n',
u'&(#?[xX]?(?:[0-9a-fA-F]+|\\w{1,8}));',
u'^https?://',
u'^([a-z0-9.-]+|\\[[a-f0-9]*:[a-f0-9:]+\\])(:\\d+)?$',
'[][\\\\()<>@,:;".]',
'[][\\\\()"]',
'\n =\\? # literal =?\n (?P<charset>[^?]*?) # non-greedy up to the next ? is the charset\n \\? # literal ?\n (?P<encoding>[qb]) # either a "q" or a "b", case insensitive\n \\? # literal ?\n (?P<atom>.*?) # non-greedy up to the next ?= is the atom\n \\?= # literal ?=\n ',
'^(?P<name>\\w+)\\*((?P<num>[0-9]+)\\*?)?$',
'[^-a-zA-Z0-9!*+/ ]',
'[^ !-<>-~\\t]',
'\n =\\? # literal =?\n (?P<charset>[^?]*?) # non-greedy up to the next ? is the charset\n \\? # literal ?\n (?P<encoding>[qb]) # either a "q" or a "b", case insensitive\n \\? # literal ?\n (?P<encoded>.*?) # non-greedy up to the next ?= is the encoded string\n \\?= # literal ?=\n (?=[ \\t]|$) # whitespace or the end of the string\n ',
'[\\041-\\176]+:$',
'\\n[^ \\t]+:',
u'^(?:[a-z0-9\\.\\-]*)://(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\\.)+(?:[A-Z]{2,6}\\.?|[A-Z0-9-]{2,}(?<!-)\\.?)|localhost|\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}|\\[?[A-F0-9]*:[A-F0-9:]+\\]?)(?::\\d+)?(?:/?|[/?]\\S+)$',
u'(^[-!#$%&\'*+/=?^_`{}|~0-9A-Z]+(\\.[-!#$%&\'*+/=?^_`{}|~0-9A-Z]+)*$|^"([\\001-\\010\\013\\014\\016-\\037!#-\\[\\]-\\177]|\\\\[\\001-\\011\\013\\014\\016-\\177])*"$)',
u'(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\\.)+(?:[A-Z]{2,6}|[A-Z0-9-]{2,}(?<!-))$',
u'\\[([A-f0-9:\\.]+)\\]$',
u'^[-a-zA-Z0-9_]+$',
u'^(25[0-5]|2[0-4]\\d|[0-1]?\\d?\\d)(\\.(25[0-5]|2[0-4]\\d|[0-1]?\\d?\\d)){3}$',
u'^[\\d,]+$',
'[a-zA-Z][-_.a-zA-Z0-9]*\\s*',
'(\\\'[^\\\']*\\\'|"[^"]*")\\s*',
'--\\s*>',
']\\s*]\\s*>',
']\\s*>',
'[&<]',
'&[a-zA-Z#]',
'&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]',
'&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]',
'<[a-zA-Z]',
'>',
'([a-zA-Z][^\t\n\r\x0c />\x00]*)(?:\\s|/(?!>))*',
'[a-zA-Z][^\t\n\r\x0c />\x00]*',
'((?<=[\\\'"\\s/])[^\\s/>][^\\s/=>]*)(\\s*=+\\s*(\\\'[^\\\']*\\\'|"[^"]*"|(?![\\\'"])[^>\\s]*))?(?:\\s|/(?!>))*',
'\n <[a-zA-Z][^\\t\\n\\r\\f />\\x00]* # tag name\n (?:[\\s/]* # optional whitespace before attribute name\n (?:(?<=[\'"\\s/])[^\\s/>][^\\s/=>]* # attribute name\n (?:\\s*=+\\s* # value indicator\n (?:\'[^\']*\' # LITA-enclosed value\n |"[^"]*" # LIT-enclosed value\n |(?![\'"])[^>\\s]* # bare value\n )\n )?(?:\\s|/(?!>))*\n )*\n )?\n \\s* # trailing whitespace\n',
'</\\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\\s*>',
u'&(?!(\\w+|#\\d+);)',
u'(\\s+)',
u'^https?://\\[?\\w',
u'^www\\.|^(?!http)\\w[^@]+\\.(com|edu|gov|int|mil|net|org)$',
u'^\\S+@\\S+\\.\\S+$',
u'(<a [^>]*?)target=[^\\s>]+',
u'(?:<br clear="all">|<i><\\/i>|<b><\\/b>|<em><\\/em>|<strong><\\/strong>|<\\/?smallcaps>|<\\/?uppercase>)',
u'((?:<p>(?:\\&middot\\;|\\*|\\\u2022|\\&\\#149\\;|\\&bull\\;|\\&\\#8226\\;).*?[a-zA-Z].*?</p>\\s*)+)',
u'(?:<p>(?:&nbsp;|\\s|<br \\/>)*?</p>\\s*)+\\Z',
u'(?<!\\\\)([aAbBcdDeEfFgGhHiIjlLmMnNoOPrsStTUuwWyYzZ])',
u'\\\\(.)',
'((^|[^%])(%%)*%[sy])',
'(?P<year>\\d{4})-(?P<month>\\d{1,2})-(?P<day>\\d{1,2})$',
'(?P<hour>\\d{1,2}):(?P<minute>\\d{1,2})(?::(?P<second>\\d{1,2})(?:\\.(?P<microsecond>\\d{1,6})\\d{0,6})?)?',
'(?P<year>\\d{4})-(?P<month>\\d{1,2})-(?P<day>\\d{1,2})[T ](?P<hour>\\d{1,2}):(?P<minute>\\d{1,2})(?::(?P<second>\\d{1,2})(?:\\.(?P<microsecond>\\d{1,6})\\d{0,6})?)?(?P<tzinfo>Z|[+-]\\d{2}(?::?\\d{2})?)?$',
'\\?|[-+]?[.\\w]+$',
u'(?:W/)?"((?:\\\\.|[^"])*)"',
u'^\\w{3}, (?P<day>\\d{2}) (?P<mon>\\w{3}) (?P<year>\\d{4}) (?P<hour>\\d{2}):(?P<min>\\d{2}):(?P<sec>\\d{2}) GMT$',
u'^\\w{6,9}, (?P<day>\\d{2})-(?P<mon>\\w{3})-(?P<year>\\d{2}) (?P<hour>\\d{2}):(?P<min>\\d{2}):(?P<sec>\\d{2}) GMT$',
u'^\\w{3} (?P<mon>\\w{3}) (?P<day>[ \\d]\\d) (?P<hour>\\d{2}):(?P<min>\\d{2}):(?P<sec>\\d{2}) (?P<year>\\d{4})$',
u'\\s*,\\s*',
'^From ',
'[ \\(\\)<>@,;:\\\\"/\\[\\]\\?=]',
u'(\\{\\%.*?\\%\\}|\\{\\{.*?\\}\\}|\\{\\#.*?\\#\\})',
u'\n^(?P<constant>(?:\\_\\("[^"\\\\]*(?:\\\\.[^"\\\\]*)*"\\)|\\_\\(\'[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*\'\\)|"[^"\\\\]*(?:\\\\.[^"\\\\]*)*"|\'[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*\'))|\n^(?P<var>[\\w\\.]+|[-+\\.]?\\d[\\d\\.e]*)|\n (?:\\s*\\|\\s*\n (?P<filter_name>\\w+)\n (?:\\:\n (?:\n (?P<constant_arg>(?:\\_\\("[^"\\\\]*(?:\\\\.[^"\\\\]*)*"\\)|\\_\\(\'[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*\'\\)|"[^"\\\\]*(?:\\\\.[^"\\\\]*)*"|\'[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*\'))|\n (?P<var_arg>[\\w\\.]+|[-+\\.]?\\d[\\d\\.e]*)\n )\n )?\n )',
u'(?:(\\w+)=)?(.+)',
u'API|TOKEN|KEY|SECRET|PASS|PROFANITIES_LIST|SIGNATURE',
'\\s*#?\\s*$',
'[_a-z]\\w*\\.py$',
u'.*; charset=([\\w\\d-]+);?',
'[ \\(\\)<>@,;:\\\\"/\\[\\]\\?=]',
u'\\s+',
u'^[\\w.@+-]+$',
]
# Feed every collected regex source string through sre_compile with no
# flags set (flags == 0); the compiled result is intentionally discarded.
for regex_source in patterns:
    sre_compile.compile(regex_source, 0)
...@@ -1007,7 +1007,7 @@ std::string getCurrentPythonLine() { ...@@ -1007,7 +1007,7 @@ std::string getCurrentPythonLine() {
return "unknown:-1"; return "unknown:-1";
} }
void logByCurrentPythonLine(std::string& stat_name) { void logByCurrentPythonLine(const std::string& stat_name) {
std::string stat = stat_name + "<" + getCurrentPythonLine() + ">"; std::string stat = stat_name + "<" + getCurrentPythonLine() + ">";
Stats::log(Stats::getStatCounter(stat)); Stats::log(Stats::getStatCounter(stat));
} }
......
...@@ -46,7 +46,7 @@ ExecutionPoint getExecutionPoint(); ...@@ -46,7 +46,7 @@ ExecutionPoint getExecutionPoint();
std::string getCurrentPythonLine(); std::string getCurrentPythonLine();
// doesn't really belong in unwinding.h, since it's stats related, but it needs to unwind to get the current line... // doesn't really belong in unwinding.h, since it's stats related, but it needs to unwind to get the current line...
void logByCurrentPythonLine(std::string& stat_name); void logByCurrentPythonLine(const std::string& stat_name);
// Adds stack locals and closure locals into the locals dict, and returns it. // Adds stack locals and closure locals into the locals dict, and returns it.
Box* fastLocalsToBoxedLocals(); Box* fastLocalsToBoxedLocals();
......
...@@ -3751,6 +3751,19 @@ static bool convert3wayCompareResultToBool(Box* v, int op_type) { ...@@ -3751,6 +3751,19 @@ static bool convert3wayCompareResultToBool(Box* v, int op_type) {
}; };
} }
// Produces a boxed bool describing the truth value of `b`, inverted when
// `negate` is true.
//
//   b:      object whose truthiness is wanted.
//   negate: when true, the logical opposite of b's truthiness is returned.
//
// Returns `b` itself when it is already a bool and no negation is requested;
// otherwise returns the result of boxBool().
Box* nonzeroAndBox(Box* b, bool negate) {
    // Fast path: `b` is already a bool object, so nonzeroIC() is not needed.
    if (likely(b->cls == bool_cls)) {
        if (!negate)
            return b;
        return boxBool(b != True);
    }

    // General path: query the object's truthiness, then box it.
    // `truthy != negate` flips the value exactly when `negate` is set.
    const bool truthy = b->nonzeroIC();
    return boxBool(truthy != negate);
}
Box* compareInternal(Box* lhs, Box* rhs, int op_type, CompareRewriteArgs* rewrite_args) { Box* compareInternal(Box* lhs, Box* rhs, int op_type, CompareRewriteArgs* rewrite_args) {
if (op_type == AST_TYPE::Is || op_type == AST_TYPE::IsNot) { if (op_type == AST_TYPE::Is || op_type == AST_TYPE::IsNot) {
bool neg = (op_type == AST_TYPE::IsNot); bool neg = (op_type == AST_TYPE::IsNot);
...@@ -3766,11 +3779,26 @@ Box* compareInternal(Box* lhs, Box* rhs, int op_type, CompareRewriteArgs* rewrit ...@@ -3766,11 +3779,26 @@ Box* compareInternal(Box* lhs, Box* rhs, int op_type, CompareRewriteArgs* rewrit
} }
if (op_type == AST_TYPE::In || op_type == AST_TYPE::NotIn) { if (op_type == AST_TYPE::In || op_type == AST_TYPE::NotIn) {
// TODO do rewrite
static BoxedString* contains_str = static_cast<BoxedString*>(PyString_InternFromString("__contains__")); static BoxedString* contains_str = static_cast<BoxedString*>(PyString_InternFromString("__contains__"));
Box* contained = callattrInternal1(rhs, contains_str, CLASS_ONLY, NULL, ArgPassSpec(1), lhs);
Box* contained;
RewriterVar* r_contained;
if (rewrite_args) {
CallRewriteArgs crewrite_args(rewrite_args->rewriter, rewrite_args->rhs, rewrite_args->destination);
crewrite_args.arg1 = rewrite_args->lhs;
contained = callattrInternal1(rhs, contains_str, CLASS_ONLY, &crewrite_args, ArgPassSpec(1), lhs);
if (!crewrite_args.out_success)
rewrite_args = NULL;
else if (contained)
r_contained = crewrite_args.out_rtn;
} else {
contained = callattrInternal1(rhs, contains_str, CLASS_ONLY, NULL, ArgPassSpec(1), lhs);
}
if (contained == NULL) { if (contained == NULL) {
rewrite_args = NULL;
int result = _PySequence_IterSearch(rhs, lhs, PY_ITERSEARCH_CONTAINS); int result = _PySequence_IterSearch(rhs, lhs, PY_ITERSEARCH_CONTAINS);
if (result < 0) if (result < 0)
throwCAPIException(); throwCAPIException();
...@@ -3778,6 +3806,14 @@ Box* compareInternal(Box* lhs, Box* rhs, int op_type, CompareRewriteArgs* rewrit ...@@ -3778,6 +3806,14 @@ Box* compareInternal(Box* lhs, Box* rhs, int op_type, CompareRewriteArgs* rewrit
return boxBool(result); return boxBool(result);
} }
if (rewrite_args) {
auto r_negate = rewrite_args->rewriter->loadConst((int)(op_type == AST_TYPE::NotIn));
RewriterVar* r_contained_box
= rewrite_args->rewriter->call(true, (void*)nonzeroAndBox, r_contained, r_negate);
rewrite_args->out_rtn = r_contained_box;
rewrite_args->out_success = true;
}
bool b; bool b;
if (contained->cls == bool_cls) if (contained->cls == bool_cls)
b = contained == True; b = contained == True;
......
...@@ -410,7 +410,7 @@ Box* setPop(BoxedSet* self) { ...@@ -410,7 +410,7 @@ Box* setPop(BoxedSet* self) {
Box* setContains(BoxedSet* self, Box* v) { Box* setContains(BoxedSet* self, Box* v) {
RELEASE_ASSERT(PyAnySet_Check(self), ""); RELEASE_ASSERT(PyAnySet_Check(self), "");
return boxBool(self->s.count(v) != 0); return boxBool(self->s.find(v) != self->s.end());
} }
Box* setEq(BoxedSet* self, BoxedSet* rhs) { Box* setEq(BoxedSet* self, BoxedSet* rhs) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment