Merge pull request #712 from kmod/perf2

some fixes and cleanups

Merge pull request #712 from kmod/perf2
some fixes and cleanups
baad8901 · Kevin Modzelewski · 6e2c06a8 · 5385cf7b · baad8901 · baad8901
Commit baad8901 authored Jul 16, 2015 by Kevin Modzelewski
19 changed files
--- a/.gitignore
+++ b/.gitignore
@@ -23,6 +23,7 @@ pyston_grwl
 pyston_grwl_dbg
 pyston_nosync
 pyston_gcc
+pyston_release_gcc
 pystontmp*/
 /*_unittest

--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -299,3 +299,6 @@ if(DOXYGEN_FOUND)
 else()
  add_custom_target(docs COMMAND ${CMAKE_COMMAND} -E echo "Can't create docs, doxygen not installed \(try sudo apt-get install doxygen grpahviz on Ubuntu and then rerun cmake\)" VERBATIM)
 endif()
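+# Last file added via a glob (edit the path below whenever another glob-matched file is added;
+# presumably so that this CMakeLists.txt changes and CMake re-runs its globs -- TODO confirm):
+# from_cpython/Lib/test/test_re.py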
--- a/Makefile
+++ b/Makefile
@@ -20,6 +20,7 @@ USE_CCACHE := 1
 USE_DISTCC := 0
 PYPY := pypy
+CPYTHON := python
 ENABLE_VALGRIND := 0
@@ -72,7 +73,7 @@ CMAKE_SETUP_RELEASE := $(CMAKE_DIR_RELEASE)/build.ninja
 ifneq ($(SELF_HOST),1)
-	PYTHON := python
+	PYTHON := $(CPYTHON)
 	PYTHON_EXE_DEPS :=
 else
 	PYTHON := $(abspath ./pyston_dbg)
@@ -1032,7 +1033,7 @@ $(call make_target,_gcc)
 $(call make_target,_release_gcc)
 nosearch_runpy_% nosearch_pyrun_%: %.py ext_python
-	$(VERB) PYTHONPATH=test/test_extension/build/lib.linux-x86_64-2.7 zsh -c 'time python $<'
+	$(VERB) PYTHONPATH=test/test_extension/build/lib.linux-x86_64-2.7 zsh -c 'time $(CPYTHON) $<'
 nosearch_pypyrun_%: %.py ext_python
 	$(VERB) PYTHONPATH=test/test_extension/build/lib.linux-x86_64-2.7 zsh -c 'time $(PYPY) $<'
 $(call make_search,runpy_%)
@@ -1203,7 +1204,7 @@ $(wordlist 2,9999,$(SHAREDMODS_OBJS)): $(firstword $(SHAREDMODS_OBJS))
 .PHONY: ext_python ext_pythondbg
 ext_python: $(TEST_EXT_MODULE_SRCS)
-	cd $(TEST_DIR)/test_extension; python setup.py build
+	cd $(TEST_DIR)/test_extension; $(CPYTHON) setup.py build
 ext_pythondbg: $(TEST_EXT_MODULE_SRCS)
 	cd $(TEST_DIR)/test_extension; python2.7-dbg setup.py build

--- a/from_cpython/Lib/test/re_tests.py
+++ b/from_cpython/Lib/test/re_tests.py
+#!/usr/bin/env python
+# -*- mode: python -*-
+# Re test suite and benchmark suite v1.5
+# The 3 possible outcomes for each pattern, bound to 0, 1 and 2 by unpacking range(3)
+[SUCCEED, FAIL, SYNTAX_ERROR] = range(3)
+# Benchmark suite (needs expansion)
+#
+# The benchmark suite measures speed only; it does not test correctness.
+# Each entry is a 2-tuple: the first element is the regex pattern and the
+# second is a string to match it against.  The benchmarking code will embed
+# the second string inside several sizes of padding, to test how regex
+# matching performs on large strings.
+benchmarks = [
+    # test common prefix
+    ('Python|Perl', 'Perl'),    # Alternation (two branches)
+    ('(Python|Perl)', 'Perl'),  # Grouped alternation (two branches)
+    ('Python|Perl|Tcl', 'Perl'),        # Alternation (three branches)
+    ('(Python|Perl|Tcl)', 'Perl'),      # Grouped alternation (three branches)
+    ('(Python)\\1', 'PythonPython'),    # Backreference to group 1
+    ('([0a-z][a-z0-9]*,)+', 'a5,b7,c9,'), # Disable the fastmap optimization
+    ('([a-z][a-z0-9]*,)+', 'a5,b7,c9,'), # A few character sets
+    ('Python', 'Python'),               # Simple text literal
+    ('.*Python', 'Python'),             # Bad text literal (leading .*)
+    ('.*Python.*', 'Python'),           # Worse text literal (.* on both sides)
+    ('.*(Python)', 'Python'),           # Bad text literal with grouping
+]
+# Test suite (for verifying correctness)
+#
+# The test suite is a list of 5- or 3-tuples.  The 5 parts of a
+# complete tuple are:
+# element 0: a string containing the pattern
+#         1: the string to match against the pattern
+#         2: the expected result (SUCCEED, FAIL, SYNTAX_ERROR)
+#         3: a string that will be eval()'ed to produce a test string.
+#            This is an arbitrary Python expression; the available
+#            variables are "found" (the whole match), and "g1", "g2", ...
+#            up to "g99" contain the contents of each group, or the
+#            string 'None' if the group wasn't given a value, or the
+#            string 'Error' if the group index was out of range;
+#            also "groups", the return value of m.group() (a tuple).
+#         4: The expected result of evaluating the expression.
+#            If the two don't match, an error is reported.
+#
+# If the regex isn't expected to work, the latter two elements can be omitted.
+tests = [
+    # Test ?P< and ?P= extensions
+    ('(?P<foo_123', '', SYNTAX_ERROR),      # Unterminated group identifier
+    ('(?P<1>a)', '', SYNTAX_ERROR),         # Begins with a digit
+    ('(?P<!>a)', '', SYNTAX_ERROR),         # Begins with an illegal char
+    ('(?P<foo!>a)', '', SYNTAX_ERROR),      # Begins with an illegal char
+    # Same tests, for the ?P= form
+    ('(?P<foo_123>a)(?P=foo_123', 'aa', SYNTAX_ERROR),
+    ('(?P<foo_123>a)(?P=1)', 'aa', SYNTAX_ERROR),
+    ('(?P<foo_123>a)(?P=!)', 'aa', SYNTAX_ERROR),
+    ('(?P<foo_123>a)(?P=foo_124', 'aa', SYNTAX_ERROR),  # Backref to undefined group
+    ('(?P<foo_123>a)', 'a', SUCCEED, 'g1', 'a'),
+    ('(?P<foo_123>a)(?P=foo_123)', 'aa', SUCCEED, 'g1', 'a'),
+    # Test octal escapes
+    ('\\1', 'a', SYNTAX_ERROR),    # Backreference
+    ('[\\1]', '\1', SUCCEED, 'found', '\1'),  # Character
+    ('\\09', chr(0) + '9', SUCCEED, 'found', chr(0) + '9'),
+    ('\\141', 'a', SUCCEED, 'found', 'a'),
+    ('(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\\119', 'abcdefghijklk9', SUCCEED, 'found+"-"+g11', 'abcdefghijklk9-k'),
+    # Test \0 is handled everywhere
+    (r'\0', '\0', SUCCEED, 'found', '\0'),
+    (r'[\0a]', '\0', SUCCEED, 'found', '\0'),
+    (r'[a\0]', '\0', SUCCEED, 'found', '\0'),
+    (r'[^a\0]', '\0', FAIL),
+    # Test various letter escapes
+    (r'\a[\b]\f\n\r\t\v', '\a\b\f\n\r\t\v', SUCCEED, 'found', '\a\b\f\n\r\t\v'),
+    (r'[\a][\b][\f][\n][\r][\t][\v]', '\a\b\f\n\r\t\v', SUCCEED, 'found', '\a\b\f\n\r\t\v'),
+    # NOTE: not an error under PCRE/PRE:
+    # (r'\u', '', SYNTAX_ERROR),    # A Perl escape
+    (r'\c\e\g\h\i\j\k\m\o\p\q\y\z', 'ceghijkmopqyz', SUCCEED, 'found', 'ceghijkmopqyz'),
+    (r'\xff', '\377', SUCCEED, 'found', chr(255)),
+    # new \x semantics
+    (r'\x00ffffffffffffff', '\377', FAIL, 'found', chr(255)),
+    (r'\x00f', '\017', FAIL, 'found', chr(15)),
+    (r'\x00fe', '\376', FAIL, 'found', chr(254)),
+    # (r'\x00ffffffffffffff', '\377', SUCCEED, 'found', chr(255)),
+    # (r'\x00f', '\017', SUCCEED, 'found', chr(15)),
+    # (r'\x00fe', '\376', SUCCEED, 'found', chr(254)),
+    (r"^\w+=(\\[\000-\277]|[^\n\\])*", "SRC=eval.c g.c blah blah blah \\\\\n\tapes.c",
+     SUCCEED, 'found', "SRC=eval.c g.c blah blah blah \\\\"),
+    # Test that . only matches \n in DOTALL mode
+    ('a.b', 'acb', SUCCEED, 'found', 'acb'),
+    ('a.b', 'a\nb', FAIL),
+    ('a.*b', 'acc\nccb', FAIL),
+    ('a.{4,5}b', 'acc\nccb', FAIL),
+    ('a.b', 'a\rb', SUCCEED, 'found', 'a\rb'),
+    ('a.b(?s)', 'a\nb', SUCCEED, 'found', 'a\nb'),
+    ('a.*(?s)b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
+    ('(?s)a.{4,5}b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
+    ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
+    (')', '', SYNTAX_ERROR),           # Unmatched right bracket
+    ('', '', SUCCEED, 'found', ''),    # Empty pattern
+    ('abc', 'abc', SUCCEED, 'found', 'abc'),
+    ('abc', 'xbc', FAIL),
+    ('abc', 'axc', FAIL),
+    ('abc', 'abx', FAIL),
+    ('abc', 'xabcy', SUCCEED, 'found', 'abc'),
+    ('abc', 'ababc', SUCCEED, 'found', 'abc'),
+    ('ab*c', 'abc', SUCCEED, 'found', 'abc'),
+    ('ab*bc', 'abc', SUCCEED, 'found', 'abc'),
+    ('ab*bc', 'abbc', SUCCEED, 'found', 'abbc'),
+    ('ab*bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
+    ('ab+bc', 'abbc', SUCCEED, 'found', 'abbc'),
+    ('ab+bc', 'abc', FAIL),
+    ('ab+bc', 'abq', FAIL),
+    ('ab+bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
+    ('ab?bc', 'abbc', SUCCEED, 'found', 'abbc'),
+    ('ab?bc', 'abc', SUCCEED, 'found', 'abc'),
+    ('ab?bc', 'abbbbc', FAIL),
+    ('ab?c', 'abc', SUCCEED, 'found', 'abc'),
+    ('^abc$', 'abc', SUCCEED, 'found', 'abc'),
+    ('^abc$', 'abcc', FAIL),
+    ('^abc', 'abcc', SUCCEED, 'found', 'abc'),
+    ('^abc$', 'aabc', FAIL),
+    ('abc$', 'aabc', SUCCEED, 'found', 'abc'),
+    ('^', 'abc', SUCCEED, 'found+"-"', '-'),
+    ('$', 'abc', SUCCEED, 'found+"-"', '-'),
+    ('a.c', 'abc', SUCCEED, 'found', 'abc'),
+    ('a.c', 'axc', SUCCEED, 'found', 'axc'),
+    ('a.*c', 'axyzc', SUCCEED, 'found', 'axyzc'),
+    ('a.*c', 'axyzd', FAIL),
+    ('a[bc]d', 'abc', FAIL),
+    ('a[bc]d', 'abd', SUCCEED, 'found', 'abd'),
+    ('a[b-d]e', 'abd', FAIL),
+    ('a[b-d]e', 'ace', SUCCEED, 'found', 'ace'),
+    ('a[b-d]', 'aac', SUCCEED, 'found', 'ac'),
+    ('a[-b]', 'a-', SUCCEED, 'found', 'a-'),
+    ('a[\\-b]', 'a-', SUCCEED, 'found', 'a-'),
+    # NOTE: not an error under PCRE/PRE:
+    # ('a[b-]', 'a-', SYNTAX_ERROR),
+    ('a[]b', '-', SYNTAX_ERROR),
+    ('a[', '-', SYNTAX_ERROR),
+    ('a\\', '-', SYNTAX_ERROR),
+    ('abc)', '-', SYNTAX_ERROR),
+    ('(abc', '-', SYNTAX_ERROR),
+    ('a]', 'a]', SUCCEED, 'found', 'a]'),
+    ('a[]]b', 'a]b', SUCCEED, 'found', 'a]b'),
+    ('a[\]]b', 'a]b', SUCCEED, 'found', 'a]b'),
+    ('a[^bc]d', 'aed', SUCCEED, 'found', 'aed'),
+    ('a[^bc]d', 'abd', FAIL),
+    ('a[^-b]c', 'adc', SUCCEED, 'found', 'adc'),
+    ('a[^-b]c', 'a-c', FAIL),
+    ('a[^]b]c', 'a]c', FAIL),
+    ('a[^]b]c', 'adc', SUCCEED, 'found', 'adc'),
+    ('\\ba\\b', 'a-', SUCCEED, '"-"', '-'),
+    ('\\ba\\b', '-a', SUCCEED, '"-"', '-'),
+    ('\\ba\\b', '-a-', SUCCEED, '"-"', '-'),
+    ('\\by\\b', 'xy', FAIL),
+    ('\\by\\b', 'yz', FAIL),
+    ('\\by\\b', 'xyz', FAIL),
+    ('x\\b', 'xyz', FAIL),
+    ('x\\B', 'xyz', SUCCEED, '"-"', '-'),
+    ('\\Bz', 'xyz', SUCCEED, '"-"', '-'),
+    ('z\\B', 'xyz', FAIL),
+    ('\\Bx', 'xyz', FAIL),
+    ('\\Ba\\B', 'a-', FAIL, '"-"', '-'),
+    ('\\Ba\\B', '-a', FAIL, '"-"', '-'),
+    ('\\Ba\\B', '-a-', FAIL, '"-"', '-'),
+    ('\\By\\B', 'xy', FAIL),
+    ('\\By\\B', 'yz', FAIL),
+    ('\\By\\b', 'xy', SUCCEED, '"-"', '-'),
+    ('\\by\\B', 'yz', SUCCEED, '"-"', '-'),
+    ('\\By\\B', 'xyz', SUCCEED, '"-"', '-'),
+    ('ab|cd', 'abc', SUCCEED, 'found', 'ab'),
+    ('ab|cd', 'abcd', SUCCEED, 'found', 'ab'),
+    ('()ef', 'def', SUCCEED, 'found+"-"+g1', 'ef-'),
+    ('$b', 'b', FAIL),
+    ('a\\(b', 'a(b', SUCCEED, 'found+"-"+g1', 'a(b-Error'),
+    ('a\\(*b', 'ab', SUCCEED, 'found', 'ab'),
+    ('a\\(*b', 'a((b', SUCCEED, 'found', 'a((b'),
+    ('a\\\\b', 'a\\b', SUCCEED, 'found', 'a\\b'),
+    ('((a))', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'a-a-a'),
+    ('(a)b(c)', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'abc-a-c'),
+    ('a+b+c', 'aabbabc', SUCCEED, 'found', 'abc'),
+    ('(a+|b)*', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
+    ('(a+|b)+', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
+    ('(a+|b)?', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
+    (')(', '-', SYNTAX_ERROR),
+    ('[^ab]*', 'cde', SUCCEED, 'found', 'cde'),
+    ('abc', '', FAIL),
+    ('a*', '', SUCCEED, 'found', ''),
+    ('a|b|c|d|e', 'e', SUCCEED, 'found', 'e'),
+    ('(a|b|c|d|e)f', 'ef', SUCCEED, 'found+"-"+g1', 'ef-e'),
+    ('abcd*efg', 'abcdefg', SUCCEED, 'found', 'abcdefg'),
+    ('ab*', 'xabyabbbz', SUCCEED, 'found', 'ab'),
+    ('ab*', 'xayabbbz', SUCCEED, 'found', 'a'),
+    ('(ab|cd)e', 'abcde', SUCCEED, 'found+"-"+g1', 'cde-cd'),
+    ('[abhgefdc]ij', 'hij', SUCCEED, 'found', 'hij'),
+    ('^(ab|cd)e', 'abcde', FAIL, 'xg1y', 'xy'),
+    ('(abc|)ef', 'abcdef', SUCCEED, 'found+"-"+g1', 'ef-'),
+    ('(a|b)c*d', 'abcd', SUCCEED, 'found+"-"+g1', 'bcd-b'),
+    ('(ab|ab*)bc', 'abc', SUCCEED, 'found+"-"+g1', 'abc-a'),
+    ('a([bc]*)c*', 'abc', SUCCEED, 'found+"-"+g1', 'abc-bc'),
+    ('a([bc]*)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
+    ('a([bc]+)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
+    ('a([bc]*)(c+d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-b-cd'),
+    ('a[bcd]*dcdcde', 'adcdcde', SUCCEED, 'found', 'adcdcde'),
+    ('a[bcd]+dcdcde', 'adcdcde', FAIL),
+    ('(ab|a)b*c', 'abc', SUCCEED, 'found+"-"+g1', 'abc-ab'),
+    ('((a)(b)c)(d)', 'abcd', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'abc-a-b-d'),
+    ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', SUCCEED, 'found', 'alpha'),
+    ('^a(bc+|b[eh])g|.h$', 'abh', SUCCEED, 'found+"-"+g1', 'bh-None'),
+    ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
+    ('(bc+d$|ef*g.|h?i(j|k))', 'ij', SUCCEED, 'found+"-"+g1+"-"+g2', 'ij-ij-j'),
+    ('(bc+d$|ef*g.|h?i(j|k))', 'effg', FAIL),
+    ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', FAIL),
+    ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
+    ('(((((((((a)))))))))', 'a', SUCCEED, 'found', 'a'),
+    ('multiple words of text', 'uh-uh', FAIL),
+    ('multiple words', 'multiple words, yeah', SUCCEED, 'found', 'multiple words'),
+    ('(.*)c(.*)', 'abcde', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcde-ab-de'),
+    ('\\((.*), (.*)\\)', '(a, b)', SUCCEED, 'g2+"-"+g1', 'b-a'),
+    ('[k]', 'ab', FAIL),
+    ('a[-]?c', 'ac', SUCCEED, 'found', 'ac'),
+    ('(abc)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
+    ('([a-c]*)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
+    ('^(.+)?B', 'AB', SUCCEED, 'g1', 'A'),
+    ('(a+).\\1$', 'aaaaa', SUCCEED, 'found+"-"+g1', 'aaaaa-aa'),
+    ('^(a+).\\1$', 'aaaa', FAIL),
+    ('(abc)\\1', 'abcabc', SUCCEED, 'found+"-"+g1', 'abcabc-abc'),
+    ('([a-c]+)\\1', 'abcabc', SUCCEED, 'found+"-"+g1', 'abcabc-abc'),
+    ('(a)\\1', 'aa', SUCCEED, 'found+"-"+g1', 'aa-a'),
+    ('(a+)\\1', 'aa', SUCCEED, 'found+"-"+g1', 'aa-a'),
+    ('(a+)+\\1', 'aa', SUCCEED, 'found+"-"+g1', 'aa-a'),
+    ('(a).+\\1', 'aba', SUCCEED, 'found+"-"+g1', 'aba-a'),
+    ('(a)ba*\\1', 'aba', SUCCEED, 'found+"-"+g1', 'aba-a'),
+    ('(aa|a)a\\1$', 'aaa', SUCCEED, 'found+"-"+g1', 'aaa-a'),
+    ('(a|aa)a\\1$', 'aaa', SUCCEED, 'found+"-"+g1', 'aaa-a'),
+    ('(a+)a\\1$', 'aaa', SUCCEED, 'found+"-"+g1', 'aaa-a'),
+    ('([abc]*)\\1', 'abcabc', SUCCEED, 'found+"-"+g1', 'abcabc-abc'),
+    ('(a)(b)c|ab', 'ab', SUCCEED, 'found+"-"+g1+"-"+g2', 'ab-None-None'),
+    ('(a)+x', 'aaax', SUCCEED, 'found+"-"+g1', 'aaax-a'),
+    ('([ac])+x', 'aacx', SUCCEED, 'found+"-"+g1', 'aacx-c'),
+    ('([^/]*/)*sub1/', 'd:msgs/tdir/sub1/trial/away.cpp', SUCCEED, 'found+"-"+g1', 'd:msgs/tdir/sub1/-tdir/'),
+    ('([^.]*)\\.([^:]*):[T ]+(.*)', 'track1.title:TBlah blah blah', SUCCEED, 'found+"-"+g1+"-"+g2+"-"+g3', 'track1.title:TBlah blah blah-track1-title-Blah blah blah'),
+    ('([^N]*N)+', 'abNNxyzN', SUCCEED, 'found+"-"+g1', 'abNNxyzN-xyzN'),
+    ('([^N]*N)+', 'abNNxyz', SUCCEED, 'found+"-"+g1', 'abNN-N'),
+    ('([abc]*)x', 'abcx', SUCCEED, 'found+"-"+g1', 'abcx-abc'),
+    ('([abc]*)x', 'abc', FAIL),
+    ('([xyz]*)x', 'abcx', SUCCEED, 'found+"-"+g1', 'x-'),
+    ('(a)+b|aac', 'aac', SUCCEED, 'found+"-"+g1', 'aac-None'),
+    # Test symbolic groups
+    ('(?P<i d>aaa)a', 'aaaa', SYNTAX_ERROR),
+    ('(?P<id>aaa)a', 'aaaa', SUCCEED, 'found+"-"+id', 'aaaa-aaa'),
+    ('(?P<id>aa)(?P=id)', 'aaaa', SUCCEED, 'found+"-"+id', 'aaaa-aa'),
+    ('(?P<id>aa)(?P=xd)', 'aaaa', SYNTAX_ERROR),
+    # Test octal escapes/memory references
+    ('\\1', 'a', SYNTAX_ERROR),
+    ('\\09', chr(0) + '9', SUCCEED, 'found', chr(0) + '9'),
+    ('\\141', 'a', SUCCEED, 'found', 'a'),
+    ('(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\\119', 'abcdefghijklk9', SUCCEED, 'found+"-"+g11', 'abcdefghijklk9-k'),
+    # All tests from Perl
+    ('abc', 'abc', SUCCEED, 'found', 'abc'),
+    ('abc', 'xbc', FAIL),
+    ('abc', 'axc', FAIL),
+    ('abc', 'abx', FAIL),
+    ('abc', 'xabcy', SUCCEED, 'found', 'abc'),
+    ('abc', 'ababc', SUCCEED, 'found', 'abc'),
+    ('ab*c', 'abc', SUCCEED, 'found', 'abc'),
+    ('ab*bc', 'abc', SUCCEED, 'found', 'abc'),
+    ('ab*bc', 'abbc', SUCCEED, 'found', 'abbc'),
+    ('ab*bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
+    ('ab{0,}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
+    ('ab+bc', 'abbc', SUCCEED, 'found', 'abbc'),
+    ('ab+bc', 'abc', FAIL),
+    ('ab+bc', 'abq', FAIL),
+    ('ab{1,}bc', 'abq', FAIL),
+    ('ab+bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
+    ('ab{1,}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
+    ('ab{1,3}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
+    ('ab{3,4}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
+    ('ab{4,5}bc', 'abbbbc', FAIL),
+    ('ab?bc', 'abbc', SUCCEED, 'found', 'abbc'),
+    ('ab?bc', 'abc', SUCCEED, 'found', 'abc'),
+    ('ab{0,1}bc', 'abc', SUCCEED, 'found', 'abc'),
+    ('ab?bc', 'abbbbc', FAIL),
+    ('ab?c', 'abc', SUCCEED, 'found', 'abc'),
+    ('ab{0,1}c', 'abc', SUCCEED, 'found', 'abc'),
+    ('^abc$', 'abc', SUCCEED, 'found', 'abc'),
+    ('^abc$', 'abcc', FAIL),
+    ('^abc', 'abcc', SUCCEED, 'found', 'abc'),
+    ('^abc$', 'aabc', FAIL),
+    ('abc$', 'aabc', SUCCEED, 'found', 'abc'),
+    ('^', 'abc', SUCCEED, 'found', ''),
+    ('$', 'abc', SUCCEED, 'found', ''),
+    ('a.c', 'abc', SUCCEED, 'found', 'abc'),
+    ('a.c', 'axc', SUCCEED, 'found', 'axc'),
+    ('a.*c', 'axyzc', SUCCEED, 'found', 'axyzc'),
+    ('a.*c', 'axyzd', FAIL),
+    ('a[bc]d', 'abc', FAIL),
+    ('a[bc]d', 'abd', SUCCEED, 'found', 'abd'),
+    ('a[b-d]e', 'abd', FAIL),
+    ('a[b-d]e', 'ace', SUCCEED, 'found', 'ace'),
+    ('a[b-d]', 'aac', SUCCEED, 'found', 'ac'),
+    ('a[-b]', 'a-', SUCCEED, 'found', 'a-'),
+    ('a[b-]', 'a-', SUCCEED, 'found', 'a-'),
+    ('a[b-a]', '-', SYNTAX_ERROR),
+    ('a[]b', '-', SYNTAX_ERROR),
+    ('a[', '-', SYNTAX_ERROR),
+    ('a]', 'a]', SUCCEED, 'found', 'a]'),
+    ('a[]]b', 'a]b', SUCCEED, 'found', 'a]b'),
+    ('a[^bc]d', 'aed', SUCCEED, 'found', 'aed'),
+    ('a[^bc]d', 'abd', FAIL),
+    ('a[^-b]c', 'adc', SUCCEED, 'found', 'adc'),
+    ('a[^-b]c', 'a-c', FAIL),
+    ('a[^]b]c', 'a]c', FAIL),
+    ('a[^]b]c', 'adc', SUCCEED, 'found', 'adc'),
+    ('ab|cd', 'abc', SUCCEED, 'found', 'ab'),
+    ('ab|cd', 'abcd', SUCCEED, 'found', 'ab'),
+    ('()ef', 'def', SUCCEED, 'found+"-"+g1', 'ef-'),
+    ('*a', '-', SYNTAX_ERROR),
+    ('(*)b', '-', SYNTAX_ERROR),
+    ('$b', 'b', FAIL),
+    ('a\\', '-', SYNTAX_ERROR),
+    ('a\\(b', 'a(b', SUCCEED, 'found+"-"+g1', 'a(b-Error'),
+    ('a\\(*b', 'ab', SUCCEED, 'found', 'ab'),
+    ('a\\(*b', 'a((b', SUCCEED, 'found', 'a((b'),
+    ('a\\\\b', 'a\\b', SUCCEED, 'found', 'a\\b'),
+    ('abc)', '-', SYNTAX_ERROR),
+    ('(abc', '-', SYNTAX_ERROR),
+    ('((a))', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'a-a-a'),
+    ('(a)b(c)', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'abc-a-c'),
+    ('a+b+c', 'aabbabc', SUCCEED, 'found', 'abc'),
+    ('a{1,}b{1,}c', 'aabbabc', SUCCEED, 'found', 'abc'),
+    ('a**', '-', SYNTAX_ERROR),
+    ('a.+?c', 'abcabc', SUCCEED, 'found', 'abc'),
+    ('(a+|b)*', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
+    ('(a+|b){0,}', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
+    ('(a+|b)+', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
+    ('(a+|b){1,}', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
+    ('(a+|b)?', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
+    ('(a+|b){0,1}', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
+    (')(', '-', SYNTAX_ERROR),
+    ('[^ab]*', 'cde', SUCCEED, 'found', 'cde'),
+    ('abc', '', FAIL),
+    ('a*', '', SUCCEED, 'found', ''),
+    ('([abc])*d', 'abbbcd', SUCCEED, 'found+"-"+g1', 'abbbcd-c'),
+    ('([abc])*bcd', 'abcd', SUCCEED, 'found+"-"+g1', 'abcd-a'),
+    ('a|b|c|d|e', 'e', SUCCEED, 'found', 'e'),
+    ('(a|b|c|d|e)f', 'ef', SUCCEED, 'found+"-"+g1', 'ef-e'),
+    ('abcd*efg', 'abcdefg', SUCCEED, 'found', 'abcdefg'),
+    ('ab*', 'xabyabbbz', SUCCEED, 'found', 'ab'),
+    ('ab*', 'xayabbbz', SUCCEED, 'found', 'a'),
+    ('(ab|cd)e', 'abcde', SUCCEED, 'found+"-"+g1', 'cde-cd'),
+    ('[abhgefdc]ij', 'hij', SUCCEED, 'found', 'hij'),
+    ('^(ab|cd)e', 'abcde', FAIL),
+    ('(abc|)ef', 'abcdef', SUCCEED, 'found+"-"+g1', 'ef-'),
+    ('(a|b)c*d', 'abcd', SUCCEED, 'found+"-"+g1', 'bcd-b'),
+    ('(ab|ab*)bc', 'abc', SUCCEED, 'found+"-"+g1', 'abc-a'),
+    ('a([bc]*)c*', 'abc', SUCCEED, 'found+"-"+g1', 'abc-bc'),
+    ('a([bc]*)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
+    ('a([bc]+)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
+    ('a([bc]*)(c+d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-b-cd'),
+    ('a[bcd]*dcdcde', 'adcdcde', SUCCEED, 'found', 'adcdcde'),
+    ('a[bcd]+dcdcde', 'adcdcde', FAIL),
+    ('(ab|a)b*c', 'abc', SUCCEED, 'found+"-"+g1', 'abc-ab'),
+    ('((a)(b)c)(d)', 'abcd', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'abc-a-b-d'),
+    ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', SUCCEED, 'found', 'alpha'),
+    ('^a(bc+|b[eh])g|.h$', 'abh', SUCCEED, 'found+"-"+g1', 'bh-None'),
+    ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
+    ('(bc+d$|ef*g.|h?i(j|k))', 'ij', SUCCEED, 'found+"-"+g1+"-"+g2', 'ij-ij-j'),
+    ('(bc+d$|ef*g.|h?i(j|k))', 'effg', FAIL),
+    ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', FAIL),
+    ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
+    ('((((((((((a))))))))))', 'a', SUCCEED, 'g10', 'a'),
+    ('((((((((((a))))))))))\\10', 'aa', SUCCEED, 'found', 'aa'),
+# Python does not have the same rules for \\41 so this is a syntax error
+#    ('((((((((((a))))))))))\\41', 'aa', FAIL),
+#    ('((((((((((a))))))))))\\41', 'a!', SUCCEED, 'found', 'a!'),
+    ('((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
+    ('(?i)((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
+    ('(((((((((a)))))))))', 'a', SUCCEED, 'found', 'a'),
+    ('multiple words of text', 'uh-uh', FAIL),
+    ('multiple words', 'multiple words, yeah', SUCCEED, 'found', 'multiple words'),
+    ('(.*)c(.*)', 'abcde', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcde-ab-de'),
+    ('\\((.*), (.*)\\)', '(a, b)', SUCCEED, 'g2+"-"+g1', 'b-a'),
+    ('[k]', 'ab', FAIL),
+    ('a[-]?c', 'ac', SUCCEED, 'found', 'ac'),
+    ('(abc)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
+    ('([a-c]*)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
+    ('(?i)abc', 'ABC', SUCCEED, 'found', 'ABC'),
+    ('(?i)abc', 'XBC', FAIL),
+    ('(?i)abc', 'AXC', FAIL),
+    ('(?i)abc', 'ABX', FAIL),
+    ('(?i)abc', 'XABCY', SUCCEED, 'found', 'ABC'),
+    ('(?i)abc', 'ABABC', SUCCEED, 'found', 'ABC'),
+    ('(?i)ab*c', 'ABC', SUCCEED, 'found', 'ABC'),
+    ('(?i)ab*bc', 'ABC', SUCCEED, 'found', 'ABC'),
+    ('(?i)ab*bc', 'ABBC', SUCCEED, 'found', 'ABBC'),
+    ('(?i)ab*?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
+    ('(?i)ab{0,}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
+    ('(?i)ab+?bc', 'ABBC', SUCCEED, 'found', 'ABBC'),
+    ('(?i)ab+bc', 'ABC', FAIL),
+    ('(?i)ab+bc', 'ABQ', FAIL),
+    ('(?i)ab{1,}bc', 'ABQ', FAIL),
+    ('(?i)ab+bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
+    ('(?i)ab{1,}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
+    ('(?i)ab{1,3}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
+    ('(?i)ab{3,4}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
+    ('(?i)ab{4,5}?bc', 'ABBBBC', FAIL),
+    ('(?i)ab??bc', 'ABBC', SUCCEED, 'found', 'ABBC'),
+    ('(?i)ab??bc', 'ABC', SUCCEED, 'found', 'ABC'),
+    ('(?i)ab{0,1}?bc', 'ABC', SUCCEED, 'found', 'ABC'),
+    ('(?i)ab??bc', 'ABBBBC', FAIL),
+    ('(?i)ab??c', 'ABC', SUCCEED, 'found', 'ABC'),
+    ('(?i)ab{0,1}?c', 'ABC', SUCCEED, 'found', 'ABC'),
+    ('(?i)^abc$', 'ABC', SUCCEED, 'found', 'ABC'),
+    ('(?i)^abc$', 'ABCC', FAIL),
+    ('(?i)^abc', 'ABCC', SUCCEED, 'found', 'ABC'),
+    ('(?i)^abc$', 'AABC', FAIL),
+    ('(?i)abc$', 'AABC', SUCCEED, 'found', 'ABC'),
+    ('(?i)^', 'ABC', SUCCEED, 'found', ''),
+    ('(?i)$', 'ABC', SUCCEED, 'found', ''),
+    ('(?i)a.c', 'ABC', SUCCEED, 'found', 'ABC'),
+    ('(?i)a.c', 'AXC', SUCCEED, 'found', 'AXC'),
+    ('(?i)a.*?c', 'AXYZC', SUCCEED, 'found', 'AXYZC'),
+    ('(?i)a.*c', 'AXYZD', FAIL),
+    ('(?i)a[bc]d', 'ABC', FAIL),
+    ('(?i)a[bc]d', 'ABD', SUCCEED, 'found', 'ABD'),
+    ('(?i)a[b-d]e', 'ABD', FAIL),
+    ('(?i)a[b-d]e', 'ACE', SUCCEED, 'found', 'ACE'),
+    ('(?i)a[b-d]', 'AAC', SUCCEED, 'found', 'AC'),
+    ('(?i)a[-b]', 'A-', SUCCEED, 'found', 'A-'),
+    ('(?i)a[b-]', 'A-', SUCCEED, 'found', 'A-'),
+    ('(?i)a[b-a]', '-', SYNTAX_ERROR),
+    ('(?i)a[]b', '-', SYNTAX_ERROR),
+    ('(?i)a[', '-', SYNTAX_ERROR),
+    ('(?i)a]', 'A]', SUCCEED, 'found', 'A]'),
+    ('(?i)a[]]b', 'A]B', SUCCEED, 'found', 'A]B'),
+    ('(?i)a[^bc]d', 'AED', SUCCEED, 'found', 'AED'),
+    ('(?i)a[^bc]d', 'ABD', FAIL),
+    ('(?i)a[^-b]c', 'ADC', SUCCEED, 'found', 'ADC'),
+    ('(?i)a[^-b]c', 'A-C', FAIL),
+    ('(?i)a[^]b]c', 'A]C', FAIL),
+    ('(?i)a[^]b]c', 'ADC', SUCCEED, 'found', 'ADC'),
+    ('(?i)ab|cd', 'ABC', SUCCEED, 'found', 'AB'),
+    ('(?i)ab|cd', 'ABCD', SUCCEED, 'found', 'AB'),
+    ('(?i)()ef', 'DEF', SUCCEED, 'found+"-"+g1', 'EF-'),
+    ('(?i)*a', '-', SYNTAX_ERROR),
+    ('(?i)(*)b', '-', SYNTAX_ERROR),
+    ('(?i)$b', 'B', FAIL),
+    ('(?i)a\\', '-', SYNTAX_ERROR),
+    ('(?i)a\\(b', 'A(B', SUCCEED, 'found+"-"+g1', 'A(B-Error'),
+    ('(?i)a\\(*b', 'AB', SUCCEED, 'found', 'AB'),
+    ('(?i)a\\(*b', 'A((B', SUCCEED, 'found', 'A((B'),
+    ('(?i)a\\\\b', 'A\\B', SUCCEED, 'found', 'A\\B'),
+    ('(?i)abc)', '-', SYNTAX_ERROR),
+    ('(?i)(abc', '-', SYNTAX_ERROR),
+    ('(?i)((a))', 'ABC', SUCCEED, 'found+"-"+g1+"-"+g2', 'A-A-A'),
+    ('(?i)(a)b(c)', 'ABC', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABC-A-C'),
+    ('(?i)a+b+c', 'AABBABC', SUCCEED, 'found', 'ABC'),
+    ('(?i)a{1,}b{1,}c', 'AABBABC', SUCCEED, 'found', 'ABC'),
+    ('(?i)a**', '-', SYNTAX_ERROR),
+    ('(?i)a.+?c', 'ABCABC', SUCCEED, 'found', 'ABC'),
+    ('(?i)a.*?c', 'ABCABC', SUCCEED, 'found', 'ABC'),
+    ('(?i)a.{0,5}?c', 'ABCABC', SUCCEED, 'found', 'ABC'),
+    ('(?i)(a+|b)*', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
+    ('(?i)(a+|b){0,}', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
+    ('(?i)(a+|b)+', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
+    ('(?i)(a+|b){1,}', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
+    ('(?i)(a+|b)?', 'AB', SUCCEED, 'found+"-"+g1', 'A-A'),
+    ('(?i)(a+|b){0,1}', 'AB', SUCCEED, 'found+"-"+g1', 'A-A'),
+    ('(?i)(a+|b){0,1}?', 'AB', SUCCEED, 'found+"-"+g1', '-None'),
+    ('(?i))(', '-', SYNTAX_ERROR),
+    ('(?i)[^ab]*', 'CDE', SUCCEED, 'found', 'CDE'),
+    ('(?i)abc', '', FAIL),
+    ('(?i)a*', '', SUCCEED, 'found', ''),
+    ('(?i)([abc])*d', 'ABBBCD', SUCCEED, 'found+"-"+g1', 'ABBBCD-C'),
+    ('(?i)([abc])*bcd', 'ABCD', SUCCEED, 'found+"-"+g1', 'ABCD-A'),
+    ('(?i)a|b|c|d|e', 'E', SUCCEED, 'found', 'E'),
+    ('(?i)(a|b|c|d|e)f', 'EF', SUCCEED, 'found+"-"+g1', 'EF-E'),
+    ('(?i)abcd*efg', 'ABCDEFG', SUCCEED, 'found', 'ABCDEFG'),
+    ('(?i)ab*', 'XABYABBBZ', SUCCEED, 'found', 'AB'),
+    ('(?i)ab*', 'XAYABBBZ', SUCCEED, 'found', 'A'),
+    ('(?i)(ab|cd)e', 'ABCDE', SUCCEED, 'found+"-"+g1', 'CDE-CD'),
+    ('(?i)[abhgefdc]ij', 'HIJ', SUCCEED, 'found', 'HIJ'),
+    ('(?i)^(ab|cd)e', 'ABCDE', FAIL),
+    ('(?i)(abc|)ef', 'ABCDEF', SUCCEED, 'found+"-"+g1', 'EF-'),
+    ('(?i)(a|b)c*d', 'ABCD', SUCCEED, 'found+"-"+g1', 'BCD-B'),
+    ('(?i)(ab|ab*)bc', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-A'),
+    ('(?i)a([bc]*)c*', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-BC'),
+    ('(?i)a([bc]*)(c*d)', 'ABCD', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCD-BC-D'),
+    ('(?i)a([bc]+)(c*d)', 'ABCD', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCD-BC-D'),
+    ('(?i)a([bc]*)(c+d)', 'ABCD', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCD-B-CD'),
+    ('(?i)a[bcd]*dcdcde', 'ADCDCDE', SUCCEED, 'found', 'ADCDCDE'),
+    ('(?i)a[bcd]+dcdcde', 'ADCDCDE', FAIL),
+    ('(?i)(ab|a)b*c', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-AB'),
+    ('(?i)((a)(b)c)(d)', 'ABCD', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'ABC-A-B-D'),
+    ('(?i)[a-zA-Z_][a-zA-Z0-9_]*', 'ALPHA', SUCCEED, 'found', 'ALPHA'),
+    ('(?i)^a(bc+|b[eh])g|.h$', 'ABH', SUCCEED, 'found+"-"+g1', 'BH-None'),
+    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-None'),
+    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'IJ', SUCCEED, 'found+"-"+g1+"-"+g2', 'IJ-IJ-J'),
+    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFG', FAIL),
+    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'BCDD', FAIL),
+    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'REFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-None'),
+    ('(?i)((((((((((a))))))))))', 'A', SUCCEED, 'g10', 'A'),
+    ('(?i)((((((((((a))))))))))\\10', 'AA', SUCCEED, 'found', 'AA'),
+    #('(?i)((((((((((a))))))))))\\41', 'AA', FAIL),
+    #('(?i)((((((((((a))))))))))\\41', 'A!', SUCCEED, 'found', 'A!'),
+    ('(?i)(((((((((a)))))))))', 'A', SUCCEED, 'found', 'A'),
+    ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))', 'A', SUCCEED, 'g1', 'A'),
+    ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))', 'C', SUCCEED, 'g1', 'C'),
+    ('(?i)multiple words of text', 'UH-UH', FAIL),
+    ('(?i)multiple words', 'MULTIPLE WORDS, YEAH', SUCCEED, 'found', 'MULTIPLE WORDS'),
+    ('(?i)(.*)c(.*)', 'ABCDE', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCDE-AB-DE'),
+    ('(?i)\\((.*), (.*)\\)', '(A, B)', SUCCEED, 'g2+"-"+g1', 'B-A'),
+    ('(?i)[k]', 'AB', FAIL),
+#    ('(?i)abcd', 'ABCD', SUCCEED, 'found+"-"+\\found+"-"+\\\\found', 'ABCD-$&-\\ABCD'),
+#    ('(?i)a(bc)d', 'ABCD', SUCCEED, 'g1+"-"+\\g1+"-"+\\\\g1', 'BC-$1-\\BC'),
+    ('(?i)a[-]?c', 'AC', SUCCEED, 'found', 'AC'),
+    ('(?i)(abc)\\1', 'ABCABC', SUCCEED, 'g1', 'ABC'),
+    ('(?i)([a-c]*)\\1', 'ABCABC', SUCCEED, 'g1', 'ABC'),
+    ('a(?!b).', 'abad', SUCCEED, 'found', 'ad'),
+    ('a(?=d).', 'abad', SUCCEED, 'found', 'ad'),
+    ('a(?=c|d).', 'abad', SUCCEED, 'found', 'ad'),
+    ('a(?:b|c|d)(.)', 'ace', SUCCEED, 'g1', 'e'),
+    ('a(?:b|c|d)*(.)', 'ace', SUCCEED, 'g1', 'e'),
+    ('a(?:b|c|d)+?(.)', 'ace', SUCCEED, 'g1', 'e'),
+    ('a(?:b|(c|e){1,2}?|d)+?(.)', 'ace', SUCCEED, 'g1 + g2', 'ce'),
+    ('^(.+)?B', 'AB', SUCCEED, 'g1', 'A'),
+    # lookbehind: split by : but not if it is escaped by -.
+    ('(?<!-):(.*?)(?<!-):', 'a:bc-:de:f', SUCCEED, 'g1', 'bc-:de' ),
+    # escaping with \ as we know it
+    ('(?<!\\\):(.*?)(?<!\\\):', 'a:bc\\:de:f', SUCCEED, 'g1', 'bc\\:de' ),
+    # terminating with ' and escaping with ? as in edifact
+    ("(?<!\\?)'(.*?)(?<!\\?)'", "a'bc?'de'f", SUCCEED, 'g1', "bc?'de" ),
+    # Comments using the (?#...) syntax
+    ('w(?# comment', 'w', SYNTAX_ERROR),
+    ('w(?# comment 1)xy(?# comment 2)z', 'wxyz', SUCCEED, 'found', 'wxyz'),
+    # Check odd placement of embedded pattern modifiers
+    # not an error under PCRE/PRE:
+    ('w(?i)', 'W', SUCCEED, 'found', 'W'),
+    # ('w(?i)', 'W', SYNTAX_ERROR),
+    # Comments using the x embedded pattern modifier
+    ("""(?x)w# comment 1
+        x y
+        # comment 2
+        z""", 'wxyz', SUCCEED, 'found', 'wxyz'),
+    # using the m embedded pattern modifier
+    ('^abc', """jkl
+abc
+xyz""", FAIL),
+    ('(?m)^abc', """jkl
+abc
+xyz""", SUCCEED, 'found', 'abc'),
+    ('(?m)abc$', """jkl
+xyzabc
+123""", SUCCEED, 'found', 'abc'),
+    # using the s embedded pattern modifier
+    ('a.b', 'a\nb', FAIL),
+    ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
+    # test \w, etc. both inside and outside character classes
+    ('\\w+', '--ab_cd0123--', SUCCEED, 'found', 'ab_cd0123'),
+    ('[\\w]+', '--ab_cd0123--', SUCCEED, 'found', 'ab_cd0123'),
+    ('\\D+', '1234abc5678', SUCCEED, 'found', 'abc'),
+    ('[\\D]+', '1234abc5678', SUCCEED, 'found', 'abc'),
+    ('[\\da-fA-F]+', '123abc', SUCCEED, 'found', '123abc'),
+    # not an error under PCRE/PRE:
+    # ('[\\d-x]', '-', SYNTAX_ERROR),
+    (r'([\s]*)([\S]*)([\s]*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '),
+    (r'(\s*)(\S*)(\s*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '),
+    (r'\xff', '\377', SUCCEED, 'found', chr(255)),
+    # new \x semantics
+    (r'\x00ff', '\377', FAIL),
+    # (r'\x00ff', '\377', SUCCEED, 'found', chr(255)),
+    (r'\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'),
+    ('\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'),
+    (r'\t\n\v\r\f\a', '\t\n\v\r\f\a', SUCCEED, 'found', chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)),
+    (r'[\t][\n][\v][\r][\f][\b]', '\t\n\v\r\f\b', SUCCEED, 'found', '\t\n\v\r\f\b'),
+    #
+    # post-1.5.2 additions
+    # xmllib problem
+    (r'(([a-z]+):)?([a-z]+)$', 'smil', SUCCEED, 'g1+"-"+g2+"-"+g3', 'None-None-smil'),
+    # bug 110866: reference to undefined group
+    (r'((.)\1+)', '', SYNTAX_ERROR),
+    # bug 111869: search (PRE/PCRE fails on this one, SRE doesn't)
+    (r'.*d', 'abc\nabd', SUCCEED, 'found', 'abd'),
+    # bug 112468: various expected syntax errors
+    (r'(', '', SYNTAX_ERROR),
+    (r'[\41]', '!', SUCCEED, 'found', '!'),
+    # bug 114033: nothing to repeat
+    (r'(x?)?', 'x', SUCCEED, 'found', 'x'),
+    # bug 115040: rescan if flags are modified inside pattern
+    (r' (?x)foo ', 'foo', SUCCEED, 'found', 'foo'),
+    # bug 115618: negative lookahead
+    (r'(?<!abc)(d.f)', 'abcdefdof', SUCCEED, 'found', 'dof'),
+    # bug 116251: character class bug
+    (r'[\w-]+', 'laser_beam', SUCCEED, 'found', 'laser_beam'),
+    # bug 123769+127259: non-greedy backtracking bug
+    (r'.*?\S *:', 'xx:', SUCCEED, 'found', 'xx:'),
+    (r'a[ ]*?\ (\d+).*', 'a   10', SUCCEED, 'found', 'a   10'),
+    (r'a[ ]*?\ (\d+).*', 'a    10', SUCCEED, 'found', 'a    10'),
+    # bug 127259: \Z shouldn't depend on multiline mode
+    (r'(?ms).*?x\s*\Z(.*)','xx\nx\n', SUCCEED, 'g1', ''),
+    # bug 128899: uppercase literals under the ignorecase flag
+    (r'(?i)M+', 'MMM', SUCCEED, 'found', 'MMM'),
+    (r'(?i)m+', 'MMM', SUCCEED, 'found', 'MMM'),
+    (r'(?i)[M]+', 'MMM', SUCCEED, 'found', 'MMM'),
+    (r'(?i)[m]+', 'MMM', SUCCEED, 'found', 'MMM'),
+    # bug 130748: ^* should be an error (nothing to repeat)
+    (r'^*', '', SYNTAX_ERROR),
+    # bug 133283: minimizing repeat problem
+    (r'"(?:\\"|[^"])*?"', r'"\""', SUCCEED, 'found', r'"\""'),
+    # bug 477728: minimizing repeat problem
+    (r'^.*?$', 'one\ntwo\nthree\n', FAIL),
+    # bug 483789: minimizing repeat problem
+    (r'a[^>]*?b', 'a>b', FAIL),
+    # bug 490573: minimizing repeat problem
+    (r'^a*?$', 'foo', FAIL),
+    # bug 470582: nested groups problem
+    (r'^((a)c)?(ab)$', 'ab', SUCCEED, 'g1+"-"+g2+"-"+g3', 'None-None-ab'),
+    # another minimizing repeat problem (capturing groups in assertions)
+    ('^([ab]*?)(?=(b)?)c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
+    ('^([ab]*?)(?!(b))c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
+    ('^([ab]*?)(?<!(a))c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
+]
+try:
+    u = eval("u'\N{LATIN CAPITAL LETTER A WITH DIAERESIS}'")
+except SyntaxError:
+    pass
+else:
+    tests.extend([
+    # bug 410271: \b broken under locales
+    (r'\b.\b', 'a', SUCCEED, 'found', 'a'),
+    (r'(?u)\b.\b', u, SUCCEED, 'found', u),
+    (r'(?u)\w', u, SUCCEED, 'found', u),
+    ])
--- a/from_cpython/Lib/test/test_re.py
+++ b/from_cpython/Lib/test/test_re.py
+from test.test_support import verbose, run_unittest, import_module
+from test.test_support import precisionbigmemtest, _2G, cpython_only
+from test.test_support import captured_stdout
+import re
+from re import Scanner
+import sre_constants
+import sys
+import string
+import traceback
+from weakref import proxy
+# Misc tests from Tim Peters' re.doc
+# WARNING: Don't change details in these tests if you don't know
+# what you're doing. Some of these tests were carefully modeled to
+# cover most of the code.
+import unittest
+class ReTests(unittest.TestCase):
+    def test_weakref(self):
+        s = 'QabbbcR'
+        x = re.compile('ab+c')
+        y = proxy(x)
+        self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
+    def test_search_star_plus(self):
+        # search() may succeed with an empty match at position 0 for 'x*' but
+        # has to scan forward for 'x+'; match() only ever tries position 0.
+        # span() and span(0) are checked side by side and must agree.
+        self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
+        self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
+        self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
+        self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
+        self.assertEqual(re.search('x', 'aaa'), None)
+        self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
+        self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
+        self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
+        self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
+        self.assertEqual(re.match('a+', 'xxx'), None)
+    def bump_num(self, matchobj):
+        int_value = int(matchobj.group(0))
+        return str(int_value + 1)
+    def test_basic_re_sub(self):
+        # Core re.sub() behaviour: string templates, callable replacements,
+        # the optional count argument, and group references in templates.
+        self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
+        # Callable replacement; the second call stops after 3 substitutions.
+        self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
+                         '9.3 -3 24x100y')
+        self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
+                         '9.3 -3 23x99y')
+        # A callable's return value is inserted verbatim, whereas a template
+        # string has its backslash escapes processed.
+        self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
+        self.assertEqual(re.sub('.', r"\n", 'x'), '\n')
+        s = r"\1\1"
+        self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
+        self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
+        self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)
+        # \g<name> and \g<number> both refer to the same captured group.
+        self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
+        self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
+        self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
+        self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
+        # Character escapes in the template are translated; per the expected
+        # value, the regex-only escapes (\B, \Z, \A, \w, ...) stay as written.
+        self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
+                         '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
+        self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
+        self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
+                         (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
+        self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
+    def test_bug_449964(self):
+        # fails for group followed by other escape
+        # The template ends in a backslash-b escape, which must come out as a
+        # backspace character after the two expanded group references.
+        self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
+                         'xx\bxx\b')
+    def test_bug_449000(self):
+        # Test for sub() on escaped characters
+        # All four raw / non-raw combinations of pattern and replacement must
+        # turn every CR-LF pair into a single LF.
+        self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
+                         'abc\ndef\n')
+        self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
+                         'abc\ndef\n')
+        self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
+                         'abc\ndef\n')
+        self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
+                         'abc\ndef\n')
+    def test_bug_1140(self):
+        # re.sub(x, y, u'') should return u'', not '', and
+        # re.sub(x, y, '') should return '', not u''.
+        # Also:
+        # re.sub(x, y, unicode(x)) should return unicode(y), and
+        # re.sub(x, y, str(x)) should return
+        #     str(y) if isinstance(y, str) else unicode(y).
+        # The loops cover every str/unicode combination of pattern x and
+        # replacement y.
+        for x in 'x', u'x':
+            for y in 'y', u'y':
+                # empty unicode subject
+                z = re.sub(x, y, u'')
+                self.assertEqual(z, u'')
+                self.assertEqual(type(z), unicode)
+                # empty str subject
+                z = re.sub(x, y, '')
+                self.assertEqual(z, '')
+                self.assertEqual(type(z), str)
+                # unicode subject that matches
+                z = re.sub(x, y, unicode(x))
+                self.assertEqual(z, y)
+                self.assertEqual(type(z), unicode)
+                # str subject that matches: result takes the type of y
+                z = re.sub(x, y, str(x))
+                self.assertEqual(z, y)
+                self.assertEqual(type(z), type(y))
+    def test_bug_1661(self):
+        # Verify that flags do not get silently ignored with compiled patterns
+        pattern = re.compile('.')
+        self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
+        self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
+        self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
+        self.assertRaises(ValueError, re.compile, pattern, re.I)
+    def test_bug_3629(self):
+        # A regex that triggered a bug in the sre-code validator
+        # Compiling without an exception is the whole check; nothing is
+        # matched against the resulting pattern.
+        re.compile("(?P<quote>)(?(quote))")
+    def test_sub_template_numeric_escape(self):
+        # bug 776311 and friends
+        # Numeric escapes in a replacement template are either octal
+        # character escapes or group references, depending on their digits.
+        # Escapes starting with 0, and three-digit octal escapes:
+        self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
+        self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
+        self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
+        self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
+        self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
+        self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
+        self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
+        # '\1111' is the octal escape \111 followed by a literal '1'; the two
+        # assertions spell the same expected value in both ways.
+        self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
+        self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')
+        self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
+        self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
+        self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
+        self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
+        self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
+        # For octal values above 0377 the expected results keep only the low
+        # 8 bits.
+        self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
+        self.assertEqual(re.sub('x', r'\777', 'x'), '\377')
+        # The pattern 'x' defines no groups, so anything parsed as a group
+        # reference must raise re.error.
+        self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
+        self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
+        self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
+        self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
+        self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
+        self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
+        self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
+        self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
+        self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
+        self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
+        self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
+        self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'
+        # in python2.3 (etc), these loop endlessly in sre_parser.py
+        # With eleven groups defined, \11 is a valid two-digit group
+        # reference and any following character is literal text.
+        self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
+        self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
+                         'xz8')
+        self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
+                         'xza')
+    def test_qualified_re_sub(self):
+        # Without a count every match is replaced; a fourth positional
+        # argument of 1 limits sub() to the first match only.
+        self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
+        self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
+    def test_bug_114660(self):
+        # Collapse the whitespace run between two non-space characters into
+        # one space, re-inserting the neighbours via \1 and \2.
+        self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello  there'),
+                         'hello there')
+    def test_bug_462270(self):
+        # Test for empty sub() behaviour, see SF bug #462270
+        # 'x*' also matches the empty string, so the expected result has a
+        # replacement at each gap; 'x+' only replaces the literal 'x'.
+        self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
+        self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
+    def test_symbolic_groups(self):
+        # Named groups, named backreferences (?P=name) and conditionals
+        # (?(name)...): the first two patterns must compile cleanly.
+        re.compile('(?P<a>x)(?P=a)(?(a)y)')
+        re.compile('(?P<a1>x)(?P=a1)(?(a1)y)')
+        # Every remaining pattern is expected to be rejected with re.error:
+        # duplicate names, malformed names, or references to groups that are
+        # not defined in the pattern.
+        self.assertRaises(re.error, re.compile, '(?P<a>)(?P<a>)')
+        self.assertRaises(re.error, re.compile, '(?Px)')
+        self.assertRaises(re.error, re.compile, '(?P=)')
+        self.assertRaises(re.error, re.compile, '(?P=1)')
+        self.assertRaises(re.error, re.compile, '(?P=a)')
+        self.assertRaises(re.error, re.compile, '(?P=a1)')
+        self.assertRaises(re.error, re.compile, '(?P=a.)')
+        self.assertRaises(re.error, re.compile, '(?P<)')
+        self.assertRaises(re.error, re.compile, '(?P<>)')
+        self.assertRaises(re.error, re.compile, '(?P<1>)')
+        self.assertRaises(re.error, re.compile, '(?P<a.>)')
+        self.assertRaises(re.error, re.compile, '(?())')
+        self.assertRaises(re.error, re.compile, '(?(a))')
+        self.assertRaises(re.error, re.compile, '(?(1a))')
+        self.assertRaises(re.error, re.compile, '(?(a.))')
+    def test_symbolic_refs(self):
+        # Bad \g<...> references in a replacement template must raise.
+        # Malformed references raise re.error:
+        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
+        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
+        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
+        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
+        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<>', 'xx')
+        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
+        # A well-formed name that the pattern does not define is reported as
+        # IndexError instead.
+        self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
+        # Group b / group 2 exists but takes no part in any match of 'xx'.
+        self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
+        self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
+        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
+    def test_re_subn(self):
+        # subn() returns a (new_string, substitution_count) pair; the last
+        # call passes 2 as the maximum number of substitutions.
+        self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
+        self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
+        self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
+        self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
+        self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
+    def test_re_split(self):
+        # split() on ':a:b::c' with various separators. Capturing groups in
+        # the separator put the captured text (or None for a group that did
+        # not participate) into the result; non-capturing groups do not.
+        self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
+        self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
+        self.assertEqual(re.split("(:*)", ":a:b::c"),
+                         ['', ':', 'a', ':', 'b', '::', 'c'])
+        self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
+        self.assertEqual(re.split("(:)*", ":a:b::c"),
+                         ['', ':', 'a', ':', 'b', ':', 'c'])
+        self.assertEqual(re.split("([b:]+)", ":a:b::c"),
+                         ['', ':', 'a', ':b::', 'c'])
+        self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
+                         ['', None, ':', 'a', None, ':', '', 'b', None, '',
+                          None, '::', 'c'])
+        self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
+                         ['', 'a', '', '', 'c'])
+    def test_qualified_re_split(self):
+        # The third positional argument caps the number of splits at 2; the
+        # unsplit remainder is returned as the final element.
+        self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
+        self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
+        self.assertEqual(re.split("(:)", ":a:b::c", 2),
+                         ['', ':', 'a', ':', 'b::c'])
+        self.assertEqual(re.split("(:*)", ":a:b::c", 2),
+                         ['', ':', 'a', ':', 'b::c'])
+    def test_re_findall(self):
+        # findall() result shape: whole matches when the pattern has no
+        # group, the group text when it has one group, and a tuple per match
+        # when it has several groups.
+        self.assertEqual(re.findall(":+", "abc"), [])
+        self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
+        self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
+        self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
+                                                               (":", ":"),
+                                                               (":", "::")])
+    def test_bug_117612(self):
+        # In findall() tuples, the inner group (b) is reported as '' for the
+        # matches in which it did not participate.
+        self.assertEqual(re.findall(r"(a|(b))", "aba"),
+                         [("a", ""),("b", "b"),("a", "")])
+    def test_re_match(self):
+        self.assertEqual(re.match('a', 'a').groups(), ())
+        self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
+        self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
+        self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
+        self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
+        pat = re.compile('((a)|(b))(c)?')
+        self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
+        self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
+        self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
+        self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
+        self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
+        # A single group
+        m = re.match('(a)', 'a')
+        self.assertEqual(m.group(0), 'a')
+        self.assertEqual(m.group(0), 'a')
+        self.assertEqual(m.group(1), 'a')
+        self.assertEqual(m.group(1, 1), ('a', 'a'))
+        pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
+        self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
+        self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
+                         (None, 'b', None))
+        self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
+    def test_re_groupref_exists(self):
+        # Conditional sub-patterns (?(group)yes|no): which branch is tried
+        # depends on whether the referenced group took part in the match.
+        # A closing parenthesis is required exactly when an opening one was
+        # captured by group 1.
+        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
+                         ('(', 'a'))
+        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
+                         (None, 'a'))
+        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
+        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
+        # Conditionals with both branches, and with an empty "yes" branch.
+        self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
+                         ('a', 'b'))
+        self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
+                         (None, 'd'))
+        self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
+                         (None, 'd'))
+        self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
+                         ('a', ''))
+        # Tests for bug #1177831: exercise groups other than the first group
+        p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
+        self.assertEqual(p.match('abc').groups(),
+                         ('a', 'b', 'c'))
+        self.assertEqual(p.match('ad').groups(),
+                         ('a', None, 'd'))
+        self.assertEqual(p.match('abd'), None)
+        self.assertEqual(p.match('ac'), None)
+    def test_re_groupref(self):
+        # Numeric backreferences (\1) inside the pattern, including a
+        # backreference to an optional group that did not participate.
+        self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
+                         ('|', 'a'))
+        self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
+                         (None, 'a'))
+        self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
+        self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
+        self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
+                         ('a', 'a'))
+        self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
+                         (None, None))
+    def test_groupdict(self):
+        # groupdict() maps each named group to the text it captured.
+        self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
+                                  'first second').groupdict(),
+                         {'first':'first', 'second':'second'})
+    def test_expand(self):
+        # Match.expand() fills a template using both numeric (\1, \2) and
+        # named (\g<name>) group references.
+        self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
+                                  "first second")
+                                  .expand(r"\2 \1 \g<second> \g<first>"),
+                         "second first second first")
+    def test_repeat_minmax(self):
+        self.assertEqual(re.match("^(\w){1}$", "abc"), None)
+        self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
+        self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
+        self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
+        self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
+        self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
+        self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
+        self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
+        self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
+        self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
+        self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
+        self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
+        self.assertEqual(re.match("^x{1}$", "xxx"), None)
+        self.assertEqual(re.match("^x{1}?$", "xxx"), None)
+        self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
+        self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
+        self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
+        self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
+        self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
+        self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
+        self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
+        self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
+        self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
+        self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
+        self.assertEqual(re.match("^x{}$", "xxx"), None)
+        self.assertNotEqual(re.match("^x{}$", "x{}"), None)
+    def test_getattr(self):
+        # Read-only attributes exposed by a match object: pos, endpos,
+        # string, regs (one span per group, group 0 first) and re.
+        self.assertEqual(re.match("(a)", "a").pos, 0)
+        self.assertEqual(re.match("(a)", "a").endpos, 1)
+        self.assertEqual(re.match("(a)", "a").string, "a")
+        self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
+        self.assertNotEqual(re.match("(a)", "a").re, None)
+    def test_special_escapes(self):
+        # \b, \B, \A, \Z and the character-class escapes on str and unicode
+        # subjects, with and without the LOCALE / UNICODE flags.
+        # Word boundary / non-boundary on a str subject:
+        self.assertEqual(re.search(r"\b(b.)\b",
+                                   "abcd abc bcd bx").group(1), "bx")
+        self.assertEqual(re.search(r"\B(b.)\B",
+                                   "abc bcd bc abxd").group(1), "bx")
+        self.assertEqual(re.search(r"\b(b.)\b",
+                                   "abcd abc bcd bx", re.LOCALE).group(1), "bx")
+        self.assertEqual(re.search(r"\B(b.)\B",
+                                   "abc bcd bc abxd", re.LOCALE).group(1), "bx")
+        self.assertEqual(re.search(r"\b(b.)\b",
+                                   "abcd abc bcd bx", re.UNICODE).group(1), "bx")
+        self.assertEqual(re.search(r"\B(b.)\B",
+                                   "abc bcd bc abxd", re.UNICODE).group(1), "bx")
+        # In MULTILINE mode ^ and $ match at line breaks, while \A and \Z
+        # still anchor to the whole string.
+        self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
+        self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
+        self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
+        # The same checks against unicode subjects:
+        self.assertEqual(re.search(r"\b(b.)\b",
+                                   u"abcd abc bcd bx").group(1), "bx")
+        self.assertEqual(re.search(r"\B(b.)\B",
+                                   u"abc bcd bc abxd").group(1), "bx")
+        self.assertEqual(re.search(r"^abc$", u"\nabc\n", re.M).group(0), "abc")
+        self.assertEqual(re.search(r"^\Aabc\Z$", u"abc", re.M).group(0), "abc")
+        self.assertEqual(re.search(r"^\Aabc\Z$", u"\nabc\n", re.M), None)
+        # One character of each class, in sequence: \d \D \w \W \s \S.
+        self.assertEqual(re.search(r"\d\D\w\W\s\S",
+                                   "1aa! a").group(0), "1aa! a")
+        self.assertEqual(re.search(r"\d\D\w\W\s\S",
+                                   "1aa! a", re.LOCALE).group(0), "1aa! a")
+        self.assertEqual(re.search(r"\d\D\w\W\s\S",
+                                   "1aa! a", re.UNICODE).group(0), "1aa! a")
+    def test_string_boundaries(self):
+        # Pins down how \b and \B behave at the edges of a string and on
+        # empty or whitespace-only strings.
+        # See http://bugs.python.org/issue10713
+        self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
+                         "abc")
+        # There's a word boundary at the start of a string.
+        self.assertTrue(re.match(r"\b", "abc"))
+        # A non-empty string includes a non-boundary zero-length match.
+        self.assertTrue(re.search(r"\B", "abc"))
+        # There is no non-boundary match at the start of a string.
+        self.assertFalse(re.match(r"\B", "abc"))
+        # However, an empty string contains no word boundaries, and also no
+        # non-boundaries.
+        self.assertEqual(re.search(r"\B", ""), None)
+        # This one is questionable and different from the perlre behaviour,
+        # but describes current behavior.
+        self.assertEqual(re.search(r"\b", ""), None)
+        # A single word-character string has two boundaries, but no
+        # non-boundary gaps.
+        self.assertEqual(len(re.findall(r"\b", "a")), 2)
+        self.assertEqual(len(re.findall(r"\B", "a")), 0)
+        # If there are no words, there are no boundaries
+        self.assertEqual(len(re.findall(r"\b", " ")), 0)
+        self.assertEqual(len(re.findall(r"\b", "   ")), 0)
+        # Can match around the whitespace.
+        self.assertEqual(len(re.findall(r"\B", " ")), 2)
+    def test_bigcharset(self):
+        # Character classes whose members are all code points above 255.
+        self.assertEqual(re.match(u"([\u2222\u2223])",
+                                  u"\u2222").group(1), u"\u2222")
+        self.assertEqual(re.match(u"([\u2222\u2223])",
+                                  u"\u2222", re.UNICODE).group(1), u"\u2222")
+        # A class built from every 255th code point between 256 and 2**16;
+        # u"\uff01" (65281 = 256 + 255*255) is one of its members.
+        r = u'[%s]' % u''.join(map(unichr, range(256, 2**16, 255)))
+        self.assertEqual(re.match(r, u"\uff01", re.UNICODE).group(), u"\uff01")
+    def test_big_codesize(self):
+        # Issue #1160
+        # Compile one alternation of the decimal numbers 0..9999 and check
+        # that an early and the last alternative both still match.
+        r = re.compile('|'.join(('%d'%x for x in range(10000))))
+        self.assertIsNotNone(r.match('1000'))
+        self.assertIsNotNone(r.match('9999'))
+    def test_anyall(self):
+        # With re.DOTALL, '.' also matches newline characters.
+        self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
+                         "a\nb")
+        self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
+                         "a\n\nb")
+    def test_non_consuming(self):
+        # Lookahead assertions do not consume input: in every case group 1
+        # holds only the leading "a", whatever the assertion looked at.
+        # Positive lookahead (?=...):
+        self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
+        self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
+        self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
+        self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
+        self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
+        self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
+        self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
+        # Negative lookahead (?!...):
+        self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
+        self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
+        self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
+        self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
+    def test_ignore_case(self):
+        # re.I on plain literals (str and unicode subjects) and combined with
+        # negated sets, sets, backreferences and alternations.
+        self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
+        self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
+        self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
+        self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
+        self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
+        self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
+        self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
+        self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
+        self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
+        self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
+    def test_category(self):
+        # A lone category escape (\s) captured in a group matches a space.
+        self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
+    def test_getlower(self):
+        # _sre.getlower() must map 'A' to 'a' with no flags, with LOCALE and
+        # with UNICODE; the module is imported locally for this test.
+        import _sre
+        self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
+        self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
+        self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
+        # End-to-end check of case-insensitive matching on str and unicode.
+        self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
+        self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
+    def test_not_literal(self):
+        # A negated single-character set, alone and under '*'.
+        self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
+        self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
+    def test_search_coverage(self):
+        # search() with a category escape before and after a literal.
+        self.assertEqual(re.search("\s(b)", " b").group(1), "b")
+        self.assertEqual(re.search("a\s", "a ").group(0), "a ")
+    def assertMatch(self, pattern, text, match=None, span=None,
+                    matcher=re.match):
+        if match is None and span is None:
+            # the pattern matches the whole text
+            match = text
+            span = (0, len(text))
+        elif match is None or span is None:
+            raise ValueError('If match is not None, span should be specified '
+                             '(and vice versa).')
+        m = matcher(pattern, text)
+        self.assertTrue(m)
+        self.assertEqual(m.group(), match)
+        self.assertEqual(m.span(), span)
+    def test_re_escape(self):
+        # re.escape() on each of the first 256 unicode code points:
+        # ASCII letters and digits are left alone, NUL becomes the four
+        # characters \000, and everything else gets a backslash prefix.
+        alnum_chars = string.ascii_letters + string.digits
+        p = u''.join(unichr(i) for i in range(256))
+        for c in p:
+            if c in alnum_chars:
+                self.assertEqual(re.escape(c), c)
+            elif c == u'\x00':
+                self.assertEqual(re.escape(c), u'\\000')
+            else:
+                self.assertEqual(re.escape(c), u'\\' + c)
+            # The escaped form must match the original character exactly.
+            self.assertMatch(re.escape(c), c)
+        # ... and the same holds for the whole 256-character string.
+        self.assertMatch(re.escape(p), p)
+    def test_re_escape_byte(self):
+        # Byte-string counterpart of test_re_escape: the same three cases,
+        # checked for each of the 256 byte values.
+        alnum_chars = (string.ascii_letters + string.digits).encode('ascii')
+        p = ''.join(chr(i) for i in range(256))
+        for b in p:
+            if b in alnum_chars:
+                self.assertEqual(re.escape(b), b)
+            elif b == b'\x00':
+                self.assertEqual(re.escape(b), b'\\000')
+            else:
+                self.assertEqual(re.escape(b), b'\\' + b)
+            # The escaped form must match the original byte exactly.
+            self.assertMatch(re.escape(b), b)
+        self.assertMatch(re.escape(p), p)
+    def test_re_escape_non_ascii(self):
+        # Non-ASCII unicode characters are backslash-escaped too, and the
+        # escaped text still matches the original string.
+        s = u'xxx\u2620\u2620\u2620xxx'
+        s_escaped = re.escape(s)
+        self.assertEqual(s_escaped, u'xxx\\\u2620\\\u2620\\\u2620xxx')
+        self.assertMatch(s_escaped, s)
+        # An escaped character under '+' inside a larger pattern; search()
+        # is expected to find the run at span (2, 7).
+        self.assertMatch(u'.%s+.' % re.escape(u'\u2620'), s,
+                         u'x\u2620\u2620\u2620x', (2, 7), re.search)
+    def test_re_escape_non_ascii_bytes(self):
+        # On UTF-8 encoded bytes, each byte of a multi-byte sequence is
+        # escaped individually.
+        b = u'y\u2620y\u2620y'.encode('utf-8')
+        b_escaped = re.escape(b)
+        self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
+        self.assertMatch(b_escaped, b)
+        # The escaped encoding of one character occurs twice in the subject.
+        res = re.findall(re.escape(u'\u2620'.encode('utf-8')), b)
+        self.assertEqual(len(res), 2)
+    def test_pickling(self):
+        # Run the shared round-trip check with both pickle implementations.
+        import pickle
+        self.pickle_test(pickle)
+        import cPickle
+        self.pickle_test(cPickle)
+        # old pickles expect the _compile() reconstructor in sre module
+        # The import itself is the check: it fails if sre._compile is gone.
+        import_module("sre", deprecated=True)
+        from sre import _compile
+    def pickle_test(self, pickle):
+        oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
+        s = pickle.dumps(oldpat)
+        newpat = pickle.loads(s)
+        self.assertEqual(oldpat, newpat)
+    def test_constants(self):
+        self.assertEqual(re.I, re.IGNORECASE)
+        self.assertEqual(re.L, re.LOCALE)
+        self.assertEqual(re.M, re.MULTILINE)
+        self.assertEqual(re.S, re.DOTALL)
+        self.assertEqual(re.X, re.VERBOSE)
+    def test_flags(self):
+        # Compiling with each of these flags must succeed and return a
+        # pattern object rather than None.
+        for flag in [re.I, re.M, re.X, re.S, re.L]:
+            self.assertNotEqual(re.compile('^pattern$', flag), None)
+    def test_sre_character_literals(self):
+        # Octal (\ooo) and hex (\xhh) escapes for a spread of byte values,
+        # alone and followed by a character that must stay outside the
+        # escape ('0', '8', 'z').
+        for i in [0, 8, 16, 32, 64, 127, 128, 255]:
+            self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
+            self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
+            self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
+            self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
+            self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
+            self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
+        # \911 is expected to be rejected with re.error.
+        self.assertRaises(re.error, re.match, "\911", "")
+    def test_sre_character_class_literals(self):
+        # The same octal and hex escapes as test_sre_character_literals, but
+        # inside a character class; the trailing '0', '8' or 'z' is just one
+        # more class member, so chr(i) alone must still match.
+        for i in [0, 8, 16, 32, 64, 127, 128, 255]:
+            self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None)
+            self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None)
+            self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None)
+            self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
+            self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
+            self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
+        # \911 inside a class is expected to be rejected with re.error.
+        self.assertRaises(re.error, re.match, "[\911]", "")
+    def test_bug_113254(self):
+        self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
+        self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
+        self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
+    def test_bug_527371(self):
+        # bug described in patches 527371/672491
+        # lastindex / lastgroup must ignore an optional group that ended up
+        # not participating in the match ...
+        self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
+        self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
+        self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
+        # ... and for nested groups they report the outer group.
+        self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
+        self.assertEqual(re.match("((a))", "a").lastindex, 1)
+    def test_bug_545855(self):
+        # bug 545855 -- This pattern failed to cause a compile error as it
+        # should, instead provoking a TypeError.
+        self.assertRaises(re.error, re.compile, 'foo[a-')
+    def test_bug_418626(self):
+        # bugs 418626 et al. -- Testing Greg Chapman's addition of op code
+        # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
+        # pattern '*?' on a long string.
+        # Each expected end offset is the length of the repeated prefix plus
+        # the literal tail that finally lets the lazy repeat stop.
+        self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
+        self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
+                         20003)
+        self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
+        # non-simple '*?' still used to hit the recursion limit, before the
+        # non-recursive scheme was implemented.
+        self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
+    def test_bug_612074(self):
+        # A character class built from an escaped non-ASCII unicode character
+        # must compile. `and 1` collapses the (truthy) pattern object to 1 so
+        # the assertion only checks that compilation produced something.
+        pat=u"["+re.escape(u"\u2039")+u"]"
+        self.assertEqual(re.compile(pat) and 1, 1)
+    def test_stack_overflow(self):
+        # nasty cases that used to overflow the straightforward recursive
+        # implementation of repeated groups.
+        self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
+        self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
+        self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
+    def test_unlimited_zero_width_repeat(self):
+        # Issue #9669
+        self.assertIsNone(re.match(r'(?:a?)*y', 'z'))
+        self.assertIsNone(re.match(r'(?:a?)+y', 'z'))
+        self.assertIsNone(re.match(r'(?:a?){2,}y', 'z'))
+        self.assertIsNone(re.match(r'(?:a?)*?y', 'z'))
+        self.assertIsNone(re.match(r'(?:a?)+?y', 'z'))
+        self.assertIsNone(re.match(r'(?:a?){2,}?y', 'z'))
+    def test_scanner(self):
+        def s_ident(scanner, token): return token
+        def s_operator(scanner, token): return "op%s" % token
+        def s_float(scanner, token): return float(token)
+        def s_int(scanner, token): return int(token)
+        scanner = Scanner([
+            (r"[a-zA-Z_]\w*", s_ident),
+            (r"\d+\.\d*", s_float),
+            (r"\d+", s_int),
+            (r"=|\+|-|\*|/", s_operator),
+            (r"\s+", None),
+            ])
+        self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
+        self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
+                         (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
+                           'op+', 'bar'], ''))
+    def test_bug_448951(self):
+        # bug 448951 (similar to 429357, but with single char match)
+        # (Also test greedy matches.)
+        # `op` is spliced in as the quantifier of the inner '.' group.
+        for op in '','?','*':
+            # Optional outer group skipped entirely: both groups are None.
+            self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
+                             (None, None))
+            # Outer group taken: it captures 'a:' and the inner group 'a'.
+            self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
+                             ('a:', 'a'))
+    def test_bug_725106(self):
+        # capturing groups in alternatives in repeats
+        self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
+                         ('b', 'a'))
+        self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
+                         ('c', 'b'))
+        self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
+                         ('b', None))
+        self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
+                         ('b', None))
+        self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
+                         ('b', 'a'))
+        self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
+                         ('c', 'b'))
+        self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
+                         ('b', None))
+        self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
+                         ('b', None))
+    def test_bug_725149(self):
+        # mark_stack_base restoring before restoring marks
+        # Groups that only appear inside a lookahead / negative lookahead
+        # within a repeat must be reported as None once the match is done.
+        self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
+                         ('a', None))
+        self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
+                         ('a', None, None))
+    def test_bug_764548(self):
+        # bug 764548, re.compile() barfs on str/unicode subclasses
+        try:
+            unicode
+        except NameError:
+            self.skipTest('no problem if we have no unicode')
+        class my_unicode(unicode): pass
+        pat = re.compile(my_unicode("abc"))
+        self.assertEqual(pat.match("xyz"), None)
+    def test_finditer(self):
+        iter = re.finditer(r":+", "a:b::c:::d")
+        self.assertEqual([item.group(0) for item in iter],
+                         [":", "::", ":::"])
+    def test_bug_926075(self):
+        try:
+            unicode
+        except NameError:
+            self.skipTest('no problem if we have no unicode')
+        self.assertTrue(re.compile('bug_926075') is not
+                     re.compile(eval("u'bug_926075'")))
+    def test_bug_931848(self):
+        try:
+            unicode
+        except NameError:
+            self.skipTest('no problem if we have no unicode')
+        # Character class of four code points, the first being U+002E ('.');
+        # splitting on it must break "a.b.c" at each '.'.
+        pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
+        self.assertEqual(re.compile(pattern).split("a.b.c"),
+                         ['a','b','c'])
+    def test_bug_581080(self):
+        iter = re.finditer(r"\s", "a b")
+        self.assertEqual(iter.next().span(), (1,2))
+        self.assertRaises(StopIteration, iter.next)
+        scanner = re.compile(r"\s").scanner("a b")
+        self.assertEqual(scanner.search().span(), (1, 2))
+        self.assertEqual(scanner.search(), None)
+    def test_bug_817234(self):
+        iter = re.finditer(r".*", "asdf")
+        self.assertEqual(iter.next().span(), (0, 4))
+        self.assertEqual(iter.next().span(), (4, 4))
+        self.assertRaises(StopIteration, iter.next)
+    def test_bug_6561(self):
+        # '\d' should match characters in Unicode category 'Nd'
+        # (Number, Decimal Digit), but not those in 'Nl' (Number,
+        # Letter) or 'No' (Number, Other).
+        decimal_digits = [
+            u'\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
+            u'\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
+            u'\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
+            ]
+        # Positive cases: each digit must be matched in full by ^\d$.
+        for x in decimal_digits:
+            self.assertEqual(re.match('^\d$', x, re.UNICODE).group(0), x)
+        not_decimal_digits = [
+            u'\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
+            u'\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
+            u'\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
+            u'\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
+            ]
+        # Negative cases: number-like characters that are not decimal digits.
+        for x in not_decimal_digits:
+            self.assertIsNone(re.match('^\d$', x, re.UNICODE))
+    def test_empty_array(self):
+        # SF buf 1647541
+        import array
+        for typecode in 'cbBuhHiIlLfd':
+            a = array.array(typecode)
+            self.assertEqual(re.compile("bla").match(a), None)
+            self.assertEqual(re.compile("").match(a).groups(), ())
+    def test_inline_flags(self):
+        # Bug #1700
+        upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Bellow
+        lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Bellow
+        p = re.compile(upper_char, re.I | re.U)
+        q = p.match(lower_char)
+        self.assertNotEqual(q, None)
+        p = re.compile(lower_char, re.I | re.U)
+        q = p.match(upper_char)
+        self.assertNotEqual(q, None)
+        p = re.compile('(?i)' + upper_char, re.U)
+        q = p.match(lower_char)
+        self.assertNotEqual(q, None)
+        p = re.compile('(?i)' + lower_char, re.U)
+        q = p.match(upper_char)
+        self.assertNotEqual(q, None)
+        p = re.compile('(?iu)' + upper_char)
+        q = p.match(lower_char)
+        self.assertNotEqual(q, None)
+        p = re.compile('(?iu)' + lower_char)
+        q = p.match(upper_char)
+        self.assertNotEqual(q, None)
+    def test_dollar_matches_twice(self):
+        "$ matches the end of string, and just before the terminating \n"
+        # Default mode: '#' is inserted at the very end and, when the string
+        # ends with a newline, also right before that final newline.
+        pattern = re.compile('$')
+        self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
+        self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
+        self.assertEqual(pattern.sub('#', '\n'), '#\n#')
+        # MULTILINE mode: '#' is additionally inserted before every newline.
+        pattern = re.compile('$', re.MULTILINE)
+        self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
+        self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
+        self.assertEqual(pattern.sub('#', '\n'), '#\n#')
+    def test_dealloc(self):
+        # issue 3299: check for segfault in debug build
+        import _sre
+        # the overflow limit is different on wide and narrow builds and it
+        # depends on the definition of SRE_CODE (see sre.h).
+        # 2**128 should be big enough to overflow on both. For smaller values
+        # a RuntimeError is raised instead of OverflowError.
+        long_overflow = 2**128
+        # A dict is not a valid subject string: finditer must raise TypeError.
+        self.assertRaises(TypeError, re.finditer, "a", {})
+        # Handing _sre.compile a code list with an oversized value must raise
+        # OverflowError.
+        self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow])
+    def test_compile(self):
+        # Test return value when given string and pattern as parameter
+        pattern = re.compile('random pattern')
+        self.assertIsInstance(pattern, re._pattern_type)
+        # Compiling an already compiled pattern must return that very object.
+        same_pattern = re.compile(pattern)
+        self.assertIsInstance(same_pattern, re._pattern_type)
+        self.assertIs(same_pattern, pattern)
+        # Test behaviour when not given a string or pattern as parameter
+        self.assertRaises(TypeError, re.compile, 0)
+    def test_bug_13899(self):
+        # Issue #13899: re pattern r"[\A]" should work like "A" but matches
+        # nothing. Ditto B and Z.
+        # Expected here: inside the class \A, \B, \C and \Z match the plain
+        # letters, and \b matches the backspace character in the subject.
+        self.assertEqual(re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ'),
+                         ['A', 'B', '\b', 'C', 'Z'])
+    @precisionbigmemtest(size=_2G, memuse=1)
+    def test_large_search(self, size):
+        # Issue #10182: indices were 32-bit-truncated.
+        # `size` is supplied by the bigmem decorator; '$' must be found at the
+        # very end of a string of that length, with untruncated offsets.
+        s = 'a' * size
+        m = re.search('$', s)
+        self.assertIsNotNone(m)
+        self.assertEqual(m.start(), size)
+        self.assertEqual(m.end(), size)
+    # The huge memuse is because of re.sub() using a list and a join()
+    # to create the replacement result.
+    @precisionbigmemtest(size=_2G, memuse=16 + 2)
+    def test_large_subn(self, size):
+        # Issue #10182: indices were 32-bit-truncated.
+        s = 'a' * size
+        # The empty pattern is expected to match at every position including
+        # the end of the string: size + 1 substitutions, text unchanged.
+        r, n = re.subn('', '', s)
+        self.assertEqual(r, s)
+        self.assertEqual(n, size + 1)
+    def test_repeat_minmax_overflow(self):
+        # Issue #13169
+        string = "x" * 100000
+        self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535))
+        self.assertEqual(re.match(r".{,65535}", string).span(), (0, 65535))
+        self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535))
+        self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536))
+        self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536))
+        self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536))
+        # 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t.
+        self.assertRaises(OverflowError, re.compile, r".{%d}" % 2**128)
+        self.assertRaises(OverflowError, re.compile, r".{,%d}" % 2**128)
+        self.assertRaises(OverflowError, re.compile, r".{%d,}?" % 2**128)
+        self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128))
+    @cpython_only
+    def test_repeat_minmax_overflow_maxrepeat(self):
+        # Boundary behaviour around _sre.MAXREPEAT; skipped when the constant
+        # is not exported by the _sre module.
+        try:
+            from _sre import MAXREPEAT
+        except ImportError:
+            self.skipTest('requires _sre.MAXREPEAT constant')
+        string = "x" * 100000
+        # MAXREPEAT - 1 is still a legal count: the patterns compile, and the
+        # 100000-character subject decides whether they match.
+        self.assertIsNone(re.match(r".{%d}" % (MAXREPEAT - 1), string))
+        self.assertEqual(re.match(r".{,%d}" % (MAXREPEAT - 1), string).span(),
+                         (0, 100000))
+        self.assertIsNone(re.match(r".{%d,}?" % (MAXREPEAT - 1), string))
+        # MAXREPEAT itself must already be rejected at compile time.
+        self.assertRaises(OverflowError, re.compile, r".{%d}" % MAXREPEAT)
+        self.assertRaises(OverflowError, re.compile, r".{,%d}" % MAXREPEAT)
+        self.assertRaises(OverflowError, re.compile, r".{%d,}?" % MAXREPEAT)
+    def test_backref_group_name_in_exception(self):
+        # Issue 17341: Poor error message when compiling invalid regex
+        # The error text must mention the offending backreference name '<foo>'.
+        with self.assertRaisesRegexp(sre_constants.error, '<foo>'):
+            re.compile('(?P=<foo>)')
+    def test_group_name_in_exception(self):
+        # Issue 17341: Poor error message when compiling invalid regex
+        # The error text must mention the invalid group name '?foo'; the '?'
+        # is escaped because the expected message is itself a regex.
+        with self.assertRaisesRegexp(sre_constants.error, '\?foo'):
+            re.compile('(?P<?foo>)')
+    def test_issue17998(self):
+        # Build '.<reps><mod>yz' for every repeat operator, greedy ('') and
+        # lazy ('?'), and require that it finds 'xyz' as a whole.
+        for reps in '*', '+', '?', '{1}':
+            for mod in '', '?':
+                pattern = '.' + reps + mod + 'yz'
+                self.assertEqual(re.compile(pattern, re.S).findall('xyz'),
+                                 ['xyz'], msg=pattern)
+                # Same check with the encoded pattern and a b'' subject.
+                pattern = pattern.encode()
+                self.assertEqual(re.compile(pattern, re.S).findall(b'xyz'),
+                                 [b'xyz'], msg=pattern)
+    def test_bug_2537(self):
+        # issue 2537: empty submatches
+        # Nested repeats: inner_op quantifies (x|y), outer_op the group around it.
+        for outer_op in ('{0,}', '*', '+', '{1,187}'):
+            for inner_op in ('{0,}', '*', '?'):
+                r = re.compile("^((x|y)%s)%s" % (inner_op, outer_op))
+                m = r.match("xyyzy")
+                # The match stops before 'z'; group 1 ends on an empty
+                # iteration while group 2 keeps its last real capture.
+                self.assertEqual(m.group(0), "xyy")
+                self.assertEqual(m.group(1), "")
+                self.assertEqual(m.group(2), "y")
+    def test_debug_flag(self):
+        with captured_stdout() as out:
+            re.compile('foo', re.DEBUG)
+        self.assertEqual(out.getvalue().splitlines(),
+                         ['literal 102', 'literal 111', 'literal 111'])
+        # Debug output is output again even a second time (bypassing
+        # the cache -- issue #20426).
+        with captured_stdout() as out:
+            re.compile('foo', re.DEBUG)
+        self.assertEqual(out.getvalue().splitlines(),
+                         ['literal 102', 'literal 111', 'literal 111'])
+    def test_keyword_parameters(self):
+        # Issue #20283: Accepting the string keyword parameter.
+        pat = re.compile(r'(ab)')
+        # 'abracadabra' contains 'ab' at offsets 0 and 7; pos/endpos restrict
+        # each call to the second occurrence.
+        self.assertEqual(
+            pat.match(string='abracadabra', pos=7, endpos=10).span(), (7, 9))
+        self.assertEqual(
+            pat.search(string='abracadabra', pos=3, endpos=10).span(), (7, 9))
+        self.assertEqual(
+            pat.findall(string='abracadabra', pos=3, endpos=10), ['ab'])
+        # maxsplit=1 splits at the first 'ab' only; the captured separator is
+        # kept in the result.
+        self.assertEqual(
+            pat.split(string='abracadabra', maxsplit=1),
+            ['', 'ab', 'racadabra'])
+def run_re_tests():
+    from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
+    if verbose:
+        print 'Running re_tests test suite'
+    else:
+        # To save time, only run the first and last 10 tests
+        #tests = tests[:10] + tests[-10:]
+        pass
+    for t in tests:
+        sys.stdout.flush()
+        pattern = s = outcome = repl = expected = None
+        if len(t) == 5:
+            pattern, s, outcome, repl, expected = t
+        elif len(t) == 3:
+            pattern, s, outcome = t
+        else:
+            raise ValueError, ('Test tuples should have 3 or 5 fields', t)
+        try:
+            obj = re.compile(pattern)
+        except re.error:
+            if outcome == SYNTAX_ERROR: pass  # Expected a syntax error
+            else:
+                print '=== Syntax error:', t
+        except KeyboardInterrupt: raise KeyboardInterrupt
+        except:
+            print '*** Unexpected error ***', t
+            if verbose:
+                traceback.print_exc(file=sys.stdout)
+        else:
+            try:
+                result = obj.search(s)
+            except re.error, msg:
+                print '=== Unexpected exception', t, repr(msg)
+            if outcome == SYNTAX_ERROR:
+                # This should have been a syntax error; forget it.
+                pass
+            elif outcome == FAIL:
+                if result is None: pass   # No match, as expected
+                else: print '=== Succeeded incorrectly', t
+            elif outcome == SUCCEED:
+                if result is not None:
+                    # Matched, as expected, so now we compute the
+                    # result string and compare it to our expected result.
+                    start, end = result.span(0)
+                    vardict={'found': result.group(0),
+                             'groups': result.group(),
+                             'flags': result.re.flags}
+                    for i in range(1, 100):
+                        try:
+                            gi = result.group(i)
+                            # Special hack because else the string concat fails:
+                            if gi is None:
+                                gi = "None"
+                        except IndexError:
+                            gi = "Error"
+                        vardict['g%d' % i] = gi
+                    for i in result.re.groupindex.keys():
+                        try:
+                            gi = result.group(i)
+                            if gi is None:
+                                gi = "None"
+                        except IndexError:
+                            gi = "Error"
+                        vardict[i] = gi
+                    repl = eval(repl, vardict)
+                    if repl != expected:
+                        print '=== grouping error', t,
+                        print repr(repl) + ' should be ' + repr(expected)
+                else:
+                    print '=== Failed incorrectly', t
+                # Try the match on a unicode string, and check that it
+                # still succeeds.
+                try:
+                    result = obj.search(unicode(s, "latin-1"))
+                    if result is None:
+                        print '=== Fails on unicode match', t
+                except NameError:
+                    continue # 1.5.2
+                except TypeError:
+                    continue # unicode test case
+                # Try the match on a unicode pattern, and check that it
+                # still succeeds.
+                obj=re.compile(unicode(pattern, "latin-1"))
+                result = obj.search(s)
+                if result is None:
+                    print '=== Fails on unicode pattern match', t
+                # Try the match with the search area limited to the extent
+                # of the match and see if it still succeeds.  \B will
+                # break (because it won't match at the end or start of a
+                # string), so we'll ignore patterns that feature it.
+                if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
+                               and result is not None:
+                    obj = re.compile(pattern)
+                    result = obj.search(s, result.start(0), result.end(0) + 1)
+                    if result is None:
+                        print '=== Failed on range-limited match', t
+                # Try the match with IGNORECASE enabled, and check that it
+                # still succeeds.
+                obj = re.compile(pattern, re.IGNORECASE)
+                result = obj.search(s)
+                if result is None:
+                    print '=== Fails on case-insensitive match', t
+                # Try the match with LOCALE enabled, and check that it
+                # still succeeds.
+                obj = re.compile(pattern, re.LOCALE)
+                result = obj.search(s)
+                if result is None:
+                    print '=== Fails on locale-sensitive match', t
+                # Try the match with UNICODE locale enabled, and check
+                # that it still succeeds.
+                obj = re.compile(pattern, re.UNICODE)
+                result = obj.search(s)
+                if result is None:
+                    print '=== Fails on unicode-sensitive match', t
+def test_main():
+    # Entry point: run the unittest-based ReTests class first, then the
+    # table-driven checks in run_re_tests().
+    run_unittest(ReTests)
+    run_re_tests()
+if __name__ == "__main__":
+    test_main()
--- a/microbenchmarks/django_lexing.py
+++ b/microbenchmarks/django_lexing.py
+import os
+import sys
+sys.path.append(os.path.join(os.path.dirname(__file__), "../test/integration/django"))
+from django.template.base import Lexer, Parser
+import time
+try:
+    import __pyston__
+    pyston_loaded = True
+except:
+    pyston_loaded = False
+template_source = """
+{% extends "admin/base_site.html" %}
+{% load i18n admin_static %}
+{% block extrastyle %}{{ block.super }}<link rel="stylesheet" type="text/css" href="{% static "admin/css/dashboard.css" %}" />{% endblock %}
+{% block coltype %}colMS{% endblock %}
+{% block bodyclass %}{{ block.super }} dashboard{% endblock %}
+{% block breadcrumbs %}{% endblock %}
+{% block content %}
+<div id="content-main">
+{% if app_list %}
+    {% for app in app_list %}
+        <div class="app-{{ app.app_label }} module">
+        <table>
+        <caption>
+            <a href="{{ app.app_url }}" class="section" title="{% blocktrans with name=app.name %}Models in the {{ name }} application{% endblocktrans %}">{{ app.name }}</a>
+        </caption>
+        {% for model in app.models %}
+            <tr class="model-{{ model.object_name|lower }}">
+            {% if model.admin_url %}
+                <th scope="row"><a href="{{ model.admin_url }}">{{ model.name }}</a></th>
+            {% else %}
+                <th scope="row">{{ model.name }}</th>
+            {% endif %}
+            {% if model.add_url %}
+                <td><a href="{{ model.add_url }}" class="addlink">{% trans 'Add' %}</a></td>
+            {% else %}
+                <td>&nbsp;</td>
+            {% endif %}
+            {% if model.admin_url %}
+                <td><a href="{{ model.admin_url }}" class="changelink">{% trans 'Change' %}</a></td>
+            {% else %}
+                <td>&nbsp;</td>
+            {% endif %}
+            </tr>
+        {% endfor %}
+        </table>
+        </div>
+    {% endfor %}
+{% else %}
+    <p>{% trans "You don't have permission to edit anything." %}</p>
+{% endif %}
+</div>
+{% endblock %}
+{% block sidebar %}
+<div id="content-related">
+    <div class="module" id="recent-actions-module">
+        <h2>{% trans 'Recent Actions' %}</h2>
+        <h3>{% trans 'My Actions' %}</h3>
+            {% load log %}
+            {% get_admin_log 10 as admin_log for_user user %}
+            {% if not admin_log %}
+            <p>{% trans 'None available' %}</p>
+            {% else %}
+            <ul class="actionlist">
+            {% for entry in admin_log %}
+            <li class="{% if entry.is_addition %}addlink{% endif %}{% if entry.is_change %}changelink{% endif %}{% if entry.is_deletion %}deletelink{% endif %}">
+                {% if entry.is_deletion or not entry.get_admin_url %}
+                    {{ entry.object_repr }}
+                {% else %}
+                    <a href="{{ entry.get_admin_url }}">{{ entry.object_repr }}</a>
+                {% endif %}
+                <br/>
+                {% if entry.content_type %}
+                    <span class="mini quiet">{% filter capfirst %}{% trans entry.content_type.name %}{% endfilter %}</span>
+                {% else %}
+                    <span class="mini quiet">{% trans 'Unknown content' %}</span>
+                {% endif %}
+            </li>
+            {% endfor %}
+            </ul>
+            {% endif %}
+    </div>
+</div>
+{% endblock %}
+"""
+elapsed = 0
+for i in xrange(5000):
+    # print i
+    lexer = Lexer(template_source, None)
+    lexer.tokenize()
--- a/microbenchmarks/re_split_ubench.py
+++ b/microbenchmarks/re_split_ubench.py
+import re
+# Template syntax markers. Only the six *_TAG_START / *_TAG_END values are
+# used below to build tag_re; the others are not referenced in this script.
+# NOTE(review): these look like a copy of the constants in
+# django.template.base -- confirm against the Django version under test.
+FILTER_SEPARATOR = '|'
+FILTER_ARGUMENT_SEPARATOR = ':'
+VARIABLE_ATTRIBUTE_SEPARATOR = '.'
+BLOCK_TAG_START = '{%'
+BLOCK_TAG_END = '%}'
+VARIABLE_TAG_START = '{{'
+VARIABLE_TAG_END = '}}'
+COMMENT_TAG_START = '{#'
+COMMENT_TAG_END = '#}'
+TRANSLATOR_COMMENT_MARK = 'Translators'
+SINGLE_BRACE_START = '{'
+SINGLE_BRACE_END = '}'
+# Matches one complete block tag, variable tag or comment tag, non-greedily.
+# The whole alternation is a single capturing group, so split() keeps the
+# matched tags in its result alongside the text between them.
+tag_re = (re.compile('(%s.*?%s|%s.*?%s|%s.*?%s)' %
+          (re.escape(BLOCK_TAG_START), re.escape(BLOCK_TAG_END),
+           re.escape(VARIABLE_TAG_START), re.escape(VARIABLE_TAG_END),
+           re.escape(COMMENT_TAG_START), re.escape(COMMENT_TAG_END))))
+template_source = """
+{% extends "admin/base_site.html" %}
+{% load i18n admin_static %}
+{% block extrastyle %}{{ block.super }}<link rel="stylesheet" type="text/css" href="{% static "admin/css/dashboard.css" %}" />{% endblock %}
+{% block coltype %}colMS{% endblock %}
+{% block bodyclass %}{{ block.super }} dashboard{% endblock %}
+{% block breadcrumbs %}{% endblock %}
+{% block content %}
+<div id="content-main">
+{% if app_list %}
+    {% for app in app_list %}
+        <div class="app-{{ app.app_label }} module">
+        <table>
+        <caption>
+            <a href="{{ app.app_url }}" class="section" title="{% blocktrans with name=app.name %}Models in the {{ name }} application{% endblocktrans %}">{{ app.name }}</a>
+        </caption>
+        {% for model in app.models %}
+            <tr class="model-{{ model.object_name|lower }}">
+            {% if model.admin_url %}
+                <th scope="row"><a href="{{ model.admin_url }}">{{ model.name }}</a></th>
+            {% else %}
+                <th scope="row">{{ model.name }}</th>
+            {% endif %}
+            {% if model.add_url %}
+                <td><a href="{{ model.add_url }}" class="addlink">{% trans 'Add' %}</a></td>
+            {% else %}
+                <td>&nbsp;</td>
+            {% endif %}
+            {% if model.admin_url %}
+                <td><a href="{{ model.admin_url }}" class="changelink">{% trans 'Change' %}</a></td>
+            {% else %}
+                <td>&nbsp;</td>
+            {% endif %}
+            </tr>
+        {% endfor %}
+        </table>
+        </div>
+    {% endfor %}
+{% else %}
+    <p>{% trans "You don't have permission to edit anything." %}</p>
+{% endif %}
+</div>
+{% endblock %}
+{% block sidebar %}
+<div id="content-related">
+    <div class="module" id="recent-actions-module">
+        <h2>{% trans 'Recent Actions' %}</h2>
+        <h3>{% trans 'My Actions' %}</h3>
+            {% load log %}
+            {% get_admin_log 10 as admin_log for_user user %}
+            {% if not admin_log %}
+            <p>{% trans 'None available' %}</p>
+            {% else %}
+            <ul class="actionlist">
+            {% for entry in admin_log %}
+            <li class="{% if entry.is_addition %}addlink{% endif %}{% if entry.is_change %}changelink{% endif %}{% if entry.is_deletion %}deletelink{% endif %}">
+                {% if entry.is_deletion or not entry.get_admin_url %}
+                    {{ entry.object_repr }}
+                {% else %}
+                    <a href="{{ entry.get_admin_url }}">{{ entry.object_repr }}</a>
+                {% endif %}
+                <br/>
+                {% if entry.content_type %}
+                    <span class="mini quiet">{% filter capfirst %}{% trans entry.content_type.name %}{% endfilter %}</span>
+                {% else %}
+                    <span class="mini quiet">{% trans 'Unknown content' %}</span>
+                {% endif %}
+            </li>
+            {% endfor %}
+            </ul>
+            {% endif %}
+    </div>
+</div>
+{% endblock %}
+"""
+# Benchmark body: split the template on tag_re 30000 times; the resulting
+# lists are discarded, only the cost of the split matters.
+for i in xrange(30000):
+    tag_re.split(template_source)
--- a/src/codegen/ast_interpreter.cpp
+++ b/src/codegen/ast_interpreter.cpp
@@ -1704,6 +1704,7 @@ Box* astInterpretFunction(CLFunction* clfunc, int nargs, Box* closure, Box* gene
        clfunc->dependent_interp_callsites.invalidateAll();
+        UNAVOIDABLE_STAT_TIMER(t0, "us_timer_in_jitted_code");
        if (closure && generator)
            return optimized->closure_generator_call((BoxedClosure*)closure, (BoxedGenerator*)generator, arg1, arg2,
                                                     arg3, args);

--- a/src/codegen/codegen.h
+++ b/src/codegen/codegen.h
@@ -87,8 +87,6 @@ extern GlobalState g;
 // in runtime_hooks.cpp:
 void initGlobalFuncs(GlobalState& g);
-extern int sigprof_pending;
 DS_DECLARE_RWLOCK(codegen_rwlock);
 }

--- a/src/codegen/entry.cpp
+++ b/src/codegen/entry.cpp
@@ -55,8 +55,6 @@
 namespace pyston {
-int sigprof_pending = 0;
 GlobalState g;
 extern "C" {
@@ -358,9 +356,13 @@ static void handle_sigusr1(int signum) {
    _printStacktrace();
 }
+#if ENABLE_SAMPLING_PROFILER
+int sigprof_pending = 0;
 static void handle_sigprof(int signum) {
    sigprof_pending++;
 }
+#endif
 //#define INVESTIGATE_STAT_TIMER "us_timer_in_jitted_code"
 #ifdef INVESTIGATE_STAT_TIMER

--- a/src/codegen/irgen/util.cpp
+++ b/src/codegen/irgen/util.cpp
@@ -197,7 +197,7 @@ public:
                int pp_id = -1;
                for (int i = 0; i < ii->getNumArgOperands(); i++) {
                    llvm::Value* op = ii->getArgOperand(i);
-                    if (i != 1) {
+                    if (i != 2) {
                        if (i == 0) {
                            llvm::ConstantInt* l_pp_id = llvm::cast<llvm::ConstantInt>(op);
                            pp_id = l_pp_id->getSExtValue();

--- a/src/core/threading.h
+++ b/src/core/threading.h
@@ -32,6 +32,11 @@ namespace gc {
 class GCVisitor;
 }
+#if ENABLE_SAMPLING_PROFILER
+extern int sigprof_pending;
+void _printStacktrace();
+#endif
 namespace threading {
 // Whether or not a second thread was ever started:

--- a/src/runtime/capi.cpp
+++ b/src/runtime/capi.cpp
@@ -1491,7 +1491,7 @@ Box* BoxedCApiFunction::tppCall(Box* _self, CallRewriteArgs* rewrite_args, ArgPa
    if (!rewrite_success)
        rewrite_args = NULL;
-    RewriterVar* r_passthrough;
+    RewriterVar* r_passthrough = NULL;
    if (rewrite_args)
        r_passthrough = rewrite_args->rewriter->loadConst((intptr_t)self->passthrough, Location::forArg(0));

--- a/src/runtime/int.cpp
+++ b/src/runtime/int.cpp
@@ -843,7 +843,19 @@ extern "C" Box* intTrunc(BoxedInt* self) {
        raiseExcHelper(TypeError, "descriptor '__trunc__' requires a 'int' object but received a '%s'",
                       getTypeName(self));
+    if (self->cls == int_cls)
+        return self;
+    return boxInt(self->n);
+}
+extern "C" Box* intInt(BoxedInt* self) {
+    if (!isSubclass(self->cls, int_cls))
+        raiseExcHelper(TypeError, "descriptor '__int__' requires a 'int' object but received a '%s'",
+                       getTypeName(self));
+    if (self->cls == int_cls)
        return self;
+    return boxInt(self->n);
 }
 extern "C" Box* intIndex(BoxedInt* v) {
@@ -853,7 +865,7 @@ extern "C" Box* intIndex(BoxedInt* v) {
 }
 static Box* _intNew(Box* val, Box* base) {
-    if (isSubclass(val->cls, int_cls)) {
+    if (val->cls == int_cls) {
        RELEASE_ASSERT(!base, "");
        BoxedInt* n = static_cast<BoxedInt*>(val);
        if (val->cls == int_cls)
@@ -890,8 +902,33 @@ static Box* _intNew(Box* val, Box* base) {
        return r;
    } else if (val->cls == float_cls) {
        RELEASE_ASSERT(!base, "");
-        double d = static_cast<BoxedFloat*>(val)->d;
-        return new BoxedInt(d);
+        // This is tricky -- code copied from CPython:
+        double x = PyFloat_AsDouble(val);
+        double wholepart; /* integral portion of x, rounded toward 0 */
+        (void)modf(x, &wholepart);
+        /* Try to get out cheap if this fits in a Python int.  The attempt
+         * to cast to long must be protected, as C doesn't define what
+         * happens if the double is too big to fit in a long.  Some rare
+         * systems raise an exception then (RISCOS was mentioned as one,
+         * and someone using a non-default option on Sun also bumped into
+         * that).  Note that checking for <= LONG_MAX is unsafe: if a long
+         * has more bits of precision than a double, casting LONG_MAX to
+         * double may yield an approximation, and if that's rounded up,
+         * then, e.g., wholepart=LONG_MAX+1 would yield true from the C
+         * expression wholepart<=LONG_MAX, despite that wholepart is
+         * actually greater than LONG_MAX.  However, assuming a two's complement
+         * machine with no trap representation, LONG_MIN will be a power of 2 (and
+         * hence exactly representable as a double), and LONG_MAX = -1-LONG_MIN, so
+         * the comparisons with (double)LONG_MIN below should be safe.
+         */
+        if ((double)LONG_MIN <= wholepart && wholepart < -(double)LONG_MIN) {
+            const long aslong = (long)wholepart;
+            return PyInt_FromLong(aslong);
+        }
+        return PyLong_FromDouble(wholepart);
    } else {
        RELEASE_ASSERT(!base, "");
        static BoxedString* int_str = static_cast<BoxedString*>(PyString_InternFromString("__int__"));
@@ -992,7 +1029,7 @@ static void _addFuncIntUnknown(const char* name, ConcreteCompilerType* rtn_type,
    int_cls->giveAttr(name, new BoxedFunction(cl));
 }
-static Box* intInt(Box* b, void*) {
+static Box* intIntGetset(Box* b, void*) {
    if (b->cls == int_cls) {
        return b;
    } else {
@@ -1081,6 +1118,7 @@ void setupInt() {
    int_cls->giveAttr("__trunc__", new BoxedFunction(boxRTFunction((void*)intTrunc, BOXED_INT, 1)));
    int_cls->giveAttr("__index__", new BoxedFunction(boxRTFunction((void*)intIndex, BOXED_INT, 1)));
+    int_cls->giveAttr("__int__", new BoxedFunction(boxRTFunction((void*)intInt, BOXED_INT, 1)));
    int_cls->giveAttr("__new__", new BoxedFunction(boxRTFunction((void*)intNew, UNKNOWN, 3, 2, false, false,
                                                                 ParamNames({ "", "x", "base" }, "", "")),
@@ -1088,10 +1126,10 @@ void setupInt() {
    int_cls->giveAttr("bit_length", new BoxedFunction(boxRTFunction((void*)intBitLength, BOXED_INT, 1)));
-    int_cls->giveAttr("real", new (pyston_getset_cls) BoxedGetsetDescriptor(intInt, NULL, NULL));
+    int_cls->giveAttr("real", new (pyston_getset_cls) BoxedGetsetDescriptor(intIntGetset, NULL, NULL));
    int_cls->giveAttr("imag", new (pyston_getset_cls) BoxedGetsetDescriptor(int0, NULL, NULL));
-    int_cls->giveAttr("conjugate", new BoxedFunction(boxRTFunction((void*)intInt, BOXED_INT, 1)));
+    int_cls->giveAttr("conjugate", new BoxedFunction(boxRTFunction((void*)intIntGetset, BOXED_INT, 1)));
-    int_cls->giveAttr("numerator", new (pyston_getset_cls) BoxedGetsetDescriptor(intInt, NULL, NULL));
+    int_cls->giveAttr("numerator", new (pyston_getset_cls) BoxedGetsetDescriptor(intIntGetset, NULL, NULL));
    int_cls->giveAttr("denominator", new (pyston_getset_cls) BoxedGetsetDescriptor(int1, NULL, NULL));
    add_operators(int_cls);

--- a/src/runtime/objmodel.cpp
+++ b/src/runtime/objmodel.cpp
@@ -4090,7 +4090,7 @@ Box* compareInternal(Box* lhs, Box* rhs, int op_type, CompareRewriteArgs* rewrit
        }
        Box* contained;
-        RewriterVar* r_contained;
+        RewriterVar* r_contained = NULL;
        if (rewrite_args) {
            CallRewriteArgs crewrite_args(rewrite_args->rewriter, rewrite_args->rhs, rewrite_args->destination);
            crewrite_args.arg1 = rewrite_args->lhs;

--- a/src/runtime/types.cpp
+++ b/src/runtime/types.cpp
@@ -783,28 +783,30 @@ static Box* typeCallInner(CallRewriteArgs* rewrite_args, ArgPassSpec argspec, Bo
        }
    }
-    bool type_new_special_case;
+    // For debugging, keep track of why we think we can rewrite this:
+    enum { NOT_ALLOWED, VERIFIED, NO_INIT, TYPE_NEW_SPECIAL_CASE, } why_rewrite_allowed = NOT_ALLOWED;
    if (rewrite_args) {
-        bool ok = false;
        for (auto b : allowable_news) {
            if (b == new_attr) {
-                ok = true;
+                why_rewrite_allowed = VERIFIED;
                break;
            }
        }
-        if (!ok && (cls == int_cls || cls == float_cls || cls == long_cls)) {
+        if (cls == int_cls || cls == float_cls || cls == long_cls) {
-            if (npassed_args == 1)
+            if (npassed_args == 1) {
-                ok = true;
+                why_rewrite_allowed = VERIFIED;
-            else if (npassed_args == 2 && (arg2->cls == int_cls || arg2->cls == str_cls || arg2->cls == float_cls)) {
+            } else if (npassed_args == 2 && (arg2->cls == int_cls || arg2->cls == str_cls || arg2->cls == float_cls)) {
+                why_rewrite_allowed = NO_INIT;
                rewrite_args->arg2->addAttrGuard(offsetof(Box, cls), (intptr_t)arg2->cls);
-                ok = true;
            }
        }
-        type_new_special_case = (cls == type_cls && argspec == ArgPassSpec(2));
+        if (cls == type_cls && argspec == ArgPassSpec(2))
+            why_rewrite_allowed = TYPE_NEW_SPECIAL_CASE;
-        if (!ok && !type_new_special_case) {
+        if (why_rewrite_allowed == NOT_ALLOWED) {
            // Uncomment this to try to find __new__ functions that we could either white- or blacklist:
            // ASSERT(cls->is_user_defined || cls == type_cls, "Does '%s' have a well-behaved __new__?  if so, add to
            // allowable_news, otherwise add to the blacklist in this assert", cls->tp_name);
@@ -869,7 +871,8 @@ static Box* typeCallInner(CallRewriteArgs* rewrite_args, ArgPassSpec argspec, Bo
            }
        }
-        ASSERT(made->cls == cls || type_new_special_case,
+        ASSERT(made->cls == cls || why_rewrite_allowed == TYPE_NEW_SPECIAL_CASE
+                   || (why_rewrite_allowed == NO_INIT && cls->tp_init == object_cls->tp_init),
               "We should only have allowed the rewrite to continue if we were guaranteed that made "
               "would have class cls!");
    } else {
@@ -893,7 +896,9 @@ static Box* typeCallInner(CallRewriteArgs* rewrite_args, ArgPassSpec argspec, Bo
    // If __new__ returns a subclass, supposed to call that subclass's __init__.
    // If __new__ returns a non-subclass, not supposed to call __init__.
    if (made->cls != cls) {
-        ASSERT(rewrite_args == NULL, "We should only have allowed the rewrite to continue if we were guaranteed that "
+        ASSERT(rewrite_args == NULL || (why_rewrite_allowed == NO_INIT && made->cls->tp_init == object_cls->tp_init
+                                        && cls->tp_init == object_cls->tp_init),
+               "We should only have allowed the rewrite to continue if we were guaranteed that "
               "made would have class cls!");
        if (!isSubclass(made->cls, cls)) {

--- a/src/runtime/types.h
+++ b/src/runtime/types.h
@@ -208,7 +208,7 @@ public:
    pyston_call tpp_call;
    bool hasGenericGetattr() {
-        if (tp_getattr)
+        if (tp_getattr || tp_getattro != object_cls->tp_getattro)
            return false;
        // instancemethod_cls should have a custom tp_getattr but is currently implemented

--- a/test/cpython/test_re.py
+++ b/test/cpython/test_re.py
+../../from_cpython/Lib/test/test_re.py
\ No newline at end of file
--- a/test/tests/intmethods.py
+++ b/test/tests/intmethods.py
@@ -10,6 +10,11 @@ for i in xrange(1, 12):
        print i & j
        print i ^ j
+print (2).__int__()
+print (True).__int__()
+print (2).__trunc__()
+print (True).__trunc__()
 print 1 ** 0
 print 0 ** 0
 print -1 ** 0, (-1) ** 0, (-5) ** 0
@@ -93,3 +98,38 @@ for i1 in [1, I(2), 3, I(4)]:
 print int("12345", base=16)
 print type(2 ** 48)
+class I(int):  # int subclass whose hooks make it visible when int() calls __int__ / __init__
+    def __init__(self, n):  # logs every invocation, then records the argument on the instance
+        print "I.__init__(%r)" % n
+        self.n = n
+    def __int__(self):  # deliberately returns the I instance itself rather than a plain int
+        return self
+    def __repr__(self):  # shows the stored n, so nested I values are recognizable in the output
+        return "<I(%r)>" % self.n
+def call_int(i):  # test helper: run int() on i and report what came back
+    print "calling int(%r)" % i  # announce the argument by its repr
+    i2 = int(i)  # the conversion under test
+    print "return type:", type(i2)  # only the result's type is printed, not its value
+print
+call_int(1)  # baseline: a plain int argument
+print
+i = I(1)  # constructing the instance prints the I.__init__ line
+call_int(i)  # I.__int__ is still defined here and returns i itself
+print "i.n is a", type(i.n) # should be 'I' now! (presumably int(i) re-ran I.__init__ on i with n=i -- confirm against expected output)
+print
+del I.__int__  # remove the override; __int__ lookups on I now fall through to the base class
+i = I(1)
+call_int(i)  # same call as above, this time without I's own __int__
+print
+# These return longs:
+print int("12938719238719827398172938712983791827938712987312")
+print int(1e100)  # a float far outside the C long range