Commit 63c07e29 authored by Robert Bradshaw

Merge branch 'master' into ctuple

Conflicts:
	CHANGES.rst
parents b7d80418 99e68228
......@@ -22,12 +22,14 @@ before_install:
- sudo apt-get install gdb python$( python -c 'import sys; print("%d.%d" % sys.version_info[:2])' )-dbg || true
- dpkg -l | grep gdb || true
install: CFLAGS="-O2 -ggdb" pip install .
install:
- CFLAGS="-O2 -ggdb -Wall -Wextra $(python -c 'import sys; print("-fno-strict-aliasing" if sys.version_info[0] == 2 else "")')" python setup.py build
script:
- PYTHON_DBG="python$( python -c 'import sys; print("%d.%d" % sys.version_info[:2])' )-dbg"
- if $PYTHON_DBG -V >&2; then CFLAGS="-O0 -ggdb" $PYTHON_DBG runtests.py -vv Debugger --backends=$BACKEND; fi
- CFLAGS="-O0 -ggdb" python runtests.py -vv -x Debugger --backends=$BACKEND
- CFLAGS="-O2 -ggdb -Wall -Wextra" python setup.py build_ext -i
- CFLAGS="-O0 -ggdb -Wall -Wextra" python runtests.py -vv -x Debugger --backends=$BACKEND
matrix:
allow_failures:
......@@ -38,4 +40,3 @@ matrix:
env: BACKEND=cpp
- python: pypy3
env: BACKEND=cpp
fast_finish: true
......@@ -8,6 +8,21 @@ Latest
Features added
--------------
* C functions can coerce to Python functions, which allows passing them
around as callable objects.
* New ``cythonize`` option ``-a`` to generate the annotated HTML source view.
* Extern C functions can now be declared as cpdef to export them to
the module's Python namespace. Extern C functions in pxd files export
their values to their own module, iff it exists.
* Missing C-API declarations in ``cpython.unicode`` were added.
* Passing ``language='c++'`` into cythonize() globally enables C++ mode for
all modules that were not passed as Extension objects (i.e. only source
files and file patterns).
* ``Py_hash_t`` is a known type (used in CPython for hash values).
* ``PySlice_*()`` C-API functions are available from the ``cpython.slice``
......@@ -15,14 +30,20 @@ Features added
* Anonymous C tuple types can be declared as (ctype1, ctype2, ...); a short
sketch follows this list.
* Allow arrays of C++ classes.
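
For illustration, a minimal sketch of the new ctuple syntax (function names
are hypothetical):

    cdef (double, int) halve(int n):
        return n / 2.0, n % 2

    def use_ctuple():
        cdef (double, int) pair = halve(5)
        return pair  # coerces to an ordinary Python tuple
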
Bugs fixed
----------
* Mismatching 'except' declarations on signatures in .pxd and .pyx files failed
to produce a compile error (a sketch follows this list).
* Reference leak for non-simple Python expressions in boolean and/or expressions.
* ``getitimer()``, ``setitimer()``, ``gettimeofday()`` and related type/constant
definitions were moved from ``posix/time.pxd`` to ``posix/sys_time.pxd`` to
fix a naming collision.
* To fix a name collision and to reflect availability on host platforms,
standard C declarations [clock(), time(), struct tm and tm* functions]
were moved from posix/time.pxd to a new libc/time.pxd. Patch by Charles
Blake.
* Rerunning unmodified modules in IPython's cython support failed.
Patch by Matthias Bussonier.
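
A sketch of the kind of mismatch that now produces a compile error (file and
function names hypothetical):

    # m.pxd
    cdef int checked(int x) except -1

    # m.pyx -- omitting the 'except -1' from the signature now fails to compile
    cdef int checked(int x):
        return x + 1
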
......@@ -34,7 +55,12 @@ Bugs fixed
if the already created module was used later on (e.g. through a
stale reference in sys.modules or elsewhere).
* Allow arrays of C++ classes.
Other changes
-------------
* Compilation no longer fails hard when unknown compilation options are
passed. Instead, it raises a warning and ignores them (as it did silently
before 0.21). This will be changed back to an error in a future release.
0.21 (2014-09-10)
......
......@@ -145,6 +145,8 @@ def parse_args(args):
help='set a cythonize option')
parser.add_option('-3', dest='python3_mode', action='store_true',
help='use Python 3 syntax mode by default')
parser.add_option('-a', '--annotate', dest='annotate', action='store_true',
help='generate annotated HTML page for source files')
parser.add_option('-x', '--exclude', metavar='PATTERN', dest='excludes',
action='append', default=[],
......@@ -188,6 +190,9 @@ def main(args=None):
Options.error_on_unknown_names = False
Options.error_on_uninitialized = False
if options.annotate:
Options.annotate = True
for path in paths:
cython_compile(path, options)
......
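
A minimal sketch of the new option in use; the module name is hypothetical,
and the keyword form assumes 'annotate' is passed through as a compiler
option, as the code above does for the command line flag:

    # shell equivalent: cythonize -a mymodule.pyx
    from Cython.Build import cythonize
    cythonize("mymodule.pyx", annotate=True)  # writes mymodule.html next to the .c file
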
......@@ -251,6 +251,7 @@ def strip_string_literals(code, prefix='__Pyx_L'):
in_quote = False
hash_mark = single_q = double_q = -1
code_len = len(code)
quote_type = quote_len = None
while True:
if hash_mark < q:
......@@ -260,7 +261,8 @@ def strip_string_literals(code, prefix='__Pyx_L'):
if double_q < q:
double_q = code.find('"', q)
q = min(single_q, double_q)
if q == -1: q = max(single_q, double_q)
if q == -1:
q = max(single_q, double_q)
# We're done.
if q == -1 and hash_mark == -1:
......@@ -276,7 +278,8 @@ def strip_string_literals(code, prefix='__Pyx_L'):
if k % 2 == 0:
q += 1
continue
if code[q] == quote_type and (quote_len == 1 or (code_len > q + 2 and quote_type == code[q+1] == code[q+2])):
if code[q] == quote_type and (
quote_len == 1 or (code_len > q + 2 and quote_type == code[q+1] == code[q+2])):
counter += 1
label = "%s%s_" % (prefix, counter)
literals[label] = code[start+quote_len:q]
......@@ -586,7 +589,8 @@ def create_dependency_tree(ctx=None, quiet=False):
# This may be useful for advanced users?
def create_extension_list(patterns, exclude=[], ctx=None, aliases=None, quiet=False, exclude_failures=False):
def create_extension_list(patterns, exclude=[], ctx=None, aliases=None, quiet=False, language=None,
exclude_failures=False):
if not isinstance(patterns, (list, tuple)):
patterns = [patterns]
explicit_modules = set([m.name for m in patterns if isinstance(m, Extension)])
......@@ -606,6 +610,7 @@ def create_extension_list(patterns, exclude=[], ctx=None, aliases=None, quiet=Fa
name = '*'
base = None
exn_type = Extension
ext_language = language
elif isinstance(pattern, Extension):
for filepattern in pattern.sources:
if os.path.splitext(filepattern)[1] in ('.py', '.pyx'):
......@@ -618,6 +623,7 @@ def create_extension_list(patterns, exclude=[], ctx=None, aliases=None, quiet=Fa
name = template.name
base = DistutilsInfo(exn=template)
exn_type = template.__class__
ext_language = None # do not override whatever the Extension says
else:
raise TypeError(pattern)
......@@ -661,6 +667,9 @@ def create_extension_list(patterns, exclude=[], ctx=None, aliases=None, quiet=Fa
depends = list(set(template.depends).union(set(depends)))
kwds['depends'] = depends
if ext_language and 'language' not in kwds:
kwds['language'] = ext_language
module_list.append(exn_type(
name=module_name,
sources=sources,
......@@ -671,7 +680,7 @@ def create_extension_list(patterns, exclude=[], ctx=None, aliases=None, quiet=Fa
# This is the user-exposed entry point.
def cythonize(module_list, exclude=[], nthreads=0, aliases=None, quiet=False, force=False,
def cythonize(module_list, exclude=[], nthreads=0, aliases=None, quiet=False, force=False, language=None,
exclude_failures=False, **options):
"""
Compile a set of source modules into C/C++ files and return a list of distutils
......@@ -684,6 +693,11 @@ def cythonize(module_list, exclude=[], nthreads=0, aliases=None, quiet=False, fo
When using glob patterns, you can exclude certain module names explicitly
by passing them into the 'exclude' option.
To globally enable C++ mode, you can pass language='c++'. Otherwise, this
will be determined at a per-file level based on compiler directives. This
affects only modules found based on file names. Extension instances passed
into cythonize() will not be changed.
For parallel compilation, set the 'nthreads' option to the number of
concurrent builds.
......@@ -711,6 +725,7 @@ def cythonize(module_list, exclude=[], nthreads=0, aliases=None, quiet=False, fo
ctx=ctx,
quiet=quiet,
exclude_failures=exclude_failures,
language=language,
aliases=aliases)
deps = create_dependency_tree(ctx, quiet=quiet)
build_dir = getattr(options, 'build_dir', None)
......
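
As the docstring above describes, a minimal setup.py sketch of enabling C++
mode globally (file names hypothetical):

    from distutils.core import setup
    from Cython.Build import cythonize

    # C++ mode applies to the .pyx files found by name; Extension instances
    # passed in directly keep whatever language they declare themselves
    setup(ext_modules=cythonize(["algo.pyx", "util.pyx"], language="c++"))
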
......@@ -4282,19 +4282,6 @@ class SliceNode(ExprNode):
if self.is_literal:
code.put_giveref(self.py_result())
def __deepcopy__(self, memo):
"""
There is a copy bug in python 2.4 for slice objects.
"""
return SliceNode(
self.pos,
start=copy.deepcopy(self.start, memo),
stop=copy.deepcopy(self.stop, memo),
step=copy.deepcopy(self.step, memo),
is_temp=self.is_temp,
is_literal=self.is_literal,
constant_result=self.constant_result)
class CallNode(ExprNode):
......@@ -6064,8 +6051,10 @@ class SequenceNode(ExprNode):
if isinstance(mult_factor.constant_result, (int,long)) \
and mult_factor.constant_result > 0:
size_factor = ' * %s' % mult_factor.constant_result
else:
elif mult_factor.type.signed:
size_factor = ' * ((%s<0) ? 0:%s)' % (c_mult, c_mult)
else:
size_factor = ' * (%s)' % (c_mult,)
if self.type is Builtin.tuple_type and (self.is_literal or self.slow) and not c_mult:
# use PyTuple_Pack() to avoid generating huge amounts of one-time code
......@@ -7597,7 +7586,7 @@ class BoundMethodNode(ExprNode):
def generate_result_code(self, code):
code.putln(
"%s = PyMethod_New(%s, %s, (PyObject*)%s->ob_type); %s" % (
"%s = __Pyx_PyMethod_New(%s, %s, (PyObject*)%s->ob_type); %s" % (
self.result(),
self.function.py_result(),
self.self_object.py_result(),
......@@ -7629,7 +7618,7 @@ class UnboundMethodNode(ExprNode):
def generate_result_code(self, code):
class_cname = code.pyclass_stack[-1].classobj.result()
code.putln(
"%s = PyMethod_New(%s, 0, %s); %s" % (
"%s = __Pyx_PyMethod_New(%s, 0, %s); %s" % (
self.result(),
self.function.py_result(),
class_cname,
......
......@@ -634,7 +634,7 @@ def check_definitions(flow, compiler_directives):
for entry in flow.entries:
if (not entry.cf_references
and not entry.is_pyclass_attr):
if entry.name != '_':
if entry.name != '_' and not entry.name.startswith('unused'):
# '_' is often used for unused variables, e.g. in loops
if entry.is_arg:
if warn_unused_arg:
......
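
A sketch of what the extended check above exempts, assuming compilation with
the 'warn.unused' directive enabled (variable names hypothetical):

    # cython: warn.unused=True
    def probe():
        _ = 1            # '_' was already exempt from the unused-variable warning
        unused_flag = 2  # names starting with 'unused' are now exempt as well
        leftover = 3     # still warns, since it is never referenced
        return None
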
......@@ -359,10 +359,9 @@ class Context(object):
return ".".join(names)
def setup_errors(self, options, result):
Errors.reset() # clear any remaining error state
Errors.reset() # clear any remaining error state
if options.use_listing_file:
result.listing_file = Utils.replace_suffix(source, ".lis")
path = result.listing_file
path = result.listing_file = Utils.replace_suffix(result.main_source_file, ".lis")
else:
path = None
Errors.open_listing_file(path=path,
......@@ -499,11 +498,14 @@ class CompilationOptions(object):
# ignore valid options that are not in the defaults
unknown_options.difference_update(['include_path'])
if unknown_options:
raise ValueError("got unexpected compilation option%s: %s" % (
# TODO: make this a hard error in 0.22
message = "got unknown compilation option%s, please remove: %s" % (
's' if len(unknown_options) > 1 else '',
', '.join(unknown_options)))
', '.join(unknown_options))
import warnings
warnings.warn(message)
directives = dict(options['compiler_directives']) # copy mutable field
directives = dict(options['compiler_directives']) # copy mutable field
options['compiler_directives'] = directives
if 'language_level' in directives and 'language_level' not in kw:
options['language_level'] = int(directives['language_level'])
......
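
A sketch of the softened failure mode, assuming a source file mod.pyx exists;
'not_a_real_option' is intentionally bogus:

    import warnings
    from Cython.Build import cythonize

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        cythonize("mod.pyx", not_a_real_option=True)  # warned about, then ignored
    assert "unknown compilation option" in str(caught[0].message)
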
......@@ -439,13 +439,16 @@ def get_is_contig_utility(c_contig, ndim):
return utility
def copy_src_to_dst_cname():
return "__pyx_memoryview_copy_contents"
def verify_direct_dimensions(node):
for access, packing in node.type.axes:
if access != 'direct':
error(self.pos, "All dimensions must be direct")
error(node.pos, "All dimensions must be direct")
def copy_broadcast_memview_src_to_dst(src, dst, code):
"""
......@@ -662,7 +665,7 @@ def get_axes_specs(env, axes):
if entry.name in view_constant_to_access_packing:
axes_specs.append(view_constant_to_access_packing[entry.name])
else:
raise CompilerError(axis.step.pos, INVALID_ERR)
raise CompileError(axis.step.pos, INVALID_ERR)
else:
raise CompileError(axis.step.pos, INVALID_ERR)
......
......@@ -1289,6 +1289,11 @@ class CVarDefNode(StatNode):
"Non-trivial type declarators in shared declaration (e.g. mix of pointers and values). " +
"Each pointer declaration should be on its own line.", 1)
create_extern_wrapper = (self.overridable
and self.visibility == 'extern'
and env.is_module_scope)
if create_extern_wrapper:
declarator.overridable = False
if isinstance(declarator, CFuncDeclaratorNode):
name_declarator, type = declarator.analyse(base_type, env, directive_locals=self.directive_locals)
else:
......@@ -1314,6 +1319,9 @@ class CVarDefNode(StatNode):
self.entry.directive_locals = copy.copy(self.directive_locals)
if 'staticmethod' in env.directives:
type.is_static_method = True
if create_extern_wrapper:
self.entry.type.create_to_py_utility_code(env)
self.entry.create_wrapper = True
else:
if self.directive_locals:
error(self.pos, "Decorators can only be followed by functions")
......@@ -1601,7 +1609,7 @@ class FuncDefNode(StatNode, BlockNode):
if arg.name in directive_locals:
type_node = directive_locals[arg.name]
other_type = type_node.analyse_as_type(env)
elif isinstance(arg, CArgDeclNode) and arg.annotation:
elif isinstance(arg, CArgDeclNode) and arg.annotation and env.directives['annotation_typing']:
type_node = arg.annotation
other_type = arg.inject_type_from_annotations(env)
if other_type is None:
......
......@@ -1244,7 +1244,10 @@ class CConstType(BaseType):
def declaration_code(self, entity_code,
for_display = 0, dll_linkage = None, pyrex = 0):
return self.const_base_type.declaration_code("const %s" % entity_code, for_display, dll_linkage, pyrex)
if for_display or pyrex:
return "const " + self.const_base_type.declaration_code(entity_code, for_display, dll_linkage, pyrex)
else:
return self.const_base_type.declaration_code("const %s" % entity_code, for_display, dll_linkage, pyrex)
def specialize(self, values):
base_type = self.const_base_type.specialize(values)
......@@ -1539,8 +1542,10 @@ class CBIntType(CIntType):
def declaration_code(self, entity_code,
for_display = 0, dll_linkage = None, pyrex = 0):
if pyrex or for_display:
if for_display:
base_code = 'bool'
elif pyrex:
base_code = 'bint'
else:
base_code = public_decl('int', dll_linkage)
return self.base_declaration_code(base_code, entity_code)
......@@ -2410,6 +2415,10 @@ class CFuncType(CType):
return 0
if not self.same_calling_convention_as(other_type):
return 0
if self.exception_value != other_type.exception_value:
return 0
if self.exception_check != other_type.exception_check:
return 0
return 1
def compatible_signature_with(self, other_type, as_cmethod = 0):
......@@ -2444,10 +2453,14 @@ class CFuncType(CType):
return 0
if self.nogil != other_type.nogil:
return 0
if self.exception_value != other_type.exception_value:
return 0
if not self.exception_check and other_type.exception_check:
# a redundant exception check doesn't make functions incompatible, but a missing one does
return 0
self.original_sig = other_type.original_sig or other_type
return 1
def narrower_c_signature_than(self, other_type, as_cmethod = 0):
return self.narrower_c_signature_than_resolved_type(other_type.resolve(), as_cmethod)
......@@ -2471,6 +2484,11 @@ class CFuncType(CType):
return 0
if not self.return_type.subtype_of_resolved_type(other_type.return_type):
return 0
if self.exception_value != other_type.exception_value:
return 0
if not self.exception_check and other_type.exception_check:
# a redundant exception check doesn't make functions incompatible, but a missing one does
return 0
return 1
def same_calling_convention_as(self, other):
......@@ -2487,22 +2505,12 @@ class CFuncType(CType):
sc2 = other.calling_convention == '__stdcall'
return sc1 == sc2
def same_exception_signature_as(self, other_type):
return self.same_exception_signature_as_resolved_type(
other_type.resolve())
def same_exception_signature_as_resolved_type(self, other_type):
return self.exception_value == other_type.exception_value \
and self.exception_check == other_type.exception_check
def same_as_resolved_type(self, other_type, as_cmethod = 0):
return self.same_c_signature_as_resolved_type(other_type, as_cmethod) \
and self.same_exception_signature_as_resolved_type(other_type) \
and self.nogil == other_type.nogil
def pointer_assignable_from_resolved_type(self, other_type):
return self.same_c_signature_as_resolved_type(other_type) \
and self.same_exception_signature_as_resolved_type(other_type) \
and not (self.nogil and not other_type.nogil)
def declaration_code(self, entity_code,
......@@ -2649,6 +2657,74 @@ class CFuncType(CType):
assert not self.is_fused
specialize_entry(entry, cname)
def create_to_py_utility_code(self, env):
# FIXME: it seems we're trying to coerce in more cases than we should
if self.has_varargs or self.optional_arg_count:
return False
if self.to_py_function is not None:
return self.to_py_function
from .UtilityCode import CythonUtilityCode
import re
safe_typename = re.sub('[^a-zA-Z0-9]', '__', self.declaration_code("", pyrex=1))
to_py_function = "__Pyx_CFunc_%s_to_py" % safe_typename
for arg in self.args:
if not arg.type.is_pyobject and not arg.type.create_from_py_utility_code(env):
return False
if not (self.return_type.is_pyobject or self.return_type.is_void or
self.return_type.create_to_py_utility_code(env)):
return False
def declared_type(ctype):
type_displayname = str(ctype.declaration_code("", for_display=True))
if ctype.is_pyobject:
arg_ctype = type_name = type_displayname
if ctype.is_builtin_type:
arg_ctype = ctype.name
elif not ctype.is_extension_type:
type_name = 'object'
type_displayname = None
else:
type_displayname = repr(type_displayname)
elif ctype is c_bint_type:
type_name = arg_ctype = 'bint'
else:
type_name = arg_ctype = type_displayname
if ctype is c_double_type:
type_displayname = 'float'
else:
type_displayname = repr(type_displayname)
return type_name, arg_ctype, type_displayname
class Arg(object):
def __init__(self, arg_name, arg_type):
self.name = arg_name
self.type = arg_type
self.type_cname, self.ctype, self.type_displayname = declared_type(arg_type)
if self.return_type.is_void:
except_clause = 'except *'
elif self.return_type.is_pyobject:
except_clause = ''
elif self.exception_value:
except_clause = ('except? %s' if self.exception_check else 'except %s') % self.exception_value
else:
except_clause = 'except *'
context = {
'cname': to_py_function,
'args': [Arg(arg.name or 'arg%s' % ix, arg.type) for ix, arg in enumerate(self.args)],
'return_type': Arg('return', self.return_type),
'except_clause': except_clause,
}
# FIXME: directives come from first defining environment and do not adapt for reuse
env.use_utility_code(CythonUtilityCode.load(
"cfunc.to_py", "CFuncConvert.pyx",
outer_module_scope=env.global_scope(), # need access to types declared in module
context=context, compiler_directives=dict(env.directives)))
self.to_py_function = to_py_function
return True
def specialize_entry(entry, cname):
"""
......@@ -3161,7 +3237,7 @@ class CppClassType(CType):
if self == actual:
return {}
# TODO(robertwb): Actual type equality.
elif self.empty_declaration_code() == actual.template_type.declaration_code(""):
elif self.empty_declaration_code() == actual.template_type.empty_declaration_code():
return reduce(
merge_template_deductions,
[formal_param.deduce_template_params(actual_param) for (formal_param, actual_param) in zip(self.templates, actual.templates)],
......
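
The create_to_py_utility_code() addition above is what backs the changelog
entry about C functions coercing to Python functions; a user-level sketch
(names hypothetical):

    cdef double scale(double x, double factor):
        return x * factor

    def get_scaler():
        # returning the cdef function coerces it into a Python callable wrapper
        return scale

Calling get_scaler()(3.0, 2.0) would then invoke the C function through the
generated wrapper.
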
......@@ -4,10 +4,14 @@ import cython
from ..Plex.Scanners cimport Scanner
cdef get_lexicon()
cdef initial_compile_time_env()
cdef class Method:
cdef object name
cdef object __name__
@cython.final
cdef class CompileTimeScope:
cdef public dict entries
cdef public CompileTimeScope outer
......@@ -15,6 +19,7 @@ cdef class CompileTimeScope:
cdef lookup_here(self, name)
cpdef lookup(self, name)
@cython.final
cdef class PyrexScanner(Scanner):
cdef public context
cdef public list included_files
......
......@@ -5,13 +5,15 @@
from __future__ import absolute_import
import cython
cython.declare(EncodedString=object, make_lexicon=object, lexicon=object,
any_string_prefix=unicode, IDENT=unicode,
print_function=object, error=object, warning=object,
os=object, platform=object)
import os
import platform
import cython
cython.declare(EncodedString=object, any_string_prefix=unicode, IDENT=unicode,
print_function=object, error=object, warning=object)
from .. import Utils
from ..Plex.Scanners import Scanner
from ..Plex.Errors import UnrecognizedInput
......@@ -28,12 +30,14 @@ scanner_dump_file = None
lexicon = None
def get_lexicon():
global lexicon
if not lexicon:
lexicon = make_lexicon()
return lexicon
#------------------------------------------------------------------
py_reserved_words = [
......@@ -49,15 +53,17 @@ pyx_reserved_words = py_reserved_words + [
"cimport", "DEF", "IF", "ELIF", "ELSE"
]
class Method(object):
def __init__(self, name):
self.name = name
self.__name__ = name # for Plex tracing
self.__name__ = name # for Plex tracing
def __call__(self, stream, text):
return getattr(stream, self.name)(text)
#------------------------------------------------------------------
class CompileTimeScope(object):
......@@ -88,6 +94,7 @@ class CompileTimeScope(object):
else:
raise
def initial_compile_time_env():
benv = CompileTimeScope()
names = ('UNAME_SYSNAME', 'UNAME_NODENAME', 'UNAME_RELEASE',
......@@ -116,6 +123,7 @@ def initial_compile_time_env():
denv = CompileTimeScope(benv)
return denv
#------------------------------------------------------------------
class SourceDescriptor(object):
......@@ -166,6 +174,7 @@ class SourceDescriptor(object):
except AttributeError:
return False
class FileSourceDescriptor(SourceDescriptor):
"""
Represents a code source. A code source is a more generic abstraction
......@@ -235,6 +244,7 @@ class FileSourceDescriptor(SourceDescriptor):
def __repr__(self):
return "<FileSourceDescriptor:%s>" % self.filename
class StringSourceDescriptor(SourceDescriptor):
"""
Instances of this class can be used instead of a filenames if the
......@@ -275,6 +285,7 @@ class StringSourceDescriptor(SourceDescriptor):
def __repr__(self):
return "<StringSourceDescriptor:%s>" % self.name
#------------------------------------------------------------------
class PyrexScanner(Scanner):
......@@ -284,8 +295,8 @@ class PyrexScanner(Scanner):
# compile_time_eval boolean In a true conditional compilation context
# compile_time_expr boolean In a compile-time expression context
def __init__(self, file, filename, parent_scanner = None,
scope = None, context = None, source_encoding=None, parse_comments=True, initial_pos=None):
def __init__(self, file, filename, parent_scanner=None,
scope=None, context=None, source_encoding=None, parse_comments=True, initial_pos=None):
Scanner.__init__(self, get_lexicon(), file, filename, initial_pos)
if parent_scanner:
self.context = parent_scanner.context
......@@ -299,8 +310,7 @@ class PyrexScanner(Scanner):
self.compile_time_env = initial_compile_time_env()
self.compile_time_eval = 1
self.compile_time_expr = 0
if hasattr(context.options, 'compile_time_env') and \
context.options.compile_time_env is not None:
if getattr(context.options, 'compile_time_env', None):
self.compile_time_env.update(context.options.compile_time_env)
self.parse_comments = parse_comments
self.source_encoding = source_encoding
......@@ -326,11 +336,11 @@ class PyrexScanner(Scanner):
return self.indentation_stack[-1]
def open_bracket_action(self, text):
self.bracket_nesting_level = self.bracket_nesting_level + 1
self.bracket_nesting_level += 1
return text
def close_bracket_action(self, text):
self.bracket_nesting_level = self.bracket_nesting_level - 1
self.bracket_nesting_level -= 1
return text
def newline_action(self, text):
......@@ -406,6 +416,7 @@ class PyrexScanner(Scanner):
sy, systring = self.read()
except UnrecognizedInput:
self.error("Unrecognized character")
return # just a marker, error() always raises
if sy == IDENT:
if systring in self.keywords:
if systring == u'print' and print_function in self.context.future_directives:
......@@ -445,21 +456,21 @@ class PyrexScanner(Scanner):
# This method should be added to Plex
self.queue.insert(0, (token, value))
def error(self, message, pos = None, fatal = True):
def error(self, message, pos=None, fatal=True):
if pos is None:
pos = self.position()
if self.sy == 'INDENT':
err = error(pos, "Possible inconsistent indentation")
error(pos, "Possible inconsistent indentation")
err = error(pos, message)
if fatal: raise err
def expect(self, what, message = None):
def expect(self, what, message=None):
if self.sy == what:
self.next()
else:
self.expected(what, message)
def expect_keyword(self, what, message = None):
def expect_keyword(self, what, message=None):
if self.sy == IDENT and self.systring == what:
self.next()
else:
......@@ -476,12 +487,10 @@ class PyrexScanner(Scanner):
self.error("Expected '%s', found '%s'" % (what, found))
def expect_indent(self):
self.expect('INDENT',
"Expected an increase in indentation level")
self.expect('INDENT', "Expected an increase in indentation level")
def expect_dedent(self):
self.expect('DEDENT',
"Expected a decrease in indentation level")
self.expect('DEDENT', "Expected a decrease in indentation level")
def expect_newline(self, message="Expected a newline", ignore_semicolon=False):
# Expect either a newline or end of file
......
......@@ -303,7 +303,7 @@ class Scope(object):
self.name = name
self.outer_scope = outer_scope
self.parent_scope = parent_scope
mangled_name = "%d%s_" % (len(name), name)
mangled_name = "%d%s_" % (len(name), name.replace('.', '_dot_'))
qual_scope = self.qualifying_scope()
if qual_scope:
self.qualified_name = qual_scope.qualify_name(name)
......@@ -1044,15 +1044,13 @@ class ModuleScope(Scope):
def global_scope(self):
return self
def lookup(self, name):
def lookup(self, name, language_level=None):
entry = self.lookup_here(name)
if entry is not None:
return entry
if self.context is not None:
language_level = self.context.language_level
else:
language_level = 3
if language_level is None:
language_level = self.context.language_level if self.context is not None else 3
return self.outer_scope.lookup(name, language_level=language_level)
......
......@@ -23,16 +23,19 @@ from . import UtilNodes
class StringParseContext(Main.Context):
def __init__(self, name, include_directories=None):
if include_directories is None: include_directories = []
Main.Context.__init__(self, include_directories, {},
def __init__(self, name, include_directories=None, compiler_directives=None):
if include_directories is None:
include_directories = []
if compiler_directives is None:
compiler_directives = {}
Main.Context.__init__(self, include_directories, compiler_directives,
create_testscope=False)
self.module_name = name
def find_module(self, module_name, relative_to = None, pos = None, need_pxd = 1):
def find_module(self, module_name, relative_to=None, pos=None, need_pxd=1):
if module_name not in (self.module_name, 'cython'):
raise AssertionError("Not yet supporting any cimports/includes from string code snippets")
return ModuleScope(module_name, parent_module = None, context = self)
return ModuleScope(module_name, parent_module=None, context=self)
def parse_from_strings(name, code, pxds={}, level=None, initial_pos=None,
......@@ -64,7 +67,7 @@ def parse_from_strings(name, code, pxds={}, level=None, initial_pos=None,
initial_pos = (name, 1, 0)
code_source = StringSourceDescriptor(name, code)
scope = context.find_module(module_name, pos = initial_pos, need_pxd = 0)
scope = context.find_module(module_name, pos=initial_pos, need_pxd=False)
buf = StringIO(code)
......@@ -190,20 +193,27 @@ class TemplateTransform(VisitorTransform):
else:
return self.visit_Node(node)
def copy_code_tree(node):
return TreeCopier()(node)
INDENT_RE = re.compile(ur"^ *")
_match_indent = re.compile(ur"^ *").match
def strip_common_indent(lines):
"Strips empty lines and common indentation from the list of strings given in lines"
"""Strips empty lines and common indentation from the list of strings given in lines"""
# TODO: Facilitate textwrap.indent instead
lines = [x for x in lines if x.strip() != u""]
minindent = min([len(INDENT_RE.match(x).group(0)) for x in lines])
minindent = min([len(_match_indent(x).group(0)) for x in lines])
lines = [x[minindent:] for x in lines]
return lines
class TreeFragment(object):
def __init__(self, code, name="(tree fragment)", pxds={}, temps=[], pipeline=[], level=None, initial_pos=None):
def __init__(self, code, name=None, pxds={}, temps=[], pipeline=[], level=None, initial_pos=None):
if not name:
name = "(tree fragment)"
if isinstance(code, unicode):
def fmt(x): return u"\n".join(strip_common_indent(x.split(u"\n")))
......
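
For reference, a sketch of what strip_common_indent() does:

    from Cython.Compiler.TreeFragment import strip_common_indent

    lines = [u"", u"    if flag:", u"        result = 1"]
    # the empty line is dropped, then the common 4-space indent is removed
    print(strip_common_indent(lines))  # [u'if flag:', u'    result = 1']
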
......@@ -494,24 +494,22 @@ def find_spanning_type(type1, type2):
return PyrexTypes.c_double_type
return result_type
def aggressive_spanning_type(types, might_overflow, pos):
result_type = reduce(find_spanning_type, types)
def simply_type(result_type, pos):
if result_type.is_reference:
result_type = result_type.ref_base_type
if result_type.is_const:
result_type = result_type.const_base_type
if result_type.is_cpp_class:
result_type.check_nullary_constructor(pos)
if result_type.is_array:
result_type = PyrexTypes.c_ptr_type(result_type.base_type)
return result_type
def aggressive_spanning_type(types, might_overflow, pos):
return simply_type(reduce(find_spanning_type, types), pos)
def safe_spanning_type(types, might_overflow, pos):
result_type = reduce(find_spanning_type, types)
if result_type.is_const:
result_type = result_type.const_base_type
if result_type.is_reference:
result_type = result_type.ref_base_type
if result_type.is_cpp_class:
result_type.check_nullary_constructor(pos)
result_type = simply_type(reduce(find_spanning_type, types), pos)
if result_type.is_pyobject:
# In theory, any specific Python type is always safe to
# infer. However, inferring str can cause some existing code
......
......@@ -8,6 +8,8 @@ from . import Code
class NonManglingModuleScope(Symtab.ModuleScope):
cpp = False
def __init__(self, prefix, *args, **kw):
self.prefix = prefix
self.cython_scope = None
......@@ -28,12 +30,11 @@ class NonManglingModuleScope(Symtab.ModuleScope):
else:
return Symtab.ModuleScope.mangle(self, prefix)
class CythonUtilityCodeContext(StringParseContext):
scope = None
def find_module(self, module_name, relative_to = None, pos = None,
need_pxd = 1):
def find_module(self, module_name, relative_to=None, pos=None, need_pxd=True):
if module_name != self.module_name:
if module_name not in self.modules:
raise AssertionError("Only the cython cimport is supported.")
......@@ -41,10 +42,8 @@ class CythonUtilityCodeContext(StringParseContext):
return self.modules[module_name]
if self.scope is None:
self.scope = NonManglingModuleScope(self.prefix,
module_name,
parent_module=None,
context=self)
self.scope = NonManglingModuleScope(
self.prefix, module_name, parent_module=None, context=self)
return self.scope
......@@ -69,7 +68,8 @@ class CythonUtilityCode(Code.UtilityCodeBase):
is_cython_utility = True
def __init__(self, impl, name="__pyxutil", prefix="", requires=None,
file=None, from_scope=None, context=None):
file=None, from_scope=None, context=None, compiler_directives=None,
outer_module_scope=None):
# 1) We need to delay the parsing/processing, so that all modules can be
# imported without import loops
# 2) The same utility code object can be used for multiple source files;
......@@ -84,6 +84,20 @@ class CythonUtilityCode(Code.UtilityCodeBase):
self.prefix = prefix
self.requires = requires or []
self.from_scope = from_scope
self.outer_module_scope = outer_module_scope
self.compiler_directives = compiler_directives
def __eq__(self, other):
if isinstance(other, CythonUtilityCode):
return self._equality_params() == other._equality_params()
else:
return False
def _equality_params(self):
return self.impl, self.outer_module_scope, self.compiler_directives
def __hash__(self):
return hash(self.impl)
def get_tree(self, entries_only=False, cython_scope=None):
from .AnalysedTreeTransforms import AutoTestDictTransform
......@@ -93,12 +107,13 @@ class CythonUtilityCode(Code.UtilityCodeBase):
excludes = [AutoTestDictTransform]
from . import Pipeline, ParseTreeTransforms
context = CythonUtilityCodeContext(self.name)
context = CythonUtilityCodeContext(
self.name, compiler_directives=self.compiler_directives)
context.prefix = self.prefix
context.cython_scope = cython_scope
#context = StringParseContext(self.name)
tree = parse_from_strings(self.name, self.impl, context=context,
allow_struct_enum_decorator=True)
tree = parse_from_strings(
self.name, self.impl, context=context, allow_struct_enum_decorator=True)
pipeline = Pipeline.create_pipeline(context, 'pyx', exclude_classes=excludes)
if entries_only:
......@@ -126,6 +141,16 @@ class CythonUtilityCode(Code.UtilityCodeBase):
pipeline = Pipeline.insert_into_pipeline(pipeline, scope_transform,
before=transform)
if self.outer_module_scope:
# inject outer module between utility code module and builtin module
def scope_transform(module_node):
module_node.scope.outer_scope = self.outer_module_scope
return module_node
transform = ParseTreeTransforms.AnalyseDeclarationsTransform
pipeline = Pipeline.insert_into_pipeline(pipeline, scope_transform,
before=transform)
(err, tree) = Pipeline.run_pipeline(pipeline, tree, printtree=False)
assert not err, err
return tree
......
......@@ -131,6 +131,131 @@ cdef extern from *:
#Py_ssize_t PyUnicode_AsWideChar(object o, wchar_t *w, Py_ssize_t size)
# Unicode Methods
# Concat two strings giving a new Unicode string.
# Return value: New reference.
unicode PyUnicode_Concat(object left, object right)
# Split a string giving a list of Unicode strings. If sep is NULL,
# splitting will be done at all whitespace substrings. Otherwise,
# splits occur at the given separator. At most maxsplit splits will
# be done. If negative, no limit is set. Separators are not included
# in the resulting list.
# Return value: New reference.
list PyUnicode_Split(object s, object sep, Py_ssize_t maxsplit)
# Split a Unicode string at line breaks, returning a list of Unicode
# strings. CRLF is considered to be one line break. If keepend is 0,
# the line break characters are not included in the resulting strings.
# Return value: New reference.
list PyUnicode_Splitlines(object s, bint keepend)
# Translate a string by applying a character mapping table to it and
# return the resulting Unicode object.
#
# The mapping table must map Unicode ordinal integers to Unicode ordinal
# integers or None (causing deletion of the character).
#
# Mapping tables need only provide the __getitem__() interface;
# dictionaries and sequences work well. Unmapped character ordinals (ones
# which cause a LookupError) are left untouched and are copied as-is.
#
# errors has the usual meaning for codecs. It may be NULL which indicates
# to use the default error handling.
# Return value: New reference.
unicode PyUnicode_Translate(object str, object table, const char *errors)
# Join a sequence of strings using the given separator and return the
# resulting Unicode string.
# Return value: New reference.
unicode PyUnicode_Join(object separator, object seq)
# Return 1 if substr matches str[start:end] at the given tail end
# (direction == -1 means to do a prefix match, direction == 1 a
# suffix match), 0 otherwise.
# Return -1 if an error occurred.
Py_ssize_t PyUnicode_Tailmatch(object str, object substr,
Py_ssize_t start, Py_ssize_t end, int direction) except -1
# Return the first position of substr in str[start:end] using the given
# direction (direction == 1 means to do a forward search, direction == -1
# a backward search). The return value is the index of the first match;
# a value of -1 indicates that no match was found, and -2 indicates that an
# error occurred and an exception has been set.
Py_ssize_t PyUnicode_Find(object str, object substr, Py_ssize_t start, Py_ssize_t end, int direction) except -2
# Return the first position of the character ch in str[start:end] using
# the given direction (direction == 1 means to do a forward search,
# direction == -1 a backward search). The return value is the index of
# the first match; a value of -1 indicates that no match was found, and
# -2 indicates that an error occurred and an exception has been set.
# New in version 3.3.
Py_ssize_t PyUnicode_FindChar(object str, Py_UCS4 ch, Py_ssize_t start, Py_ssize_t end, int direction) except -2
# Return the number of non-overlapping occurrences of substr in
# str[start:end]. Return -1 if an error occurred.
Py_ssize_t PyUnicode_Count(object str, object substr, Py_ssize_t start, Py_ssize_t end) except -1
# Replace at most maxcount occurrences of substr in str with replstr and
# return the resulting Unicode object. maxcount == -1 means replace all
# occurrences.
# Return value: New reference.
unicode PyUnicode_Replace(object str, object substr, object replstr, Py_ssize_t maxcount)
# Compare two strings and return -1, 0, 1 for less than,
# equal, and greater than, respectively.
int PyUnicode_Compare(object left, object right) except? -1
# Compare a unicode object, uni, with string and return -1, 0, 1 for less than,
# equal, and greater than, respectively. It is best to pass only ASCII-encoded
# strings, but the function interprets the input string as ISO-8859-1 if it
# contains non-ASCII characters.
int PyUnicode_CompareWithASCIIString(object uni, char *string) except? -1
# Rich compare two unicode strings and return one of the following:
#
# NULL in case an exception was raised
# Py_True or Py_False for successful comparisons
# Py_NotImplemented in case the type combination is unknown
#
# Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in case
# the conversion of the arguments to Unicode fails with a UnicodeDecodeError.
#
# Possible values for op are Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, and Py_LE.
object PyUnicode_RichCompare(object left, object right, int op)
# Return a new string object from format and args; this is analogous to
# format % args.
# Return value: New reference.
unicode PyUnicode_Format(object format, object args)
# Check whether element is contained in container and return true or false
# accordingly.
#
# element has to coerce to a one element Unicode string. -1 is returned
# if there was an error.
int PyUnicode_Contains(object container, object element) except -1
# Intern the argument *string in place. The argument must be the address
# of a pointer variable pointing to a Python unicode string object. If
# there is an existing interned string that is the same as *string, it sets
# *string to it (decrementing the reference count of the old string object
# and incrementing the reference count of the interned string object),
# otherwise it leaves *string alone and interns it (incrementing its reference
# count). (Clarification: even though there is a lot of talk about reference
# counts, think of this function as reference-count-neutral; you own the object
# after the call if and only if you owned it before the call.)
#void PyUnicode_InternInPlace(PyObject **string)
# A combination of PyUnicode_FromString() and PyUnicode_InternInPlace(),
# returning either a new unicode string object that has been interned, or
# a new ("owned") reference to an earlier interned string object with the
# same value.
unicode PyUnicode_InternFromString(const char *v)
# Codecs
# Create a Unicode object by decoding size bytes of the encoded
......@@ -161,22 +286,22 @@ cdef extern from *:
# Create a Unicode object by decoding size bytes of the UTF-8
# encoded string s. Return NULL if an exception was raised by the
# codec.
object PyUnicode_DecodeUTF8(char *s, Py_ssize_t size, char *errors)
unicode PyUnicode_DecodeUTF8(char *s, Py_ssize_t size, char *errors)
# If consumed is NULL, behave like PyUnicode_DecodeUTF8(). If
# consumed is not NULL, trailing incomplete UTF-8 byte sequences
# will not be treated as an error. Those bytes will not be decoded
# and the number of bytes that have been decoded will be stored in
# consumed. New in version 2.4.
object PyUnicode_DecodeUTF8Stateful(char *s, Py_ssize_t size, char *errors, Py_ssize_t *consumed)
unicode PyUnicode_DecodeUTF8Stateful(char *s, Py_ssize_t size, char *errors, Py_ssize_t *consumed)
# Encode the Py_UNICODE buffer of the given size using UTF-8 and
# return a Python string object. Return NULL if an exception was
# raised by the codec.
object PyUnicode_EncodeUTF8(Py_UNICODE *s, Py_ssize_t size, char *errors)
bytes PyUnicode_EncodeUTF8(Py_UNICODE *s, Py_ssize_t size, char *errors)
# Encode a Unicode object using UTF-8 and return the result as Python string object. Error handling is ``strict''. Return NULL if an exception was raised by the codec.
object PyUnicode_AsUTF8String(object unicode)
bytes PyUnicode_AsUTF8String(object unicode)
# These are the UTF-16 codec APIs:
......@@ -198,7 +323,7 @@ cdef extern from *:
# order at the.
#
# If byteorder is NULL, the codec starts in native order mode.
object PyUnicode_DecodeUTF16(char *s, Py_ssize_t size, char *errors, int *byteorder)
unicode PyUnicode_DecodeUTF16(char *s, Py_ssize_t size, char *errors, int *byteorder)
# If consumed is NULL, behave like PyUnicode_DecodeUTF16(). If
# consumed is not NULL, PyUnicode_DecodeUTF16Stateful() will not
......@@ -206,7 +331,7 @@ cdef extern from *:
# number of bytes or a split surrogate pair) as an error. Those
# bytes will not be decoded and the number of bytes that have been
# decoded will be stored in consumed. New in version 2.4.
object PyUnicode_DecodeUTF16Stateful(char *s, Py_ssize_t size, char *errors, int *byteorder, Py_ssize_t *consumed)
unicode PyUnicode_DecodeUTF16Stateful(char *s, Py_ssize_t size, char *errors, int *byteorder, Py_ssize_t *consumed)
# Return a Python string object holding the UTF-16 encoded value
# of the Unicode data in s. If byteorder is not 0, output is
......@@ -223,13 +348,13 @@ cdef extern from *:
# If Py_UNICODE_WIDE is defined, a single Py_UNICODE value may get
# represented as a surrogate pair. If it is not defined, each
# Py_UNICODE value is interpreted as a UCS-2 character.
object PyUnicode_EncodeUTF16(Py_UNICODE *s, Py_ssize_t size, char *errors, int byteorder)
bytes PyUnicode_EncodeUTF16(Py_UNICODE *s, Py_ssize_t size, char *errors, int byteorder)
# Return a Python string using the UTF-16 encoding in native byte
# order. The string always starts with a BOM mark. Error handling
# is ``strict''. Return NULL if an exception was raised by the
# codec.
object PyUnicode_AsUTF16String(object unicode)
bytes PyUnicode_AsUTF16String(object unicode)
# These are the ``Unicode Escape'' codec APIs:
......@@ -270,17 +395,17 @@ cdef extern from *:
# Create a Unicode object by decoding size bytes of the Latin-1
# encoded string s. Return NULL if an exception was raised by the
# codec.
object PyUnicode_DecodeLatin1(char *s, Py_ssize_t size, char *errors)
unicode PyUnicode_DecodeLatin1(char *s, Py_ssize_t size, char *errors)
# Encode the Py_UNICODE buffer of the given size using Latin-1 and
# return a Python string object. Return NULL if an exception was
# return a Python bytes object. Return NULL if an exception was
# raised by the codec.
object PyUnicode_EncodeLatin1(Py_UNICODE *s, Py_ssize_t size, char *errors)
bytes PyUnicode_EncodeLatin1(Py_UNICODE *s, Py_ssize_t size, char *errors)
# Encode a Unicode object using Latin-1 and return the result as
# Python string object. Error handling is ``strict''. Return NULL
# Python bytes object. Error handling is ``strict''. Return NULL
# if an exception was raised by the codec.
object PyUnicode_AsLatin1String(object unicode)
bytes PyUnicode_AsLatin1String(object unicode)
# These are the ASCII codec APIs. Only 7-bit ASCII data is
# accepted. All other codes generate errors.
......@@ -288,17 +413,17 @@ cdef extern from *:
# Create a Unicode object by decoding size bytes of the ASCII
# encoded string s. Return NULL if an exception was raised by the
# codec.
object PyUnicode_DecodeASCII(char *s, Py_ssize_t size, char *errors)
unicode PyUnicode_DecodeASCII(char *s, Py_ssize_t size, char *errors)
# Encode the Py_UNICODE buffer of the given size using ASCII and
# return a Python string object. Return NULL if an exception was
# return a Python bytes object. Return NULL if an exception was
# raised by the codec.
object PyUnicode_EncodeASCII(Py_UNICODE *s, Py_ssize_t size, char *errors)
bytes PyUnicode_EncodeASCII(Py_UNICODE *s, Py_ssize_t size, char *errors)
# Encode a Unicode object using ASCII and return the result as
# Python string object. Error handling is ``strict''. Return NULL
# Python bytes object. Error handling is ``strict''. Return NULL
# if an exception was raised by the codec.
object PyUnicode_AsASCIIString(object o)
bytes PyUnicode_AsASCIIString(object o)
# These are the mapping codec APIs:
#
......@@ -339,6 +464,8 @@ cdef extern from *:
# Encode the Py_UNICODE buffer of the given size using the given
# mapping object and return a Python string object. Return NULL if
# an exception was raised by the codec.
#
# Deprecated since version 3.3, will be removed in version 4.0.
object PyUnicode_EncodeCharmap(Py_UNICODE *s, Py_ssize_t size, object mapping, char *errors)
# Encode a Unicode object using the given mapping object and
......@@ -359,6 +486,8 @@ cdef extern from *:
# dictionaries and sequences work well. Unmapped character
# ordinals (ones which cause a LookupError) are left untouched and
# are copied as-is.
#
# Deprecated since version 3.3, will be removed in version 4.0.
object PyUnicode_TranslateCharmap(Py_UNICODE *s, Py_ssize_t size,
object table, char *errors)
......@@ -371,21 +500,43 @@ cdef extern from *:
# Create a Unicode object by decoding size bytes of the MBCS
# encoded string s. Return NULL if an exception was raised by the
# codec.
object PyUnicode_DecodeMBCS(char *s, Py_ssize_t size, char *errors)
unicode PyUnicode_DecodeMBCS(char *s, Py_ssize_t size, char *errors)
# If consumed is NULL, behave like PyUnicode_DecodeMBCS(). If
# consumed is not NULL, PyUnicode_DecodeMBCSStateful() will not
# decode trailing lead byte and the number of bytes that have been
# decoded will be stored in consumed. New in version 2.5.
# NOTE: Python 2.x uses 'int' values for 'size' and 'consumed' (changed in 3.0)
object PyUnicode_DecodeMBCSStateful(char *s, Py_ssize_t size, char *errors, Py_ssize_t *consumed)
unicode PyUnicode_DecodeMBCSStateful(char *s, Py_ssize_t size, char *errors, Py_ssize_t *consumed)
# Encode the Py_UNICODE buffer of the given size using MBCS and
# return a Python string object. Return NULL if an exception was
# raised by the codec.
object PyUnicode_EncodeMBCS(Py_UNICODE *s, Py_ssize_t size, char *errors)
bytes PyUnicode_EncodeMBCS(Py_UNICODE *s, Py_ssize_t size, char *errors)
# Encode a Unicode object using MBCS and return the result as
# Python string object. Error handling is ``strict''. Return NULL
# if an exception was raised by the codec.
object PyUnicode_AsMBCSString(object o)
bytes PyUnicode_AsMBCSString(object o)
# Encode the Unicode object using the specified code page and return
# a Python bytes object. Return NULL if an exception was raised by the
# codec. Use CP_ACP code page to get the MBCS encoder.
#
# New in version 3.3.
bytes PyUnicode_EncodeCodePage(int code_page, object unicode, const char *errors)
# Py_UCS4 helpers (new in CPython 3.3)
# These utility functions work on strings of Py_UCS4 characters and
# otherwise behave like the C standard library functions with the same name.
size_t Py_UCS4_strlen(const Py_UCS4 *u)
Py_UCS4* Py_UCS4_strcpy(Py_UCS4 *s1, const Py_UCS4 *s2)
Py_UCS4* Py_UCS4_strncpy(Py_UCS4 *s1, const Py_UCS4 *s2, size_t n)
Py_UCS4* Py_UCS4_strcat(Py_UCS4 *s1, const Py_UCS4 *s2)
int Py_UCS4_strcmp(const Py_UCS4 *s1, const Py_UCS4 *s2)
int Py_UCS4_strncmp(const Py_UCS4 *s1, const Py_UCS4 *s2, size_t n)
Py_UCS4* Py_UCS4_strchr(const Py_UCS4 *s, Py_UCS4 c)
Py_UCS4* Py_UCS4_strrchr(const Py_UCS4 *s, Py_UCS4 c)
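
A sketch of the sharpened declarations in use from a .pyx file (function name
hypothetical):

    from cpython.unicode cimport PyUnicode_Concat, PyUnicode_AsUTF8String

    def join_and_encode(a, b):
        cdef unicode joined = PyUnicode_Concat(a, b)      # typed unicode, not object
        cdef bytes data = PyUnicode_AsUTF8String(joined)  # typed bytes, not object
        return data
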
# http://en.wikipedia.org/wiki/C_date_and_time_functions
from libc.stddef cimport wchar_t
cdef extern from "time.h" nogil:
ctypedef long clock_t
ctypedef long time_t
enum: CLOCKS_PER_SEC
clock_t clock() # CPU time
time_t time(time_t *) # wall clock time since Unix epoch
cdef struct tm:
int tm_sec
int tm_min
int tm_hour
int tm_mday
int tm_mon
int tm_year
int tm_wday
int tm_yday
int tm_isdst
char *tm_zone
long tm_gmtoff
int daylight # global state
long timezone
char *tzname[2]
void tzset()
char *asctime(const tm *)
char *asctime_r(const tm *, char *)
char *ctime(const time_t *)
char *ctime_r(const time_t *, char *)
double difftime(time_t, time_t)
tm *getdate(const char *)
tm *gmtime(const time_t *)
tm *gmtime_r(const time_t *, tm *)
tm *localtime(const time_t *)
tm *localtime_r(const time_t *, tm *)
time_t mktime(tm *)
size_t strftime(char *, size_t, const char *, const tm *)
size_t wcsftime(wchar_t *str, size_t cnt, const wchar_t *fmt, tm *time)
# POSIX not stdC
char *strptime(const char *, const char *, tm *)
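
A sketch of the relocated declarations in use from a .pyx file (function name
hypothetical):

    from libc.time cimport time, time_t, tm, localtime, strftime

    def timestamp():
        cdef time_t now = time(NULL)
        cdef tm* parts = localtime(&now)
        cdef char buf[32]
        cdef size_t n = strftime(buf, sizeof(buf), b"%Y-%m-%d %H:%M:%S", parts)
        return buf[:n].decode('ascii')
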
# http://pubs.opengroup.org/onlinepubs/009695399/basedefs/sys/resource.h.html
from posix.sys_time cimport timeval
from posix.time cimport timeval
from posix.types cimport id_t
cdef extern from "sys/resource.h" nogil:
......
# http://pubs.opengroup.org/onlinepubs/009695399/basedefs/sys/time.h.html
from posix.types cimport suseconds_t, time_t
cdef extern from "sys/time.h" nogil:
enum: ITIMER_REAL
enum: ITIMER_VIRTUAL
enum: ITIMER_PROF
cdef struct timezone:
int tz_minuteswest
int dsttime
cdef struct timeval:
time_t tv_sec
suseconds_t tv_usec
cdef struct itimerval:
timeval it_interval
timeval it_value
int getitimer(int, itimerval *)
int gettimeofday(timeval *tp, timezone *tzp)
int setitimer(int, const itimerval *, itimerval *)
# http://pubs.opengroup.org/onlinepubs/009695399/basedefs/sys/time.h.html
from posix.types cimport suseconds_t, time_t, clockid_t, timer_t
from posix.signal cimport sigevent
from posix.types cimport clock_t, clockid_t, suseconds_t, time_t, timer_t
cdef extern from "time.h" nogil:
enum: CLOCKS_PER_SEC
cdef extern from "sys/time.h" nogil:
enum: CLOCK_PROCESS_CPUTIME_ID
enum: CLOCK_THREAD_CPUTIME_ID
......@@ -33,55 +31,44 @@ cdef extern from "time.h" nogil:
enum: CLOCK_REALTIME_ALARM
enum: CLOCK_BOOTTIME_ALARM
enum: ITIMER_REAL
enum: ITIMER_VIRTUAL
enum: ITIMER_PROF
cdef struct timezone:
int tz_minuteswest
int dsttime
cdef struct timeval:
time_t tv_sec
suseconds_t tv_usec
cdef struct timespec:
time_t tv_sec
long tv_nsec
cdef struct itimerval:
timeval it_interval
timeval it_value
cdef struct itimerspec:
timespec it_interval
timespec it_value
cdef struct tm:
int tm_sec
int tm_min
int tm_hour
int tm_mday
int tm_mon
int tm_year
int tm_wday
int tm_yday
int tm_isdst
char *tm_zone
long tm_gmtoff
int nanosleep(const timespec *, timespec *)
int getitimer(int, itimerval *)
int gettimeofday(timeval *tp, timezone *tzp)
int setitimer(int, const itimerval *, itimerval *)
char *asctime(const tm *)
char *asctime_r(const tm *, char *)
clock_t clock()
int clock_getcpuclockid(pid_t, clockid_t *)
int clock_getres(clockid_t, timespec *)
int clock_gettime(clockid_t, timespec *)
int clock_nanosleep(clockid_t, int, const timespec *, timespec *)
int clock_settime(clockid_t, const timespec *)
char *ctime(const time_t *)
char *ctime_r(const time_t *, char *)
double difftime(time_t, time_t)
tm *getdate(const char *)
tm *gmtime(const time_t *)
tm *gmtime_r(const time_t *, tm *)
tm *localtime(const time_t *)
tm *localtime_r(const time_t *, tm *)
time_t mktime(tm *)
int nanosleep(const timespec *, timespec *)
size_t strftime(char *, size_t, const char *, const tm *)
char *strptime(const char *, const char *, tm *)
time_t time(time_t *)
int timer_create(clockid_t, sigevent *, timer_t *)
int timer_delete(timer_t)
int timer_gettime(timer_t, itimerspec *)
int timer_getoverrun(timer_t)
int timer_settime(timer_t, int, const itimerspec *, itimerspec *)
void tzset()
int clock_getcpuclockid(pid_t, clockid_t *)
int clock_getres(clockid_t, timespec *)
int clock_gettime(clockid_t, timespec *)
int clock_nanosleep(clockid_t, int, const timespec *, timespec *)
int clock_settime(clockid_t, const timespec *)
int daylight
long timezone
char *tzname[2]
int timer_create(clockid_t, sigevent *, timer_t *)
int timer_delete(timer_t)
int timer_gettime(timer_t, itimerspec *)
int timer_getoverrun(timer_t)
int timer_settime(timer_t, int, const itimerspec *, itimerspec *)
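
A sketch of these declarations in use, assuming the post-merge layout keeps
timeval and gettimeofday() in posix/time.pxd (as the sys/resource.pxd change
above suggests):

    from posix.time cimport gettimeofday, timeval

    def wall_clock():
        cdef timeval tv
        gettimeofday(&tv, NULL)  # NULL: no timezone info requested
        return tv.tv_sec + tv.tv_usec / 1000000.0
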
cdef extern from "sys/types.h":
ctypedef long blkcnt_t
ctypedef long blksize_t
ctypedef long clock_t
ctypedef long clockid_t
ctypedef long dev_t
ctypedef long gid_t
......
......@@ -7,98 +7,101 @@
#=======================================================================
class Action(object):
def perform(self, token_stream, text):
pass # abstract
def perform(self, token_stream, text):
pass # abstract
def same_as(self, other):
return self is other
def same_as(self, other):
return self is other
class Return(Action):
"""
Internal Plex action which causes |value| to
be returned as the value of the associated token
"""
"""
Internal Plex action which causes |value| to
be returned as the value of the associated token
"""
def __init__(self, value):
self.value = value
def __init__(self, value):
self.value = value
def perform(self, token_stream, text):
return self.value
def perform(self, token_stream, text):
return self.value
def same_as(self, other):
return isinstance(other, Return) and self.value == other.value
def same_as(self, other):
return isinstance(other, Return) and self.value == other.value
def __repr__(self):
return "Return(%s)" % repr(self.value)
def __repr__(self):
return "Return(%s)" % repr(self.value)
class Call(Action):
"""
Internal Plex action which causes a function to be called.
"""
"""
Internal Plex action which causes a function to be called.
"""
def __init__(self, function):
self.function = function
def __init__(self, function):
self.function = function
def perform(self, token_stream, text):
return self.function(token_stream, text)
def perform(self, token_stream, text):
return self.function(token_stream, text)
def __repr__(self):
return "Call(%s)" % self.function.__name__
def __repr__(self):
return "Call(%s)" % self.function.__name__
def same_as(self, other):
return isinstance(other, Call) and self.function is other.function
def same_as(self, other):
return isinstance(other, Call) and self.function is other.function
class Begin(Action):
"""
Begin(state_name) is a Plex action which causes the Scanner to
enter the state |state_name|. See the docstring of Plex.Lexicon
for more information.
"""
"""
Begin(state_name) is a Plex action which causes the Scanner to
enter the state |state_name|. See the docstring of Plex.Lexicon
for more information.
"""
def __init__(self, state_name):
self.state_name = state_name
def __init__(self, state_name):
self.state_name = state_name
def perform(self, token_stream, text):
token_stream.begin(self.state_name)
def perform(self, token_stream, text):
token_stream.begin(self.state_name)
def __repr__(self):
return "Begin(%s)" % self.state_name
def __repr__(self):
return "Begin(%s)" % self.state_name
def same_as(self, other):
return isinstance(other, Begin) and self.state_name == other.state_name
def same_as(self, other):
return isinstance(other, Begin) and self.state_name == other.state_name
class Ignore(Action):
"""
IGNORE is a Plex action which causes its associated token
to be ignored. See the docstring of Plex.Lexicon for more
information.
"""
def perform(self, token_stream, text):
return None
"""
IGNORE is a Plex action which causes its associated token
to be ignored. See the docstring of Plex.Lexicon for more
information.
"""
def perform(self, token_stream, text):
return None
def __repr__(self):
return "IGNORE"
def __repr__(self):
return "IGNORE"
IGNORE = Ignore()
#IGNORE.__doc__ = Ignore.__doc__
class Text(Action):
"""
TEXT is a Plex action which causes the text of a token to
be returned as the value of the token. See the docstring of
Plex.Lexicon for more information.
"""
"""
TEXT is a Plex action which causes the text of a token to
be returned as the value of the token. See the docstring of
Plex.Lexicon for more information.
"""
def perform(self, token_stream, text):
return text
def perform(self, token_stream, text):
return text
def __repr__(self):
return "TEXT"
def __repr__(self):
return "TEXT"
TEXT = Text()
#TEXT.__doc__ = Text.__doc__
......
......@@ -13,147 +13,152 @@ from .Machines import LOWEST_PRIORITY
from .Transitions import TransitionMap
def nfa_to_dfa(old_machine, debug=None):
"""
Given a nondeterministic Machine, return a new equivalent
Machine which is deterministic.
"""
# We build a new machine whose states correspond to sets of states
# in the old machine. Initially we add a new state corresponding to
# the epsilon-closure of each initial old state. Then we give transitions
# to each new state which are the union of all transitions out of any
# of the corresponding old states. The new state reached on a given
# character is the one corresponding to the set of states reachable
# on that character from any of the old states. As new combinations of
# old states are created, new states are added as needed until closure
# is reached.
new_machine = Machines.FastMachine()
state_map = StateMap(new_machine)
# Seed the process using the initial states of the old machine.
# Make the corresponding new states into initial states of the new
# machine with the same names.
for (key, old_state) in old_machine.initial_states.iteritems():
new_state = state_map.old_to_new(epsilon_closure(old_state))
new_machine.make_initial_state(key, new_state)
# Tricky bit here: we add things to the end of this list while we're
# iterating over it. The iteration stops when closure is achieved.
for new_state in new_machine.states:
transitions = TransitionMap()
for old_state in state_map.new_to_old(new_state):
for event, old_target_states in old_state.transitions.iteritems():
if event and old_target_states:
transitions.add_set(event, set_epsilon_closure(old_target_states))
for event, old_states in transitions.iteritems():
new_machine.add_transitions(new_state, event, state_map.old_to_new(old_states))
if debug:
debug.write("\n===== State Mapping =====\n")
state_map.dump(debug)
return new_machine
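The comment block above is the classical subset construction. A self-contained toy sketch of the same loop on plain dicts (a hypothetical representation, not the Machines/TransitionMap types used here) makes the closure step easier to follow:

def toy_nfa_to_dfa(nfa, starts):
    # nfa: {state: {symbol: iterable_of_states}}; the symbol '' marks epsilon moves.
    def closure(states):
        result, stack = set(states), list(states)
        while stack:
            for target in nfa.get(stack.pop(), {}).get('', ()):
                if target not in result:
                    result.add(target)
                    stack.append(target)
        return frozenset(result)

    initial = closure(starts)
    dfa, worklist = {}, [initial]
    while worklist:  # the worklist grows until no new state sets appear
        current = worklist.pop()
        if current in dfa:
            continue
        moves = {}
        for state in current:
            for symbol, targets in nfa.get(state, {}).items():
                if symbol:  # real transitions only; epsilon is folded in by closure()
                    moves.setdefault(symbol, set()).update(closure(targets))
        dfa[current] = dict((sym, frozenset(t)) for sym, t in moves.items())
        worklist.extend(dfa[current].values())
    return initial, dfa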
def set_epsilon_closure(state_set):
"""
Given a set of states, return the union of the epsilon
closures of its member states.
"""
result = {}
for state1 in state_set:
for state2 in epsilon_closure(state1):
result[state2] = 1
return result
"""
Given a set of states, return the union of the epsilon
closures of its member states.
"""
result = {}
for state1 in state_set:
for state2 in epsilon_closure(state1):
result[state2] = 1
return result
def epsilon_closure(state):
"""
Return the set of states reachable from the given state
by epsilon moves.
"""
# Cache the result
result = state.epsilon_closure
if result is None:
result = {}
state.epsilon_closure = result
add_to_epsilon_closure(result, state)
return result
"""
Return the set of states reachable from the given state
by epsilon moves.
"""
# Cache the result
result = state.epsilon_closure
if result is None:
result = {}
state.epsilon_closure = result
add_to_epsilon_closure(result, state)
return result
def add_to_epsilon_closure(state_set, state):
"""
Recursively add to |state_set| states reachable from the given state
by epsilon moves.
"""
if not state_set.get(state, 0):
state_set[state] = 1
state_set_2 = state.transitions.get_epsilon()
if state_set_2:
for state2 in state_set_2:
add_to_epsilon_closure(state_set, state2)
class StateMap(object):
    """
    Helper class used by nfa_to_dfa() to map back and forth between
    sets of states from the old machine and states of the new machine.
    """
new_machine = None # Machine
old_to_new_dict = None # {(old_state,...) : new_state}
new_to_old_dict = None # {id(new_state) : old_state_set}
def __init__(self, new_machine):
self.new_machine = new_machine
self.old_to_new_dict = {}
self.new_to_old_dict = {}
def old_to_new(self, old_state_set):
"""
Return the state of the new machine corresponding to the
        set of old machine states represented by |old_state_set|. A new
state will be created if necessary. If any of the old states
are accepting states, the new state will be an accepting state
with the highest priority action from the old states.
"""
key = self.make_key(old_state_set)
new_state = self.old_to_new_dict.get(key, None)
if not new_state:
action = self.highest_priority_action(old_state_set)
new_state = self.new_machine.new_state(action)
self.old_to_new_dict[key] = new_state
self.new_to_old_dict[id(new_state)] = old_state_set
#for old_state in old_state_set.keys():
#new_state.merge_actions(old_state)
return new_state
def highest_priority_action(self, state_set):
best_action = None
best_priority = LOWEST_PRIORITY
for state in state_set:
priority = state.action_priority
if priority > best_priority:
best_action = state.action
best_priority = priority
return best_action
# def old_to_new_set(self, old_state_set):
# """
# Return the new state corresponding to a set of old states as
# a singleton set.
# """
# return {self.old_to_new(old_state_set):1}
def new_to_old(self, new_state):
"""Given a new state, return a set of corresponding old states."""
return self.new_to_old_dict[id(new_state)]
def make_key(self, state_set):
"""
Convert a set of states into a uniquified
sorted tuple suitable for use as a dictionary key.
"""
lst = list(state_set)
lst.sort()
return tuple(lst)
def dump(self, file):
from .Transitions import state_set_str
for new_state in self.new_machine.states:
old_state_set = self.new_to_old_dict[id(new_state)]
file.write(" State %s <-- %s\n" % (
new_state['number'], state_set_str(old_state_set)))
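A worked example of the key construction (with plain integers standing in for Node objects): make_key() only needs the states to be sortable, which is why Node defines __lt__ below.

state_set = {3: 1, 1: 1, 2: 1}  # old states are dict keys, as nfa_to_dfa() builds them
lst = list(state_set)           # arbitrary order, e.g. [3, 1, 2]
lst.sort()
key = tuple(lst)                # always (1, 2, 3), so equal sets share one DFA state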
......@@ -6,45 +6,49 @@
#
#=======================================================================
class PlexError(Exception):
message = ""
message = ""
class PlexTypeError(PlexError, TypeError):
pass
class PlexValueError(PlexError, ValueError):
pass
class InvalidRegex(PlexError):
pass
class InvalidToken(PlexError):
def __init__(self, token_number, message):
PlexError.__init__(self, "Token number %d: %s" % (token_number, message))
class InvalidScanner(PlexError):
pass
class AmbiguousAction(PlexError):
message = "Two tokens with different actions can match the same string"
def __init__(self):
pass
class UnrecognizedInput(PlexError):
scanner = None
position = None
state_name = None
def __init__(self, scanner, state_name):
self.scanner = scanner
self.position = scanner.get_position()
self.state_name = state_name
def __str__(self):
return ("'%s', line %d, char %d: Token not recognised in state %r" % (
self.position + (self.state_name,)))
......@@ -22,177 +22,179 @@ DUMP_DFA = 2
class State(object):
    """
    This class is used as part of a Plex.Lexicon specification to
    introduce a user-defined state.

    Constructor:

       State(name, token_specifications)
    """
    name = None
    tokens = None

    def __init__(self, name, tokens):
        self.name = name
        self.tokens = tokens
class Lexicon(object):
"""
Lexicon(specification) builds a lexical analyser from the given
|specification|. The specification consists of a list of
specification items. Each specification item may be either:
1) A token definition, which is a tuple:
(pattern, action)
The |pattern| is a regular axpression built using the
constructors defined in the Plex module.
The |action| is the action to be performed when this pattern
is recognised (see below).
2) A state definition:
State(name, tokens)
where |name| is a character string naming the state,
and |tokens| is a list of token definitions as
above. The meaning and usage of states is described
below.
Actions
-------
The |action| in a token specication may be one of three things:
1) A function, which is called as follows:
function(scanner, text)
where |scanner| is the relevant Scanner instance, and |text|
is the matched text. If the function returns anything
other than None, that value is returned as the value of the
token. If it returns None, scanning continues as if the IGNORE
action were specified (see below).
2) One of the following special actions:
IGNORE means that the recognised characters will be treated as
white space and ignored. Scanning will continue until
the next non-ignored token is recognised before returning.
TEXT causes the scanned text itself to be returned as the
value of the token.
3) Any other value, which is returned as the value of the token.
States
------
At any given time, the scanner is in one of a number of states.
Associated with each state is a set of possible tokens. When scanning,
only tokens associated with the current state are recognised.
There is a default state, whose name is the empty string. Token
definitions which are not inside any State definition belong to
the default state.
The initial state of the scanner is the default state. The state can
be changed in one of two ways:
1) Using Begin(state_name) as the action of a token.
2) Calling the begin(state_name) method of the Scanner.
To change back to the default state, use '' as the state name.
"""
machine = None # Machine
tables = None # StateTableMachine
def __init__(self, specifications, debug = None, debug_flags = 7, timings = None):
if type(specifications) != types.ListType:
raise Errors.InvalidScanner("Scanner definition is not a list")
if timings:
from .Timing import time
total_time = 0.0
time1 = time()
nfa = Machines.Machine()
default_initial_state = nfa.new_initial_state('')
token_number = 1
for spec in specifications:
if isinstance(spec, State):
user_initial_state = nfa.new_initial_state(spec.name)
for token in spec.tokens:
self.add_token_to_machine(
nfa, user_initial_state, token, token_number)
token_number = token_number + 1
elif type(spec) == types.TupleType:
self.add_token_to_machine(
nfa, default_initial_state, spec, token_number)
token_number = token_number + 1
else:
raise Errors.InvalidToken(
token_number,
"Expected a token definition (tuple) or State instance")
if timings:
time2 = time()
total_time = total_time + (time2 - time1)
time3 = time()
if debug and (debug_flags & 1):
debug.write("\n============= NFA ===========\n")
nfa.dump(debug)
dfa = DFA.nfa_to_dfa(nfa, debug = (debug_flags & 3) == 3 and debug)
if timings:
time4 = time()
total_time = total_time + (time4 - time3)
if debug and (debug_flags & 2):
debug.write("\n============= DFA ===========\n")
dfa.dump(debug)
if timings:
timings.write("Constructing NFA : %5.2f\n" % (time2 - time1))
timings.write("Converting to DFA: %5.2f\n" % (time4 - time3))
timings.write("TOTAL : %5.2f\n" % total_time)
self.machine = dfa
def add_token_to_machine(self, machine, initial_state, token_spec, token_number):
try:
(re, action_spec) = self.parse_token_definition(token_spec)
# Disabled this -- matching empty strings can be useful
#if re.nullable:
# raise Errors.InvalidToken(
# token_number, "Pattern can match 0 input symbols")
if isinstance(action_spec, Actions.Action):
action = action_spec
else:
"""
Lexicon(specification) builds a lexical analyser from the given
|specification|. The specification consists of a list of
specification items. Each specification item may be either:
1) A token definition, which is a tuple:
(pattern, action)
    The |pattern| is a regular expression built using the
constructors defined in the Plex module.
The |action| is the action to be performed when this pattern
is recognised (see below).
2) A state definition:
State(name, tokens)
where |name| is a character string naming the state,
and |tokens| is a list of token definitions as
above. The meaning and usage of states is described
below.
Actions
-------
    The |action| in a token specification may be one of three things:
1) A function, which is called as follows:
function(scanner, text)
where |scanner| is the relevant Scanner instance, and |text|
is the matched text. If the function returns anything
other than None, that value is returned as the value of the
token. If it returns None, scanning continues as if the IGNORE
action were specified (see below).
2) One of the following special actions:
IGNORE means that the recognised characters will be treated as
white space and ignored. Scanning will continue until
the next non-ignored token is recognised before returning.
TEXT causes the scanned text itself to be returned as the
value of the token.
3) Any other value, which is returned as the value of the token.
States
------
At any given time, the scanner is in one of a number of states.
Associated with each state is a set of possible tokens. When scanning,
only tokens associated with the current state are recognised.
There is a default state, whose name is the empty string. Token
definitions which are not inside any State definition belong to
the default state.
The initial state of the scanner is the default state. The state can
be changed in one of two ways:
1) Using Begin(state_name) as the action of a token.
2) Calling the begin(state_name) method of the Scanner.
To change back to the default state, use '' as the state name.
"""
machine = None # Machine
tables = None # StateTableMachine
def __init__(self, specifications, debug=None, debug_flags=7, timings=None):
if type(specifications) != types.ListType:
raise Errors.InvalidScanner("Scanner definition is not a list")
if timings:
from .Timing import time
total_time = 0.0
time1 = time()
nfa = Machines.Machine()
default_initial_state = nfa.new_initial_state('')
token_number = 1
for spec in specifications:
if isinstance(spec, State):
user_initial_state = nfa.new_initial_state(spec.name)
for token in spec.tokens:
self.add_token_to_machine(
nfa, user_initial_state, token, token_number)
token_number += 1
elif type(spec) == types.TupleType:
self.add_token_to_machine(
nfa, default_initial_state, spec, token_number)
token_number += 1
else:
raise Errors.InvalidToken(
token_number,
"Expected a token definition (tuple) or State instance")
if timings:
time2 = time()
total_time = total_time + (time2 - time1)
time3 = time()
if debug and (debug_flags & 1):
debug.write("\n============= NFA ===========\n")
nfa.dump(debug)
dfa = DFA.nfa_to_dfa(nfa, debug=(debug_flags & 3) == 3 and debug)
if timings:
time4 = time()
total_time = total_time + (time4 - time3)
if debug and (debug_flags & 2):
debug.write("\n============= DFA ===========\n")
dfa.dump(debug)
if timings:
timings.write("Constructing NFA : %5.2f\n" % (time2 - time1))
timings.write("Converting to DFA: %5.2f\n" % (time4 - time3))
timings.write("TOTAL : %5.2f\n" % total_time)
self.machine = dfa
def add_token_to_machine(self, machine, initial_state, token_spec, token_number):
try:
(re, action_spec) = self.parse_token_definition(token_spec)
# Disabled this -- matching empty strings can be useful
#if re.nullable:
# raise Errors.InvalidToken(
# token_number, "Pattern can match 0 input symbols")
if isinstance(action_spec, Actions.Action):
action = action_spec
else:
try:
action_spec.__call__
except AttributeError:
action = Actions.Return(action_spec)
else:
action = Actions.Call(action_spec)
final_state = machine.new_state()
re.build_machine(machine, initial_state, final_state,
match_bol=1, nocase=0)
final_state.set_action(action, priority=-token_number)
except Errors.PlexError, e:
raise e.__class__("Token number %d: %s" % (token_number, e))
def parse_token_definition(self, token_spec):
if type(token_spec) != types.TupleType:
raise Errors.InvalidToken("Token definition is not a tuple")
if len(token_spec) != 2:
raise Errors.InvalidToken("Wrong number of items in token definition")
pattern, action = token_spec
if not isinstance(pattern, Regexps.RE):
raise Errors.InvalidToken("Pattern is not an RE instance")
return (pattern, action)
def get_initial_state(self, name):
return self.machine.get_initial_state(name)
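A minimal sketch of a specification this constructor accepts (assuming the usual Cython.Plex exports; the token values are illustrative):

from Cython.Plex import (Lexicon, State, Str, Any, AnyChar, Rep1,
                         Begin, IGNORE)

letters = Any("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
digits = Any("0123456789")

lexicon = Lexicon([
    (Rep1(letters), 'ident'),        # token definition: (pattern, action)
    (Rep1(digits), 'int'),
    (Rep1(Any(" \t\n")), IGNORE),
    (Str("#"), Begin('comment')),    # action switches to the 'comment' state
    State('comment', [               # state definition: State(name, tokens)
        (Str("\n"), Begin('')),      # '' switches back to the default state
        (AnyChar, IGNORE),
    ]),
])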
......@@ -16,244 +16,245 @@ LOWEST_PRIORITY = -sys.maxint
class Machine(object):
"""A collection of Nodes representing an NFA or DFA."""
states = None # [Node]
next_state_number = 1
initial_states = None # {(name, bol): Node}
def __init__(self):
self.states = []
self.initial_states = {}
def __del__(self):
#print "Destroying", self ###
for state in self.states:
state.destroy()
def new_state(self):
"""Add a new state to the machine and return it."""
s = Node()
n = self.next_state_number
self.next_state_number = n + 1
s.number = n
self.states.append(s)
return s
def new_initial_state(self, name):
state = self.new_state()
self.make_initial_state(name, state)
return state
def make_initial_state(self, name, state):
self.initial_states[name] = state
def get_initial_state(self, name):
return self.initial_states[name]
def dump(self, file):
file.write("Plex.Machine:\n")
if self.initial_states is not None:
file.write(" Initial states:\n")
for (name, state) in self.initial_states.iteritems():
file.write(" '%s': %d\n" % (name, state.number))
for s in self.states:
s.dump(file)
"""A collection of Nodes representing an NFA or DFA."""
states = None # [Node]
next_state_number = 1
initial_states = None # {(name, bol): Node}
def __init__(self):
self.states = []
self.initial_states = {}
def __del__(self):
#print "Destroying", self ###
for state in self.states:
state.destroy()
def new_state(self):
"""Add a new state to the machine and return it."""
s = Node()
n = self.next_state_number
self.next_state_number = n + 1
s.number = n
self.states.append(s)
return s
def new_initial_state(self, name):
state = self.new_state()
self.make_initial_state(name, state)
return state
def make_initial_state(self, name, state):
self.initial_states[name] = state
def get_initial_state(self, name):
return self.initial_states[name]
def dump(self, file):
file.write("Plex.Machine:\n")
if self.initial_states is not None:
file.write(" Initial states:\n")
for (name, state) in self.initial_states.iteritems():
file.write(" '%s': %d\n" % (name, state.number))
for s in self.states:
s.dump(file)
class Node(object):
"""A state of an NFA or DFA."""
transitions = None # TransitionMap
action = None # Action
action_priority = None # integer
number = 0 # for debug output
epsilon_closure = None # used by nfa_to_dfa()
def __init__(self):
# Preinitialise the list of empty transitions, because
# the nfa-to-dfa algorithm needs it
#self.transitions = {'':[]}
self.transitions = TransitionMap()
self.action_priority = LOWEST_PRIORITY
def destroy(self):
#print "Destroying", self ###
self.transitions = None
self.action = None
self.epsilon_closure = None
def add_transition(self, event, new_state):
self.transitions.add(event, new_state)
def link_to(self, state):
"""Add an epsilon-move from this state to another state."""
self.add_transition('', state)
def set_action(self, action, priority):
"""Make this an accepting state with the given action. If
there is already an action, choose the action with highest
priority."""
if priority > self.action_priority:
self.action = action
self.action_priority = priority
def get_action(self):
return self.action
def get_action_priority(self):
return self.action_priority
def is_accepting(self):
return self.action is not None
def __str__(self):
return "State %d" % self.number
def dump(self, file):
# Header
file.write(" State %d:\n" % self.number)
# Transitions
# self.dump_transitions(file)
self.transitions.dump(file)
# Action
action = self.action
priority = self.action_priority
if action is not None:
file.write(" %s [priority %d]\n" % (action, priority))
def __lt__(self, other):
return self.number < other.number
"""A state of an NFA or DFA."""
transitions = None # TransitionMap
action = None # Action
action_priority = None # integer
number = 0 # for debug output
epsilon_closure = None # used by nfa_to_dfa()
def __init__(self):
# Preinitialise the list of empty transitions, because
# the nfa-to-dfa algorithm needs it
#self.transitions = {'':[]}
self.transitions = TransitionMap()
self.action_priority = LOWEST_PRIORITY
def destroy(self):
#print "Destroying", self ###
self.transitions = None
self.action = None
self.epsilon_closure = None
def add_transition(self, event, new_state):
self.transitions.add(event, new_state)
def link_to(self, state):
"""Add an epsilon-move from this state to another state."""
self.add_transition('', state)
def set_action(self, action, priority):
"""Make this an accepting state with the given action. If
there is already an action, choose the action with highest
priority."""
if priority > self.action_priority:
self.action = action
self.action_priority = priority
def get_action(self):
return self.action
def get_action_priority(self):
return self.action_priority
def is_accepting(self):
return self.action is not None
def __str__(self):
return "State %d" % self.number
def dump(self, file):
# Header
file.write(" State %d:\n" % self.number)
# Transitions
# self.dump_transitions(file)
self.transitions.dump(file)
# Action
action = self.action
priority = self.action_priority
if action is not None:
file.write(" %s [priority %d]\n" % (action, priority))
def __lt__(self, other):
return self.number < other.number
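A small sketch of how Machine and Node cooperate (import paths assumed as in the Cython tree; the transitions mirror what RE.build_machine() emits for 'ab', plus one epsilon move):

import sys
from Cython.Plex.Machines import Machine
from Cython.Plex.Actions import TEXT

m = Machine()
start = m.new_initial_state('')                      # entry for the default scanner state
mid = m.new_state()
final = m.new_state()
start.add_transition((ord('a'), ord('a') + 1), mid)  # single-character code range for 'a'
mid.add_transition((ord('b'), ord('b') + 1), final)  # ... and for 'b'
mid.link_to(final)                                   # epsilon move: 'a' alone accepts too
final.set_action(TEXT, priority=-1)                  # accepting state returns matched text
m.dump(sys.stdout)                                   # lists every state with its transitions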
class FastMachine(object):
"""
FastMachine is a deterministic machine represented in a way that
allows fast scanning.
"""
initial_states = None # {state_name:state}
states = None # [state]
# where state = {event:state, 'else':state, 'action':Action}
next_number = 1 # for debugging
new_state_template = {
'':None, 'bol':None, 'eol':None, 'eof':None, 'else':None
}
def __init__(self, old_machine = None):
self.initial_states = initial_states = {}
self.states = []
if old_machine:
self.old_to_new = old_to_new = {}
for old_state in old_machine.states:
new_state = self.new_state()
old_to_new[old_state] = new_state
for name, old_state in old_machine.initial_states.iteritems():
initial_states[name] = old_to_new[old_state]
for old_state in old_machine.states:
new_state = old_to_new[old_state]
for event, old_state_set in old_state.transitions.iteritems():
if old_state_set:
new_state[event] = old_to_new[old_state_set.keys()[0]]
else:
new_state[event] = None
new_state['action'] = old_state.action
def __del__(self):
for state in self.states:
state.clear()
def new_state(self, action = None):
number = self.next_number
self.next_number = number + 1
result = self.new_state_template.copy()
result['number'] = number
result['action'] = action
self.states.append(result)
return result
def make_initial_state(self, name, state):
self.initial_states[name] = state
def add_transitions(self, state, event, new_state, maxint=sys.maxint):
if type(event) is tuple:
code0, code1 = event
if code0 == -maxint:
state['else'] = new_state
elif code1 != maxint:
while code0 < code1:
state[chr(code0)] = new_state
code0 = code0 + 1
else:
state[event] = new_state
def get_initial_state(self, name):
return self.initial_states[name]
def dump(self, file):
file.write("Plex.FastMachine:\n")
file.write(" Initial states:\n")
for name, state in self.initial_states.iteritems():
file.write(" %s: %s\n" % (repr(name), state['number']))
for state in self.states:
self.dump_state(state, file)
def dump_state(self, state, file):
# Header
file.write(" State %d:\n" % state['number'])
# Transitions
self.dump_transitions(state, file)
# Action
action = state['action']
if action is not None:
file.write(" %s\n" % action)
def dump_transitions(self, state, file):
chars_leading_to_state = {}
special_to_state = {}
for (c, s) in state.iteritems():
if len(c) == 1:
chars = chars_leading_to_state.get(id(s), None)
if chars is None:
chars = []
chars_leading_to_state[id(s)] = chars
chars.append(c)
elif len(c) <= 4:
special_to_state[c] = s
ranges_to_state = {}
for state in self.states:
char_list = chars_leading_to_state.get(id(state), None)
if char_list:
ranges = self.chars_to_ranges(char_list)
ranges_to_state[ranges] = state
ranges_list = ranges_to_state.keys()
ranges_list.sort()
for ranges in ranges_list:
key = self.ranges_to_string(ranges)
state = ranges_to_state[ranges]
file.write(" %s --> State %d\n" % (key, state['number']))
for key in ('bol', 'eol', 'eof', 'else'):
state = special_to_state.get(key, None)
if state:
file.write(" %s --> State %d\n" % (key, state['number']))
def chars_to_ranges(self, char_list):
char_list.sort()
i = 0
n = len(char_list)
result = []
while i < n:
c1 = ord(char_list[i])
c2 = c1
i = i + 1
while i < n and ord(char_list[i]) == c2 + 1:
i = i + 1
c2 = c2 + 1
result.append((chr(c1), chr(c2)))
return tuple(result)
def ranges_to_string(self, range_list):
return ','.join(map(self.range_to_string, range_list))
def range_to_string(self, range_tuple):
(c1, c2) = range_tuple
if c1 == c2:
return repr(c1)
else:
return "%s..%s" % (repr(c1), repr(c2))
"""
FastMachine is a deterministic machine represented in a way that
allows fast scanning.
"""
initial_states = None # {state_name:state}
states = None # [state] where state = {event:state, 'else':state, 'action':Action}
next_number = 1 # for debugging
new_state_template = {
'': None, 'bol': None, 'eol': None, 'eof': None, 'else': None
}
def __init__(self, old_machine=None):
self.initial_states = initial_states = {}
self.states = []
if old_machine:
self.old_to_new = old_to_new = {}
for old_state in old_machine.states:
new_state = self.new_state()
old_to_new[old_state] = new_state
for name, old_state in old_machine.initial_states.iteritems():
initial_states[name] = old_to_new[old_state]
for old_state in old_machine.states:
new_state = old_to_new[old_state]
for event, old_state_set in old_state.transitions.iteritems():
if old_state_set:
new_state[event] = old_to_new[old_state_set.keys()[0]]
else:
new_state[event] = None
new_state['action'] = old_state.action
def __del__(self):
for state in self.states:
state.clear()
def new_state(self, action=None):
number = self.next_number
self.next_number = number + 1
result = self.new_state_template.copy()
result['number'] = number
result['action'] = action
self.states.append(result)
return result
def make_initial_state(self, name, state):
self.initial_states[name] = state
def add_transitions(self, state, event, new_state, maxint=sys.maxint):
if type(event) is tuple:
code0, code1 = event
if code0 == -maxint:
state['else'] = new_state
elif code1 != maxint:
while code0 < code1:
state[unichr(code0)] = new_state
code0 += 1
else:
state[event] = new_state
def get_initial_state(self, name):
return self.initial_states[name]
def dump(self, file):
file.write("Plex.FastMachine:\n")
file.write(" Initial states:\n")
for name, state in self.initial_states.iteritems():
file.write(" %s: %s\n" % (repr(name), state['number']))
for state in self.states:
self.dump_state(state, file)
def dump_state(self, state, file):
# Header
file.write(" State %d:\n" % state['number'])
# Transitions
self.dump_transitions(state, file)
# Action
action = state['action']
if action is not None:
file.write(" %s\n" % action)
def dump_transitions(self, state, file):
chars_leading_to_state = {}
special_to_state = {}
for (c, s) in state.iteritems():
if len(c) == 1:
chars = chars_leading_to_state.get(id(s), None)
if chars is None:
chars = []
chars_leading_to_state[id(s)] = chars
chars.append(c)
elif len(c) <= 4:
special_to_state[c] = s
ranges_to_state = {}
for state in self.states:
char_list = chars_leading_to_state.get(id(state), None)
if char_list:
ranges = self.chars_to_ranges(char_list)
ranges_to_state[ranges] = state
ranges_list = ranges_to_state.keys()
ranges_list.sort()
for ranges in ranges_list:
key = self.ranges_to_string(ranges)
state = ranges_to_state[ranges]
file.write(" %s --> State %d\n" % (key, state['number']))
for key in ('bol', 'eol', 'eof', 'else'):
state = special_to_state.get(key, None)
if state:
file.write(" %s --> State %d\n" % (key, state['number']))
def chars_to_ranges(self, char_list):
char_list.sort()
i = 0
n = len(char_list)
result = []
while i < n:
c1 = ord(char_list[i])
c2 = c1
i += 1
while i < n and ord(char_list[i]) == c2 + 1:
i += 1
c2 += 1
result.append((chr(c1), chr(c2)))
return tuple(result)
def ranges_to_string(self, range_list):
return ','.join(map(self.range_to_string, range_list))
def range_to_string(self, range_tuple):
(c1, c2) = range_tuple
if c1 == c2:
return repr(c1)
else:
return "%s..%s" % (repr(c1), repr(c2))
......@@ -42,14 +42,15 @@ def chars_to_ranges(s):
while i < n:
code1 = ord(char_list[i])
code2 = code1 + 1
i += 1
while i < n and code2 >= ord(char_list[i]):
code2 += 1
i += 1
result.append(code1)
result.append(code2)
return result
def uppercase_range(code1, code2):
"""
If the range of characters from code1 to code2-1 includes any
......@@ -63,6 +64,7 @@ def uppercase_range(code1, code2):
else:
return None
def lowercase_range(code1, code2):
"""
If the range of characters from code1 to code2-1 includes any
......@@ -76,6 +78,7 @@ def lowercase_range(code1, code2):
else:
return None
def CodeRanges(code_list):
"""
Given a list of codes as returned by chars_to_ranges, return
......@@ -86,6 +89,7 @@ def CodeRanges(code_list):
re_list.append(CodeRange(code_list[i], code_list[i + 1]))
return Alt(*re_list)
def CodeRange(code1, code2):
"""
CodeRange(code1, code2) is an RE which matches any character
......@@ -93,11 +97,12 @@ def CodeRange(code1, code2):
"""
if code1 <= nl_code < code2:
return Alt(RawCodeRange(code1, nl_code),
RawNewline,
RawCodeRange(nl_code + 1, code2))
else:
return RawCodeRange(code1, code2)
#
# Abstract classes
#
......@@ -110,12 +115,12 @@ class RE(object):
re1 | re2 is an RE which matches either |re1| or |re2|
"""
nullable = 1 # True if this RE can match 0 input symbols
match_nl = 1 # True if this RE can match a string ending with '\n'
str = None # Set to a string to override the class's __str__ result
def build_machine(self, machine, initial_state, final_state,
match_bol, nocase):
"""
This method should add states to |machine| to implement this
RE, starting at |initial_state| and ending at |final_state|.
......@@ -124,7 +129,7 @@ class RE(object):
letters should be treated as equivalent.
"""
raise NotImplementedError("%s.build_machine not implemented" %
self.__class__.__name__)
def build_opt(self, m, initial_state, c):
"""
......@@ -160,18 +165,18 @@ class RE(object):
self.check_string(num, value)
if len(value) != 1:
raise Errors.PlexValueError("Invalid value for argument %d of Plex.%s."
"Expected a string of length 1, got: %s" % (
num, self.__class__.__name__, repr(value)))
"Expected a string of length 1, got: %s" % (
num, self.__class__.__name__, repr(value)))
def wrong_type(self, num, value, expected):
if type(value) == types.InstanceType:
got = "%s.%s instance" % (
value.__class__.__module__, value.__class__.__name__)
got = "%s.%s instance" % (
value.__class__.__module__, value.__class__.__name__)
else:
got = type(value).__name__
raise Errors.PlexTypeError("Invalid type for argument %d of Plex.%s "
"(expected %s, got %s" % (
num, self.__class__.__name__, expected, got))
"(expected %s, got %s" % (
num, self.__class__.__name__, expected, got))
#
# Primitive RE constructors
......@@ -211,6 +216,7 @@ class RE(object):
## def calc_str(self):
## return "Char(%s)" % repr(self.char)
def Char(c):
"""
Char(c) is an RE which matches the character |c|.
......@@ -222,6 +228,7 @@ def Char(c):
result.str = "Char(%s)" % repr(c)
return result
class RawCodeRange(RE):
"""
RawCodeRange(code1, code2) is a low-level RE which matches any character
......@@ -230,9 +237,9 @@ class RawCodeRange(RE):
"""
nullable = 0
match_nl = 0
range = None # (code, code)
uppercase_range = None # (code, code) or None
lowercase_range = None # (code, code) or None
def __init__(self, code1, code2):
self.range = (code1, code2)
......@@ -252,6 +259,7 @@ class RawCodeRange(RE):
def calc_str(self):
return "CodeRange(%d,%d)" % (self.code1, self.code2)
class _RawNewline(RE):
"""
RawNewline is a low-level RE which matches a newline character.
......@@ -266,6 +274,7 @@ class _RawNewline(RE):
s = self.build_opt(m, initial_state, EOL)
s.add_transition((nl_code, nl_code + 1), final_state)
RawNewline = _RawNewline()
......@@ -304,7 +313,7 @@ class Seq(RE):
i = len(re_list)
match_nl = 0
while i:
i -= 1
re = re_list[i]
if re.match_nl:
match_nl = 1
......@@ -354,7 +363,7 @@ class Alt(RE):
non_nullable_res.append(re)
if re.match_nl:
match_nl = 1
i += 1
self.nullable_res = nullable_res
self.non_nullable_res = non_nullable_res
self.nullable = nullable
......@@ -411,7 +420,7 @@ class SwitchCase(RE):
def build_machine(self, m, initial_state, final_state, match_bol, nocase):
self.re.build_machine(m, initial_state, final_state, match_bol,
self.nocase)
def calc_str(self):
if self.nocase:
......@@ -434,6 +443,7 @@ Empty.__doc__ = \
"""
Empty.str = "Empty"
def Str1(s):
"""
Str1(s) is an RE which matches the literal string |s|.
......@@ -442,6 +452,7 @@ def Str1(s):
result.str = "Str(%s)" % repr(s)
return result
def Str(*strs):
"""
Str(s) is an RE which matches the literal string |s|.
......@@ -454,6 +465,7 @@ def Str(*strs):
result.str = "Str(%s)" % ','.join(map(repr, strs))
return result
def Any(s):
"""
Any(s) is an RE which matches any character in the string |s|.
......@@ -463,6 +475,7 @@ def Any(s):
result.str = "Any(%s)" % repr(s)
return result
def AnyBut(s):
"""
AnyBut(s) is an RE which matches any character (including
......@@ -475,6 +488,7 @@ def AnyBut(s):
result.str = "AnyBut(%s)" % repr(s)
return result
AnyChar = AnyBut("")
AnyChar.__doc__ = \
"""
......@@ -482,7 +496,8 @@ AnyChar.__doc__ = \
"""
AnyChar.str = "AnyChar"
def Range(s1, s2=None):
"""
Range(c1, c2) is an RE which matches any single character in the range
|c1| to |c2| inclusive.
......@@ -495,11 +510,12 @@ def Range(s1, s2 = None):
else:
ranges = []
for i in range(0, len(s1), 2):
ranges.append(CodeRange(ord(s1[i]), ord(s1[i + 1]) + 1))
result = Alt(*ranges)
result.str = "Range(%s)" % repr(s1)
return result
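Both call forms in a quick sketch:

lower = Range("a", "z")   # any single character from 'a' to 'z' inclusive
word = Range("azAZ09")    # pairs of bounds: lower case, upper case, digits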
def Opt(re):
"""
Opt(re) is an RE which matches either |re| or the empty string.
......@@ -508,6 +524,7 @@ def Opt(re):
result.str = "Opt(%s)" % re
return result
def Rep(re):
"""
Rep(re) is an RE which matches zero or more repetitions of |re|.
......@@ -516,12 +533,14 @@ def Rep(re):
result.str = "Rep(%s)" % re
return result
def NoCase(re):
"""
NoCase(re) is an RE which matches the same strings as RE, but treating
upper and lower case letters as equivalent.
"""
return SwitchCase(re, nocase=1)
def Case(re):
"""
......@@ -529,7 +548,7 @@ def Case(re):
upper and lower case letters as distinct, i.e. it cancels the effect
of any enclosing NoCase().
"""
return SwitchCase(re, nocase=0)
#
# RE Constants
......
......@@ -31,7 +31,7 @@ cdef class Scanner:
@cython.locals(input_state=long)
cdef next_char(self)
@cython.locals(action=Action)
cpdef tuple read(self)
cdef tuple scan_a_token(self)
cdef tuple position(self)
......
......@@ -10,6 +10,7 @@
from __future__ import absolute_import
import cython
cython.declare(BOL=object, EOL=object, EOF=object, NOT_FOUND=object)
from . import Errors
......@@ -19,317 +20,318 @@ NOT_FOUND = object()
class Scanner(object):
"""
A Scanner is used to read tokens from a stream of characters
using the token set specified by a Plex.Lexicon.
Constructor:
Scanner(lexicon, stream, name = '')
"""
A Scanner is used to read tokens from a stream of characters
using the token set specified by a Plex.Lexicon.
See the docstring of the __init__ method for details.
Constructor:
Methods:
Scanner(lexicon, stream, name = '')
See the docstrings of the individual methods for more
information.
See the docstring of the __init__ method for details.
read() --> (value, text)
Reads the next lexical token from the stream.
Methods:
position() --> (name, line, col)
Returns the position of the last token read using the
read() method.
See the docstrings of the individual methods for more
information.
begin(state_name)
Causes scanner to change state.
read() --> (value, text)
Reads the next lexical token from the stream.
produce(value [, text])
Causes return of a token value to the caller of the
Scanner.
position() --> (name, line, col)
Returns the position of the last token read using the
read() method.
"""
begin(state_name)
Causes scanner to change state.
# lexicon = None # Lexicon
# stream = None # file-like object
# name = ''
# buffer = ''
# buf_start_pos = 0 # position in input of start of buffer
# next_pos = 0 # position in input of next char to read
# cur_pos = 0 # position in input of current char
# cur_line = 1 # line number of current char
# cur_line_start = 0 # position in input of start of current line
# start_pos = 0 # position in input of start of token
# start_line = 0 # line number of start of token
# start_col = 0 # position in line of start of token
# text = None # text of last token read
# initial_state = None # Node
# state_name = '' # Name of initial state
# queue = None # list of tokens to be returned
# trace = 0
produce(value [, text])
Causes return of a token value to the caller of the
Scanner.
def __init__(self, lexicon, stream, name = '', initial_pos = None):
"""
Scanner(lexicon, stream, name = '')
|lexicon| is a Plex.Lexicon instance specifying the lexical tokens
to be recognised.
|stream| can be a file object or anything which implements a
compatible read() method.
|name| is optional, and may be the name of the file being
scanned or any other identifying string.
"""
self.trace = 0
self.buffer = u''
self.buf_start_pos = 0
self.next_pos = 0
self.cur_pos = 0
self.cur_line = 1
self.start_pos = 0
self.start_line = 0
self.start_col = 0
self.text = None
self.state_name = None
self.lexicon = lexicon
self.stream = stream
self.name = name
self.queue = []
self.initial_state = None
self.begin('')
self.next_pos = 0
self.cur_pos = 0
self.cur_line_start = 0
self.cur_char = BOL
self.input_state = 1
if initial_pos is not None:
self.cur_line, self.cur_line_start = initial_pos[1], -initial_pos[2]
def read(self):
"""
Read the next lexical token from the stream and return a
tuple (value, text), where |value| is the value associated with
the token as specified by the Lexicon, and |text| is the actual
string read from the stream. Returns (None, '') on end of file.
"""
queue = self.queue
while not queue:
self.text, action = self.scan_a_token()
if action is None:
self.produce(None)
self.eof()
else:
value = action.perform(self, self.text)
if value is not None:
self.produce(value)
result = queue[0]
del queue[0]
return result
def scan_a_token(self):
"""
Read the next input sequence recognised by the machine
and return (text, action). Returns ('', None) on end of
file.
"""
self.start_pos = self.cur_pos
self.start_line = self.cur_line
self.start_col = self.cur_pos - self.cur_line_start
action = self.run_machine_inlined()
if action is not None:
if self.trace:
print("Scanner: read: Performing %s %d:%d" % (
action, self.start_pos, self.cur_pos))
text = self.buffer[self.start_pos - self.buf_start_pos :
self.cur_pos - self.buf_start_pos]
return (text, action)
else:
if self.cur_pos == self.start_pos:
if self.cur_char is EOL:
self.next_char()
if self.cur_char is None or self.cur_char is EOF:
return (u'', None)
raise Errors.UnrecognizedInput(self, self.state_name)
def run_machine_inlined(self):
"""
Inlined version of run_machine for speed.
"""
state = self.initial_state
cur_pos = self.cur_pos
cur_line = self.cur_line
cur_line_start = self.cur_line_start
cur_char = self.cur_char
input_state = self.input_state
next_pos = self.next_pos
buffer = self.buffer
buf_start_pos = self.buf_start_pos
buf_len = len(buffer)
b_action, b_cur_pos, b_cur_line, b_cur_line_start, b_cur_char, b_input_state, b_next_pos = \
None, 0, 0, 0, u'', 0, 0
trace = self.trace
while 1:
if trace: #TRACE#
print("State %d, %d/%d:%s -->" % ( #TRACE#
state['number'], input_state, cur_pos, repr(cur_char))) #TRACE#
# Begin inlined self.save_for_backup()
#action = state.action #@slow
action = state['action'] #@fast
if action is not None:
# lexicon = None # Lexicon
# stream = None # file-like object
# name = ''
# buffer = ''
# buf_start_pos = 0 # position in input of start of buffer
# next_pos = 0 # position in input of next char to read
# cur_pos = 0 # position in input of current char
# cur_line = 1 # line number of current char
# cur_line_start = 0 # position in input of start of current line
# start_pos = 0 # position in input of start of token
# start_line = 0 # line number of start of token
# start_col = 0 # position in line of start of token
# text = None # text of last token read
# initial_state = None # Node
# state_name = '' # Name of initial state
# queue = None # list of tokens to be returned
# trace = 0
def __init__(self, lexicon, stream, name='', initial_pos=None):
"""
Scanner(lexicon, stream, name = '')
|lexicon| is a Plex.Lexicon instance specifying the lexical tokens
to be recognised.
|stream| can be a file object or anything which implements a
compatible read() method.
|name| is optional, and may be the name of the file being
scanned or any other identifying string.
"""
self.trace = 0
self.buffer = u''
self.buf_start_pos = 0
self.next_pos = 0
self.cur_pos = 0
self.cur_line = 1
self.start_pos = 0
self.start_line = 0
self.start_col = 0
self.text = None
self.state_name = None
self.lexicon = lexicon
self.stream = stream
self.name = name
self.queue = []
self.initial_state = None
self.begin('')
self.next_pos = 0
self.cur_pos = 0
self.cur_line_start = 0
self.cur_char = BOL
self.input_state = 1
if initial_pos is not None:
self.cur_line, self.cur_line_start = initial_pos[1], -initial_pos[2]
def read(self):
"""
Read the next lexical token from the stream and return a
tuple (value, text), where |value| is the value associated with
the token as specified by the Lexicon, and |text| is the actual
string read from the stream. Returns (None, '') on end of file.
"""
queue = self.queue
while not queue:
self.text, action = self.scan_a_token()
if action is None:
self.produce(None)
self.eof()
else:
value = action.perform(self, self.text)
if value is not None:
self.produce(value)
result = queue[0]
del queue[0]
return result
def scan_a_token(self):
"""
Read the next input sequence recognised by the machine
and return (text, action). Returns ('', None) on end of
file.
"""
self.start_pos = self.cur_pos
self.start_line = self.cur_line
self.start_col = self.cur_pos - self.cur_line_start
action = self.run_machine_inlined()
if action is not None:
if self.trace:
print("Scanner: read: Performing %s %d:%d" % (
action, self.start_pos, self.cur_pos))
text = self.buffer[
self.start_pos - self.buf_start_pos:
self.cur_pos - self.buf_start_pos]
return (text, action)
else:
if self.cur_pos == self.start_pos:
if self.cur_char is EOL:
self.next_char()
if self.cur_char is None or self.cur_char is EOF:
return (u'', None)
raise Errors.UnrecognizedInput(self, self.state_name)
def run_machine_inlined(self):
"""
Inlined version of run_machine for speed.
"""
state = self.initial_state
cur_pos = self.cur_pos
cur_line = self.cur_line
cur_line_start = self.cur_line_start
cur_char = self.cur_char
input_state = self.input_state
next_pos = self.next_pos
buffer = self.buffer
buf_start_pos = self.buf_start_pos
buf_len = len(buffer)
b_action, b_cur_pos, b_cur_line, b_cur_line_start, b_cur_char, b_input_state, b_next_pos = \
None, 0, 0, 0, u'', 0, 0
trace = self.trace
while 1:
if trace: #TRACE#
print("State %d, %d/%d:%s -->" % ( #TRACE#
state['number'], input_state, cur_pos, repr(cur_char))) #TRACE#
# Begin inlined self.save_for_backup()
#action = state.action #@slow
action = state['action'] #@fast
if action is not None:
b_action, b_cur_pos, b_cur_line, b_cur_line_start, b_cur_char, b_input_state, b_next_pos = \
action, cur_pos, cur_line, cur_line_start, cur_char, input_state, next_pos
# End inlined self.save_for_backup()
c = cur_char
#new_state = state.new_state(c) #@slow
new_state = state.get(c, NOT_FOUND) #@fast
if new_state is NOT_FOUND: #@fast
new_state = c and state.get('else') #@fast
if new_state:
if trace: #TRACE#
print("State %d" % new_state['number']) #TRACE#
state = new_state
# Begin inlined: self.next_char()
if input_state == 1:
cur_pos = next_pos
# Begin inlined: c = self.read_char()
buf_index = next_pos - buf_start_pos
if buf_index < buf_len:
c = buffer[buf_index]
next_pos += 1
else:
discard = self.start_pos - buf_start_pos
data = self.stream.read(0x1000)
buffer = self.buffer[discard:] + data
self.buffer = buffer
buf_start_pos += discard
self.buf_start_pos = buf_start_pos
buf_len = len(buffer)
buf_index -= discard
if data:
c = buffer[buf_index]
next_pos += 1
else:
c = u''
# End inlined: c = self.read_char()
if c == u'\n':
cur_char = EOL
input_state = 2
elif not c:
cur_char = EOL
input_state = 4
else:
cur_char = c
elif input_state == 2:
cur_char = u'\n'
input_state = 3
elif input_state == 3:
cur_line += 1
cur_line_start = cur_pos = next_pos
cur_char = BOL
input_state = 1
elif input_state == 4:
cur_char = EOF
input_state = 5
else: # input_state = 5
cur_char = u''
# End inlined self.next_char()
else: # not new_state
if trace: #TRACE#
print("blocked") #TRACE#
# Begin inlined: action = self.back_up()
if b_action is not None:
(action, cur_pos, cur_line, cur_line_start,
cur_char, input_state, next_pos) = \
(b_action, b_cur_pos, b_cur_line, b_cur_line_start,
b_cur_char, b_input_state, b_next_pos)
else:
action = None
break # while 1
# End inlined: action = self.back_up()
self.cur_pos = cur_pos
self.cur_line = cur_line
self.cur_line_start = cur_line_start
self.cur_char = cur_char
self.input_state = input_state
self.next_pos = next_pos
if trace: #TRACE#
if action is not None: #TRACE#
print("Doing %s" % action) #TRACE#
return action
def next_char(self):
input_state = self.input_state
if self.trace:
print("Scanner: next: %s [%d] %d" % (" " * 20, input_state, self.cur_pos))
if input_state == 1:
self.cur_pos = self.next_pos
c = self.read_char()
if c == u'\n':
self.cur_char = EOL
self.input_state = 2
elif not c:
self.cur_char = EOL
self.input_state = 4
else:
self.cur_char = c
elif input_state == 2:
self.cur_char = u'\n'
self.input_state = 3
elif input_state == 3:
self.cur_line += 1
self.cur_line_start = self.cur_pos = self.next_pos
self.cur_char = BOL
self.input_state = 1
elif input_state == 4:
self.cur_char = EOF
self.input_state = 5
else: # input_state = 5
self.cur_char = u''
if self.trace:
print("--> [%d] %d %s" % (input_state, self.cur_pos, repr(self.cur_char)))
def position(self):
"""
Return a tuple (name, line, col) representing the location of
the last token read using the read() method. |name| is the
name that was provided to the Scanner constructor; |line|
is the line number in the stream (1-based); |col| is the
position within the line of the first character of the token
(0-based).
"""
return (self.name, self.start_line, self.start_col)
def get_position(self):
"""Python accessible wrapper around position(), only for error reporting.
"""
return self.position()
def begin(self, state_name):
"""Set the current state of the scanner to the named state."""
self.initial_state = (
self.lexicon.get_initial_state(state_name))
self.state_name = state_name
def produce(self, value, text=None):
"""
Called from an action procedure, causes |value| to be returned
as the token value from read(). If |text| is supplied, it is
returned in place of the scanned text.
produce() can be called more than once during a single call to an action
procedure, in which case the tokens are queued up and returned one
at a time by subsequent calls to read(), until the queue is empty,
whereupon scanning resumes.
"""
if text is None:
text = self.text
self.queue.append((value, text))
def eof(self):
"""
Override this method if you want something to be done at
end of file.
"""
......@@ -13,147 +13,146 @@ from .Errors import PlexError
class RegexpSyntaxError(PlexError):
pass
def re(s):
"""
Convert traditional string representation of regular expression |s|
into Plex representation.
"""
return REParser(s).parse_re()
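For example, two equivalences as implemented by the parser below (sketch):

re("ab+")      # same machine as Seq(Char('a'), Rep1(Char('b')))
re("[^0-9]")   # same machine as AnyBut("0123456789")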
"""
Convert traditional string representation of regular expression |s|
into Plex representation.
"""
return REParser(s).parse_re()
class REParser(object):
def __init__(self, s):
self.s = s
self.i = -1
self.end = 0
self.next()
def parse_re(self):
re = self.parse_alt()
if not self.end:
self.error("Unexpected %s" % repr(self.c))
return re
def parse_alt(self):
"""Parse a set of alternative regexps."""
re = self.parse_seq()
if self.c == '|':
re_list = [re]
while self.c == '|':
self.next()
re_list.append(self.parse_seq())
re = Alt(*re_list)
return re
def parse_seq(self):
"""Parse a sequence of regexps."""
re_list = []
while not self.end and not self.c in "|)":
re_list.append(self.parse_mod())
return Seq(*re_list)
def parse_mod(self):
"""Parse a primitive regexp followed by *, +, ? modifiers."""
re = self.parse_prim()
while not self.end and self.c in "*+?":
if self.c == '*':
re = Rep(re)
elif self.c == '+':
re = Rep1(re)
else: # self.c == '?'
re = Opt(re)
self.next()
return re
def parse_prim(self):
"""Parse a primitive regexp."""
c = self.get()
re = Char(c)
return re
def parse_charset(self):
"""Parse a charset. Does not include the surrounding []."""
char_list = []
invert = 0
if self.c == '^':
invert = 1
self.next()
if self.c == ']':
char_list.append(']')
self.next()
while not self.end and self.c != ']':
c1 = self.get()
if self.c == '-' and self.lookahead(1) != ']':
if c == '.':
re = AnyBut("\n")
elif c == '^':
re = Bol
elif c == '$':
re = Eol
elif c == '(':
re = self.parse_alt()
self.expect(')')
elif c == '[':
re = self.parse_charset()
self.expect(']')
else:
if c == '\\':
c = self.get()
re = Char(c)
return re
def parse_charset(self):
"""Parse a charset. Does not include the surrounding []."""
char_list = []
invert = 0
if self.c == '^':
invert = 1
self.next()
if self.c == ']':
char_list.append(']')
self.next()
while not self.end and self.c != ']':
c1 = self.get()
if self.c == '-' and self.lookahead(1) != ']':
self.next()
c2 = self.get()
for a in xrange(ord(c1), ord(c2) + 1):
char_list.append(chr(a))
else:
char_list.append(c1)
chars = ''.join(char_list)
if invert:
return AnyBut(chars)
else:
return Any(chars)
def next(self):
"""Advance to the next char."""
s = self.s
i = self.i = self.i + 1
if i < len(s):
self.c = s[i]
else:
self.c = ''
self.end = 1
def get(self):
if self.end:
self.error("Premature end of string")
c = self.c
self.next()
c2 = self.get()
for a in xrange(ord(c1), ord(c2) + 1):
char_list.append(chr(a))
else:
char_list.append(c1)
chars = ''.join(char_list)
if invert:
return AnyBut(chars)
else:
return Any(chars)
def next(self):
"""Advance to the next char."""
s = self.s
i = self.i = self.i + 1
if i < len(s):
self.c = s[i]
else:
self.c = ''
self.end = 1
def get(self):
if self.end:
self.error("Premature end of string")
c = self.c
self.next()
return c
def lookahead(self, n):
"""Look ahead n chars."""
j = self.i + n
if j < len(self.s):
return self.s[j]
else:
return ''
def expect(self, c):
"""
Expect to find character |c| at current position.
Raises an exception otherwise.
"""
if self.c == c:
self.next()
else:
self.error("Missing %s" % repr(c))
def error(self, mess):
"""Raise exception to signal syntax error in regexp."""
raise RegexpSyntaxError("Syntax error in regexp %s at position %d: %s" % (
repr(self.s), self.i, mess))
return c
def lookahead(self, n):
"""Look ahead n chars."""
j = self.i + n
if j < len(self.s):
return self.s[j]
else:
return ''
def expect(self, c):
"""
Expect to find character |c| at current position.
Raises an exception otherwise.
"""
if self.c == c:
self.next()
else:
self.error("Missing %s" % repr(c))
def error(self, mess):
"""Raise exception to signal syntax error in regexp."""
raise RegexpSyntaxError("Syntax error in regexp %s at position %d: %s" % (
repr(self.s), self.i, mess))
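A hedged usage sketch of the parser above (the pattern is illustrative and assumes the Plex node constructors ``Seq``, ``Rep1`` and ``Any`` are in scope, as they are in this module):

    # Hypothetical: convert a traditional regexp string into Plex form.
    node = re("[a-c]+")
    # parse_charset() expands the range to 'abc'; parse_mod() applies '+',
    # so the result is Seq(Rep1(Any('abc'))).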
#
# Plex - Transition Maps
#
# This version represents state sets directly as dicts for speed.
#
from __future__ import absolute_import
......@@ -10,229 +10,231 @@ from sys import maxint as maxint
class TransitionMap(object):
    """
    A TransitionMap maps an input event to a set of states.
    An input event is one of: a range of character codes,
    the empty string (representing an epsilon move), or one
    of the special symbols BOL, EOL, EOF.

    For characters, this implementation compactly represents
    the map by means of a list:

        [code_0, states_0, code_1, states_1, code_2, states_2,
            ..., code_n-1, states_n-1, code_n]

    where |code_i| is a character code, and |states_i| is a
    set of states corresponding to characters with codes |c|
    in the range |code_i| <= |c| <= |code_i+1|.

    The following invariants hold:
        n >= 1
        code_0 == -maxint
        code_n == maxint
        code_i < code_i+1 for i in 0..n-1
        states_0 == states_n-1

    Mappings for the special events '', BOL, EOL, EOF are
    kept separately in a dictionary.
    """

    map = None       # The list of codes and states
    special = None   # Mapping for special events

    def __init__(self, map=None, special=None):
        if not map:
            map = [-maxint, {}, maxint]
        if not special:
            special = {}
        self.map = map
        self.special = special
        #self.check() ###

    def add(self, event, new_state,
            TupleType=tuple):
        """
        Add transition to |new_state| on |event|.
        """
        if type(event) is TupleType:
            code0, code1 = event
            i = self.split(code0)
            j = self.split(code1)
            map = self.map
            while i < j:
                map[i + 1][new_state] = 1
                i += 2
        else:
            self.get_special(event)[new_state] = 1

    def add_set(self, event, new_set,
                TupleType=tuple):
        """
        Add transitions to the states in |new_set| on |event|.
        """
        if type(event) is TupleType:
            code0, code1 = event
            i = self.split(code0)
            j = self.split(code1)
            map = self.map
            while i < j:
                map[i + 1].update(new_set)
                i += 2
        else:
            self.get_special(event).update(new_set)

    def get_epsilon(self,
                    none=None):
        """
        Return the mapping for epsilon, or None.
        """
        return self.special.get('', none)

    def iteritems(self,
                  len=len):
        """
        Return the mapping as an iterable of ((code1, code2), state_set) and
        (special_event, state_set) pairs.
        """
        result = []
        map = self.map
        else_set = map[1]
        i = 0
        n = len(map) - 1
        code0 = map[0]
        while i < n:
            set = map[i + 1]
            code1 = map[i + 2]
            if set or else_set:
                result.append(((code0, code1), set))
            code0 = code1
            i += 2
        for event, set in self.special.iteritems():
            if set:
                result.append((event, set))
        return iter(result)

    items = iteritems

    # ------------------- Private methods --------------------

    def split(self, code,
              len=len, maxint=maxint):
        """
        Search the list for the position of the split point for |code|,
        inserting a new split point if necessary. Returns index |i| such
        that |code| == |map[i]|.
        """
        # We use a funky variation on binary search.
        map = self.map
        hi = len(map) - 1
        # Special case: code == map[-1]
        if code == maxint:
            return hi
        # General case
        lo = 0
        # loop invariant: map[lo] <= code < map[hi] and hi - lo >= 2
        while hi - lo >= 4:
            # Find midpoint truncated to even index
            mid = ((lo + hi) // 2) & ~1
            if code < map[mid]:
                hi = mid
            else:
                lo = mid
        # map[lo] <= code < map[hi] and hi - lo == 2
        if map[lo] == code:
            return lo
        else:
            map[hi:hi] = [code, map[hi - 1].copy()]
            #self.check() ###
            return hi

    def get_special(self, event):
        """
        Get state set for special event, adding a new entry if necessary.
        """
        special = self.special
        set = special.get(event, None)
        if not set:
            set = {}
            special[event] = set
        return set

    # --------------------- Conversion methods -----------------------

    def __str__(self):
        map_strs = []
        map = self.map
        n = len(map)
        i = 0
        while i < n:
            code = map[i]
            if code == -maxint:
                code_str = "-inf"
            elif code == maxint:
                code_str = "inf"
            else:
                code_str = str(code)
            map_strs.append(code_str)
            i += 1
            if i < n:
                map_strs.append(state_set_str(map[i]))
                i += 1
        special_strs = {}
        for event, set in self.special.iteritems():
            special_strs[event] = state_set_str(set)
        return "[%s]+%s" % (
            ','.join(map_strs),
            special_strs
        )

    # --------------------- Debugging methods -----------------------

    def check(self):
        """Check data structure integrity."""
        if not self.map[-3] < self.map[-1]:
            print(self)
            assert 0

    def dump(self, file):
        map = self.map
        i = 0
        n = len(map) - 1
        while i < n:
            self.dump_range(map[i], map[i + 2], map[i + 1], file)
            i += 2
        for event, set in self.special.iteritems():
            if set:
                if not event:
                    event = 'empty'
                self.dump_trans(event, set, file)

    def dump_range(self, code0, code1, set, file):
        if set:
            if code0 == -maxint:
                if code1 == maxint:
                    k = "any"
                else:
                    k = "< %s" % self.dump_char(code1)
            elif code1 == maxint:
                k = "> %s" % self.dump_char(code0 - 1)
            elif code0 == code1 - 1:
                k = self.dump_char(code0)
            else:
                k = "%s..%s" % (self.dump_char(code0),
                                self.dump_char(code1 - 1))
            self.dump_trans(k, set, file)

    def dump_char(self, code):
        if 0 <= code <= 255:
            return repr(chr(code))
        else:
            return "chr(%d)" % code

    def dump_trans(self, key, set, file):
        file.write("  %s --> %s\n" % (key, self.dump_set(set)))

    def dump_set(self, set):
        return state_set_str(set)
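A hedged sketch of the map in action, runnable under Python 2 (matching the module's use of ``sys.maxint``); plain integers stand in for state objects here, which is enough for ``add()`` and ``items()`` — the string/debug methods additionally expect a ``.number`` attribute on states:

    # Hypothetical: state 1 is reachable on 'a'..'z', state 2 on an epsilon move.
    tm = TransitionMap()
    tm.add((ord('a'), ord('z') + 1), 1)   # range event: splits the code list at 97/123
    tm.add('', 2)                         # epsilon event: stored in the special dict
    assert sorted(tm.items()) == [('', {2: 1}), ((97, 123), {1: 1})]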
#
# State set manipulation functions
......@@ -243,4 +245,4 @@ class TransitionMap(object):
# set1[state] = 1
def state_set_str(set):
    return "[%s]" % ','.join(["S%d" % state.number for state in set])
# cython.* namespace for pure mode.
__version__ = "0.21"
__version__ = "0.21.1pre"
# BEGIN shameless copy from Cython/minivect/minitypes.py
......
#################### cfunc.to_py ####################
@cname("{{cname}}")
cdef object {{cname}}({{return_type.ctype}} (*f)({{ ', '.join(arg.type_cname for arg in args) }}) {{except_clause}}):
    def wrap({{ ', '.join('{arg.ctype} {arg.name}'.format(arg=arg) for arg in args) }}):
        """wrap({{', '.join(('{arg.name}: {arg.type_displayname}'.format(arg=arg) if arg.type_displayname else arg.name) for arg in args)}}){{if return_type.type_displayname}} -> {{return_type.type_displayname}}{{endif}}"""
        {{'' if return_type.type.is_void else 'return '}}f({{ ', '.join(arg.name for arg in args) }})
    return wrap
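For orientation, here is roughly what the template above expands to for a one-argument C function; every name below is a hypothetical placeholder (the real generator emits an internal cname), so treat this as a sketch, not the emitted code:

    # Hedged sketch of the generated coercion for: cdef double square_c(double x)
    cdef object __pyx_square_c_to_py(double (*f)(double)):
        def wrap(double x):
            """wrap(x: float) -> float"""
            return f(x)   # the closure keeps the C function pointer alive
        return wrap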
......@@ -545,13 +545,12 @@ static PyObject *__Pyx_CyFunction_descr_get(PyObject *func, PyObject *obj, PyObj
if (m->flags & __Pyx_CYFUNCTION_CLASSMETHOD) {
if (type == NULL)
type = (PyObject *)(Py_TYPE(obj));
return __Pyx_PyMethod_New(func, type, (PyObject *)(Py_TYPE(type)));
}
if (obj == Py_None)
obj = NULL;
return __Pyx_PyMethod_New(func, obj, type);
}
static PyObject*
......
......@@ -213,6 +213,13 @@ static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject
PyErr_SetObject(type, value);
if (tb) {
#if CYTHON_COMPILING_IN_PYPY
PyObject *tmp_type, *tmp_value, *tmp_tb;
PyErr_Fetch(&tmp_type, &tmp_value, &tmp_tb);
Py_INCREF(tb);
PyErr_Restore(tmp_type, tmp_value, tb);
Py_XDECREF(tmp_tb);
#else
PyThreadState *tstate = PyThreadState_GET();
PyObject* tmp_tb = tstate->curexc_traceback;
if (tb != tmp_tb) {
......@@ -220,6 +227,7 @@ static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject
tstate->curexc_traceback = tb;
Py_XDECREF(tmp_tb);
}
#endif
}
bad:
......
......@@ -62,9 +62,6 @@
#if PY_MAJOR_VERSION >= 3
#define Py_TPFLAGS_CHECKTYPES 0
#define Py_TPFLAGS_HAVE_INDEX 0
#define Py_TPFLAGS_HAVE_NEWBUFFER 0
#endif
......@@ -158,6 +155,12 @@
#define PyBoolObject PyLongObject
#endif
#if PY_MAJOR_VERSION >= 3 && CYTHON_COMPILING_IN_PYPY
#ifndef PyUnicode_InternFromString
#define PyUnicode_InternFromString(s) PyUnicode_FromString(s)
#endif
#endif
#if PY_VERSION_HEX < 0x030200A4
typedef long Py_hash_t;
#define __Pyx_PyInt_FromHash_t PyInt_FromLong
......@@ -168,7 +171,9 @@
#endif
#if PY_MAJOR_VERSION >= 3
#define __Pyx_PyMethod_New(func, self, klass) ((self) ? PyMethod_New(func, self) : PyInstanceMethod_New(func))
#else
#define __Pyx_PyMethod_New(func, self, klass) PyMethod_New(func, self, klass)
#endif
/* inline attribute */
......
import os
import sys

from distutils.core import setup
from distutils.extension import Extension
from Cython.Build import cythonize

# For demo purposes, we build our own tiny library.
......@@ -12,20 +13,19 @@ try:
    assert os.system("ar rcs libmymath.a mymath.o") == 0
except:
    if not os.path.exists("libmymath.a"):
        print("Error building external library, please create libmymath.a manually.")
        sys.exit(1)

# Here is how to use the library built above.
ext_modules = cythonize([
    Extension("call_mymath",
              sources=["call_mymath.pyx"],
              include_dirs=[os.getcwd()],  # path to .h file(s)
              library_dirs=[os.getcwd()],  # path to .a or .so file(s)
              libraries=['mymath'])
])

setup(
    name='Demos',
    ext_modules=ext_modules,
)
......@@ -5,8 +5,7 @@ include pylintrc
include setup.py
include setupegg.py
include bin/*
include cython.py cythonize.py cygdb.py
recursive-include Cython *.pyx *.pxd
include Doc/*
......
def primes(kmax):
    result = []
    if kmax > 1000:
        kmax = 1000
    p = [0] * 1000
    k = 0
    n = 2
    while k < kmax:
        i = 0
        while i < k and n % p[i] != 0:
            i += 1
        if i == k:
            p[k] = n
            k += 1
            result.append(n)
        n += 1
    return result
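For reference, a quick doctest-style check of the demo above (expected values verified by hand):

    >>> primes(5)
    [2, 3, 5, 7, 11]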
......@@ -78,6 +78,8 @@ You can show Cython's code analysis by passing the ``--annotate`` option::
%%cython --annotate
...
.. figure:: ipython.png
Using the Sage notebook
-----------------------
......
......@@ -137,10 +137,14 @@ together into :file:`rect.so`, which you can then import in Python using
``import rect`` (if you forget to link the :file:`Rectangle.o`, you will
get missing symbols while importing the library in Python).
Note that the ``language`` option has no effect on user provided Extension
objects that are passed into ``cythonize()``. It is only used for modules
found by file name (as in the example above).
The options can also be passed directly from the source file, which is
often preferable (and overrides any global option). Starting with
version 0.17, Cython also allows passing external source files into the
``cythonize()`` command this way. Here is a simplified setup.py file::
from distutils.core import setup
from Cython.Build import cythonize
......
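To make the scope of the ``language`` option concrete, here is a hedged sketch (module names are illustrative): the option applies only to sources given by name or pattern, while an explicit ``Extension`` keeps its own settings::

    from distutils.core import setup
    from distutils.extension import Extension
    from Cython.Build import cythonize

    setup(
        ext_modules=cythonize(
            ["rect.pyx",                          # compiled as C++
             Extension("other", ["other.pyx"])],  # keeps its own language setting
            language='c++',
        )
    )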
......@@ -1951,6 +1951,8 @@ def runtests(options, cmd_args, coverage=None):
try:
import jedi
if list(map(int, re.findall('[0-9]+', jedi.__version__))) < [0, 8, 1]:
raise ImportError
except ImportError:
exclude_selectors.append(RegExSelector('Jedi'))
......
......@@ -190,13 +190,13 @@ def acquire_nonbuffer1(first, second=None):
"""
>>> acquire_nonbuffer1(3) # doctest: +ELLIPSIS
Traceback (most recent call last):
TypeError:... 'int'...
>>> acquire_nonbuffer1(type) # doctest: +ELLIPSIS
Traceback (most recent call last):
TypeError:... 'type'...
>>> acquire_nonbuffer1(None, 2) # doctest: +ELLIPSIS
Traceback (most recent call last):
TypeError:... 'int'...
"""
cdef object[int] buf
buf = first
......
......@@ -10,13 +10,11 @@ from Cython.Build.Dependencies import cythonize
from distutils.core import setup
setup(
ext_modules = cythonize("*.pyx"),
ext_modules = cythonize("*.pyx", language='c++'),
)
######## a.pyx ########
from libcpp.vector cimport vector
def use_vector(L):
......
cdef int wrong_args(int x, long y)
cdef long wrong_return_type(int x, int y)
cdef int wrong_exception_check(int x, int y) except 0
cdef int wrong_exception_value(int x, int y) except 0
cdef int wrong_exception_value_check(int x, int y) except 0
cdef int inherit_exception_value(int x, int y) except 0
cdef int inherit_exception_check(int x, int y) except *
# mode: error
# tag: pxd
cdef int wrong_args(int x, int y):
return 2
cdef int wrong_return_type(int x, int y):
return 2
cdef int wrong_exception_check(int x, int y) except? 0:
return 2
cdef int wrong_exception_value(int x, int y) except 1:
return 2
cdef int wrong_exception_value_check(int x, int y) except? 1:
return 2
cdef int inherit_exception_value(int x, int y):
return 2
cdef int inherit_exception_check(int x, int y):
return 2
_ERRORS = """
4:5: Function signature does not match previous declaration
7:5: Function signature does not match previous declaration
10:5: Function signature does not match previous declaration
13:5: Function signature does not match previous declaration
16:5: Function signature does not match previous declaration
19:5: Function signature does not match previous declaration
22:5: Function signature does not match previous declaration
"""
......@@ -18,7 +18,7 @@ def unused_result():
return r
def unused_nested():
def _unused_one():
pass
def unused_class():
......@@ -53,7 +53,7 @@ _ERRORS = """
9:9: Unused entry 'b'
12:15: Unused argument 'arg'
16:6: Unused result in 'r'
21:4: Unused entry '_unused_one'
25:4: Unused entry 'Unused'
35:16: Unused entry 'foo'
36:13: Unused entry 'i'
......
......@@ -14,7 +14,6 @@ from cython.parallel cimport prange, parallel
import gc
import sys
if sys.version_info[0] < 3:
import __builtin__ as builtins
......@@ -26,9 +25,6 @@ __test__ = {}
def testcase(func):
doctest = func.__doc__
if sys.version_info >= (3, 0):
_u = str
else:
......@@ -162,22 +158,22 @@ def acquire_failure3():
@testcase
def acquire_nonbuffer1(first, second=None):
"""
>>> acquire_nonbuffer1(3) # doctest: +ELLIPSIS
Traceback (most recent call last):
...
TypeError:... 'int'...
>>> acquire_nonbuffer1(type) # doctest: +ELLIPSIS
Traceback (most recent call last):
...
TypeError:... 'type'...
>>> acquire_nonbuffer1(None, 2) # doctest: +ELLIPSIS
Traceback (most recent call last):
...
TypeError:... 'int'...
>>> acquire_nonbuffer1(4, object()) # doctest: +ELLIPSIS
Traceback (most recent call last):
...
TypeError:... 'int'...
"""
cdef int[:] buf
buf = first
......
# mode: run
# cython: always_allow_keywords=True
cimport cython
from libc.math cimport sqrt
cdef void empty_cfunc():
print "here"
# same signature
cdef void another_empty_cfunc():
print "there"
def call_empty_cfunc():
"""
>>> call_empty_cfunc()
here
there
"""
cdef object py_func = empty_cfunc
py_func()
cdef object another_py_func = another_empty_cfunc
another_py_func()
cdef double square_c(double x):
return x * x
def call_square_c(x):
"""
>>> call_square_c(2)
4.0
>>> call_square_c(-7)
49.0
"""
cdef object py_func = square_c
return py_func(x)
def return_square_c():
"""
>>> square_c = return_square_c()
>>> square_c(5)
25.0
>>> square_c(x=4)
16.0
>>> square_c.__doc__ # FIXME: try to make original C function name available
'wrap(x: float) -> float'
"""
return square_c
def return_libc_sqrt():
"""
>>> sqrt = return_libc_sqrt()
>>> sqrt(9)
3.0
>>> sqrt(x=9)
3.0
>>> sqrt.__doc__
'wrap(x: float) -> float'
"""
return sqrt
global_csqrt = sqrt
def test_global():
"""
>>> global_csqrt(9)
3.0
>>> global_csqrt.__doc__
'wrap(x: float) -> float'
>>> test_global()
double (double) nogil
Python object
"""
print cython.typeof(sqrt)
print cython.typeof(global_csqrt)
cdef long long rad(long long x):
cdef long long rad = 1
for p in range(2, <long long>sqrt(x) + 1):
if x % p == 0:
rad *= p
while x % p == 0:
x //= p
if x == 1:
break
return rad
cdef bint abc(long long a, long long b, long long c) except -1:
if a + b != c:
raise ValueError("Not a valid abc candidate: (%s, %s, %s)" % (a, b, c))
return rad(a*b*c) < c
def call_abc(a, b, c):
"""
>>> call_abc(2, 3, 5)
False
>>> call_abc(1, 63, 64)
True
>>> call_abc(2, 3**10 * 109, 23**5)
True
>>> call_abc(a=2, b=3**10 * 109, c=23**5)
True
>>> call_abc(1, 1, 1)
Traceback (most recent call last):
...
ValueError: Not a valid abc candidate: (1, 1, 1)
"""
cdef object py_func = abc
return py_func(a, b, c)
def return_abc():
"""
>>> abc = return_abc()
>>> abc(2, 3, 5)
False
>>> abc.__doc__
"wrap(a: 'long long', b: 'long long', c: 'long long') -> bool"
"""
return abc
ctypedef double foo
cdef foo test_typedef_cfunc(foo x):
return x
def test_typedef(x):
"""
>>> test_typedef(100)
100.0
"""
return (<object>test_typedef_cfunc)(x)
cdef union my_union:
int a
double b
cdef struct my_struct:
int which
my_union y
cdef my_struct c_struct_builder(int which, int a, double b):
cdef my_struct value
value.which = which
if which:
value.y.a = a
else:
value.y.b = b
return value
def return_struct_builder():
"""
>>> make = return_struct_builder()
>>> d = make(0, 1, 2)
>>> d['which']
0
>>> d['y']['b']
2.0
>>> d = make(1, 1, 2)
>>> d['which']
1
>>> d['y']['a']
1
>>> make.__doc__
"wrap(which: 'int', a: 'int', b: float) -> 'my_struct'"
"""
return c_struct_builder
cdef object test_object_params_cfunc(a, b):
return a, b
def test_object_params(a, b):
"""
>>> test_object_params(1, 'a')
(1, 'a')
"""
return (<object>test_object_params_cfunc)(a, b)
cdef tuple test_builtin_params_cfunc(list a, dict b):
return a, b
def test_builtin_params(a, b):
"""
>>> test_builtin_params([], {})
([], {})
>>> test_builtin_params(1, 2)
Traceback (most recent call last):
...
TypeError: Argument 'a' has incorrect type (expected list, got int)
"""
return (<object>test_builtin_params_cfunc)(a, b)
def return_builtin_params_cfunc():
"""
>>> cfunc = return_builtin_params_cfunc()
>>> cfunc([1, 2], {'a': 3})
([1, 2], {'a': 3})
>>> cfunc.__doc__
'wrap(a: list, b: dict) -> tuple'
"""
return test_builtin_params_cfunc
cdef class A:
def __repr__(self):
return self.__class__.__name__
cdef class B(A):
pass
cdef A test_cdef_class_params_cfunc(A a, B b):
return b
def test_cdef_class_params(a, b):
"""
>>> test_cdef_class_params(A(), B())
B
>>> test_cdef_class_params(B(), A())
Traceback (most recent call last):
...
TypeError: Argument 'b' has incorrect type (expected cfunc_convert.B, got cfunc_convert.A)
"""
return (<object>test_cdef_class_params_cfunc)(a, b)
# cython: c_string_type=str
# cython: c_string_encoding=ascii
cdef extern from "math.h":
cpdef double pxd_sqrt "sqrt"(double)
# cython: c_string_type=str
# cython: c_string_encoding=ascii
__doc__ = """
>>> sqrt(1)
1.0
>>> pyx_sqrt(4)
2.0
>>> pxd_sqrt(9)
3.0
>>> log(10)
Traceback (most recent call last):
...
NameError: name 'log' is not defined
>>> strchr('abcabc', ord('c'))
'cabc'
"""
cdef extern from "math.h":
cpdef double sqrt(double)
cpdef double pyx_sqrt "sqrt"(double)
cdef double log(double) # not wrapped
cdef extern from "string.h":
# signature must be exact in C++, disagrees with C
cpdef const char* strchr(const char *haystack, int needle)
# tag: posix
from libc.stdlib cimport getenv
from posix.stdlib cimport setenv, unsetenv
from libc.time cimport *
def test_time():
"""
>>> test_time()
"""
cdef time_t t1, t2
t1 = time(NULL)
assert t1 != 0
t1 = time(&t2)
assert t1 == t2
def test_mktime():
"""
>>> test_mktime() # doctest:+ELLIPSIS
(986138177, ...'Sun Apr 1 15:16:17 2001\\n')
"""
cdef tm t, gmt
cdef time_t tt
cdef char *ct
cdef char *tz
tz = getenv("TZ")
setenv("TZ", "UTC", 1)
tzset()
t.tm_sec = 17
t.tm_min = 16
t.tm_hour = 15
t.tm_year = 101
t.tm_mon = 3
t.tm_mday = 1
t.tm_isdst = 0
tt = mktime(&t)
assert tt != -1
ct = ctime(&tt)
assert ct != NULL
if tz:
setenv("TZ", tz, 1)
else:
unsetenv("TZ")
tzset()
return tt, ct
# tag: posix
from posix.sys_time cimport *
def test_itimer(sec, usec):
"""
>>> test_itimer(10, 2)
(10, 2)
"""
cdef itimerval t, gtime
t.it_interval.tv_sec = sec
t.it_interval.tv_usec = usec
t.it_value.tv_sec = sec
t.it_value.tv_usec = usec
ret = setitimer(ITIMER_REAL, &t, NULL)
assert ret == 0
ret = getitimer(ITIMER_REAL, &gtime)
assert ret == 0
t.it_interval.tv_sec = 0
t.it_interval.tv_usec = 0
t.it_value.tv_sec = 0
t.it_value.tv_usec = 0
ret = setitimer(ITIMER_REAL, &t, NULL)
return gtime.it_interval.tv_sec, gtime.it_interval.tv_usec
def test_gettimeofday():
"""
>>> test_gettimeofday()
"""
cdef timeval t
ret = gettimeofday(&t, NULL)
assert ret == 0
# tag: posix

from posix.time cimport *


def test_itimer(sec, usec):
    """
    >>> test_itimer(10, 2)
    (10, 2)
    """
    cdef itimerval t, gtime
    t.it_interval.tv_sec = sec
    t.it_interval.tv_usec = usec
    t.it_value.tv_sec = sec
    t.it_value.tv_usec = usec
    ret = setitimer(ITIMER_REAL, &t, NULL)
    assert ret == 0
    ret = getitimer(ITIMER_REAL, &gtime)
    assert ret == 0
    t.it_interval.tv_sec = 0
    t.it_interval.tv_usec = 0
    t.it_value.tv_sec = 0
    t.it_value.tv_usec = 0
    ret = setitimer(ITIMER_REAL, &t, NULL)
    return gtime.it_interval.tv_sec, gtime.it_interval.tv_usec


def test_gettimeofday():
    """
    >>> test_gettimeofday()
    """
    cdef timeval t
    ret = gettimeofday(&t, NULL)
    assert ret == 0
......@@ -496,6 +496,32 @@ def safe_c_functions():
assert typeof(f) == 'int (*)(int)', typeof(f)
assert 2 == f(1)
@infer_types(None)
def ptr_types():
"""
>>> ptr_types()
"""
cdef int a
a_ptr = &a
assert typeof(a_ptr) == "int *", typeof(a_ptr)
a_ptr_ptr = &a_ptr
assert typeof(a_ptr_ptr) == "int **", typeof(a_ptr_ptr)
cdef int[1] b
b_ref = b
assert typeof(b_ref) == "int *", typeof(b_ref)
ptr = &a
ptr = b
assert typeof(ptr) == "int *", typeof(ptr)
def const_types(const double x, double y, double& z):
"""
>>> const_types(1, 1, 1)
"""
a = x
a = y
a = z
assert typeof(a) == "double", typeof(a)
@infer_types(None)
def args_tuple_keywords(*args, **kwargs):
"""
......