Commit 63c07e29 authored by Robert Bradshaw

Merge branch 'master' into ctuple

Conflicts:
	CHANGES.rst
parents b7d80418 99e68228
......@@ -22,12 +22,14 @@ before_install:
- sudo apt-get install gdb python$( python -c 'import sys; print("%d.%d" % sys.version_info[:2])' )-dbg || true
- dpkg -l | grep gdb || true
install: CFLAGS="-O2 -ggdb" pip install .
install:
- CFLAGS="-O2 -ggdb -Wall -Wextra $(python -c 'import sys; print("-fno-strict-aliasing" if sys.version_info[0] == 2 else "")')" python setup.py build
script:
- PYTHON_DBG="python$( python -c 'import sys; print("%d.%d" % sys.version_info[:2])' )-dbg"
- if $PYTHON_DBG -V >&2; then CFLAGS="-O0 -ggdb" $PYTHON_DBG runtests.py -vv Debugger --backends=$BACKEND; fi
- CFLAGS="-O0 -ggdb" python runtests.py -vv -x Debugger --backends=$BACKEND
- CFLAGS="-O2 -ggdb -Wall -Wextra" python setup.py build_ext -i
- CFLAGS="-O0 -ggdb -Wall -Wextra" python runtests.py -vv -x Debugger --backends=$BACKEND
matrix:
allow_failures:
......@@ -38,4 +40,3 @@ matrix:
env: BACKEND=cpp
- python: pypy3
env: BACKEND=cpp
fast_finish: true
......@@ -8,6 +8,21 @@ Latest
Features added
--------------
* C functions can coerce to Python functions, which allows passing them
around as callable objects.
* New ``cythonize`` option ``-a`` to generate the annotated HTML source view.
* Extern C functions can now be declared as cpdef to export them to
the module's Python namespace. Extern C functions in pxd files export
their values to their own module, iff it exists.
* Missing C-API declarations in ``cpython.unicode`` were added.
* Passing ``language='c++'`` into cythonize() globally enables C++ mode for
all modules that were not passed as Extension objects (i.e. only source
files and file patterns).
* ``Py_hash_t`` is a known type (used in CPython for hash values).
* ``PySlice_*()`` C-API functions are available from the ``cpython.slice``
......@@ -15,14 +30,20 @@ Features added
* Anonymous C tuple types can be declared as (ctype1, ctype2, ...); a short
sketch follows this list.
* Allow arrays of C++ classes.
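
For illustration, a minimal sketch of the new ctuple syntax (function names
are hypothetical):

    cdef (double, int) halve(int n):
        return n / 2.0, n % 2

    def use_ctuple():
        cdef (double, int) pair = halve(5)
        return pair  # coerces to an ordinary Python tuple
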
Bugs fixed
----------
* Mismatching 'except' declarations on signatures in .pxd and .pyx files failed
to produce a compile error (a sketch follows this list).
* Reference leak for non-simple Python expressions in boolean and/or expressions.
* ``getitimer()``, ``setitimer()``, ``gettimeofday()`` and related type/constant
definitions were moved from ``posix/time.pxd`` to ``posix/sys_time.pxd`` to
fix a naming collision.
* To fix a name collision and to reflect availability on host platforms,
standard C declarations [clock(), time(), struct tm and tm* functions]
were moved from posix/time.pxd to a new libc/time.pxd. Patch by Charles
Blake.
* Rerunning unmodified modules in IPython's cython support failed.
Patch by Matthias Bussonier.
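
A sketch of the kind of mismatch that now produces a compile error (file and
function names hypothetical):

    # m.pxd
    cdef int checked(int x) except -1

    # m.pyx -- omitting the 'except -1' from the signature now fails to compile
    cdef int checked(int x):
        return x + 1
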
......@@ -34,7 +55,12 @@ Bugs fixed
if the already created module was used later on (e.g. through a
stale reference in sys.modules or elsewhere).
* Allow arrays of C++ classes.
Other changes
-------------
* Compilation no longer fails hard when unknown compilation options are
passed. Instead, it raises a warning and ignores them (as it did silently
before 0.21). This will be changed back to an error in a future release.
0.21 (2014-09-10)
......
......@@ -145,6 +145,8 @@ def parse_args(args):
help='set a cythonize option')
parser.add_option('-3', dest='python3_mode', action='store_true',
help='use Python 3 syntax mode by default')
parser.add_option('-a', '--annotate', dest='annotate', action='store_true',
help='generate annotated HTML page for source files')
parser.add_option('-x', '--exclude', metavar='PATTERN', dest='excludes',
action='append', default=[],
......@@ -188,6 +190,9 @@ def main(args=None):
Options.error_on_unknown_names = False
Options.error_on_uninitialized = False
if options.annotate:
Options.annotate = True
for path in paths:
cython_compile(path, options)
......
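
A minimal sketch of the new option in use; the module name is hypothetical,
and the keyword form assumes 'annotate' is passed through as a compiler
option, as the code above does for the command line flag:

    # shell equivalent: cythonize -a mymodule.pyx
    from Cython.Build import cythonize
    cythonize("mymodule.pyx", annotate=True)  # writes mymodule.html next to the .c file
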
......@@ -251,6 +251,7 @@ def strip_string_literals(code, prefix='__Pyx_L'):
in_quote = False
hash_mark = single_q = double_q = -1
code_len = len(code)
quote_type = quote_len = None
while True:
if hash_mark < q:
......@@ -260,7 +261,8 @@ def strip_string_literals(code, prefix='__Pyx_L'):
if double_q < q:
double_q = code.find('"', q)
q = min(single_q, double_q)
if q == -1: q = max(single_q, double_q)
if q == -1:
q = max(single_q, double_q)
# We're done.
if q == -1 and hash_mark == -1:
......@@ -276,7 +278,8 @@ def strip_string_literals(code, prefix='__Pyx_L'):
if k % 2 == 0:
q += 1
continue
if code[q] == quote_type and (quote_len == 1 or (code_len > q + 2 and quote_type == code[q+1] == code[q+2])):
if code[q] == quote_type and (
quote_len == 1 or (code_len > q + 2 and quote_type == code[q+1] == code[q+2])):
counter += 1
label = "%s%s_" % (prefix, counter)
literals[label] = code[start+quote_len:q]
......@@ -586,7 +589,8 @@ def create_dependency_tree(ctx=None, quiet=False):
# This may be useful for advanced users?
def create_extension_list(patterns, exclude=[], ctx=None, aliases=None, quiet=False, exclude_failures=False):
def create_extension_list(patterns, exclude=[], ctx=None, aliases=None, quiet=False, language=None,
exclude_failures=False):
if not isinstance(patterns, (list, tuple)):
patterns = [patterns]
explicit_modules = set([m.name for m in patterns if isinstance(m, Extension)])
......@@ -606,6 +610,7 @@ def create_extension_list(patterns, exclude=[], ctx=None, aliases=None, quiet=Fa
name = '*'
base = None
exn_type = Extension
ext_language = language
elif isinstance(pattern, Extension):
for filepattern in pattern.sources:
if os.path.splitext(filepattern)[1] in ('.py', '.pyx'):
......@@ -618,6 +623,7 @@ def create_extension_list(patterns, exclude=[], ctx=None, aliases=None, quiet=Fa
name = template.name
base = DistutilsInfo(exn=template)
exn_type = template.__class__
ext_language = None # do not override whatever the Extension says
else:
raise TypeError(pattern)
......@@ -661,6 +667,9 @@ def create_extension_list(patterns, exclude=[], ctx=None, aliases=None, quiet=Fa
depends = list(set(template.depends).union(set(depends)))
kwds['depends'] = depends
if ext_language and 'language' not in kwds:
kwds['language'] = ext_language
module_list.append(exn_type(
name=module_name,
sources=sources,
......@@ -671,7 +680,7 @@ def create_extension_list(patterns, exclude=[], ctx=None, aliases=None, quiet=Fa
# This is the user-exposed entry point.
def cythonize(module_list, exclude=[], nthreads=0, aliases=None, quiet=False, force=False,
def cythonize(module_list, exclude=[], nthreads=0, aliases=None, quiet=False, force=False, language=None,
exclude_failures=False, **options):
"""
Compile a set of source modules into C/C++ files and return a list of distutils
......@@ -684,6 +693,11 @@ def cythonize(module_list, exclude=[], nthreads=0, aliases=None, quiet=False, fo
When using glob patterns, you can exclude certain module names explicitly
by passing them into the 'exclude' option.
To globally enable C++ mode, you can pass language='c++'. Otherwise, this
will be determined at a per-file level based on compiler directives. This
affects only modules found based on file names. Extension instances passed
into cythonize() will not be changed.
For parallel compilation, set the 'nthreads' option to the number of
concurrent builds.
......@@ -711,6 +725,7 @@ def cythonize(module_list, exclude=[], nthreads=0, aliases=None, quiet=False, fo
ctx=ctx,
quiet=quiet,
exclude_failures=exclude_failures,
language=language,
aliases=aliases)
deps = create_dependency_tree(ctx, quiet=quiet)
build_dir = getattr(options, 'build_dir', None)
......
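
As the docstring above describes, a minimal setup.py sketch of enabling C++
mode globally (file names hypothetical):

    from distutils.core import setup
    from Cython.Build import cythonize

    # C++ mode applies to the .pyx files found by name; Extension instances
    # passed in directly keep whatever language they declare themselves
    setup(ext_modules=cythonize(["algo.pyx", "util.pyx"], language="c++"))
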
......@@ -4282,19 +4282,6 @@ class SliceNode(ExprNode):
if self.is_literal:
code.put_giveref(self.py_result())
def __deepcopy__(self, memo):
"""
There is a copy bug in python 2.4 for slice objects.
"""
return SliceNode(
self.pos,
start=copy.deepcopy(self.start, memo),
stop=copy.deepcopy(self.stop, memo),
step=copy.deepcopy(self.step, memo),
is_temp=self.is_temp,
is_literal=self.is_literal,
constant_result=self.constant_result)
class CallNode(ExprNode):
......@@ -6064,8 +6051,10 @@ class SequenceNode(ExprNode):
if isinstance(mult_factor.constant_result, (int,long)) \
and mult_factor.constant_result > 0:
size_factor = ' * %s' % mult_factor.constant_result
else:
elif mult_factor.type.signed:
size_factor = ' * ((%s<0) ? 0:%s)' % (c_mult, c_mult)
else:
size_factor = ' * (%s)' % (c_mult,)
if self.type is Builtin.tuple_type and (self.is_literal or self.slow) and not c_mult:
# use PyTuple_Pack() to avoid generating huge amounts of one-time code
......@@ -7597,7 +7586,7 @@ class BoundMethodNode(ExprNode):
def generate_result_code(self, code):
code.putln(
"%s = PyMethod_New(%s, %s, (PyObject*)%s->ob_type); %s" % (
"%s = __Pyx_PyMethod_New(%s, %s, (PyObject*)%s->ob_type); %s" % (
self.result(),
self.function.py_result(),
self.self_object.py_result(),
......@@ -7629,7 +7618,7 @@ class UnboundMethodNode(ExprNode):
def generate_result_code(self, code):
class_cname = code.pyclass_stack[-1].classobj.result()
code.putln(
"%s = PyMethod_New(%s, 0, %s); %s" % (
"%s = __Pyx_PyMethod_New(%s, 0, %s); %s" % (
self.result(),
self.function.py_result(),
class_cname,
......
......@@ -634,7 +634,7 @@ def check_definitions(flow, compiler_directives):
for entry in flow.entries:
if (not entry.cf_references
and not entry.is_pyclass_attr):
if entry.name != '_':
if entry.name != '_' and not entry.name.startswith('unused'):
# '_' is often used for unused variables, e.g. in loops
if entry.is_arg:
if warn_unused_arg:
......
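
A sketch of what the extended check above exempts, assuming compilation with
the 'warn.unused' directive enabled (variable names hypothetical):

    # cython: warn.unused=True
    def probe():
        _ = 1            # '_' was already exempt from the unused-variable warning
        unused_flag = 2  # names starting with 'unused' are now exempt as well
        leftover = 3     # still warns, since it is never referenced
        return None
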
......@@ -359,10 +359,9 @@ class Context(object):
return ".".join(names)
def setup_errors(self, options, result):
Errors.reset() # clear any remaining error state
Errors.reset() # clear any remaining error state
if options.use_listing_file:
result.listing_file = Utils.replace_suffix(source, ".lis")
path = result.listing_file
path = result.listing_file = Utils.replace_suffix(result.main_source_file, ".lis")
else:
path = None
Errors.open_listing_file(path=path,
......@@ -499,11 +498,14 @@ class CompilationOptions(object):
# ignore valid options that are not in the defaults
unknown_options.difference_update(['include_path'])
if unknown_options:
raise ValueError("got unexpected compilation option%s: %s" % (
# TODO: make this a hard error in 0.22
message = "got unknown compilation option%s, please remove: %s" % (
's' if len(unknown_options) > 1 else '',
', '.join(unknown_options)))
', '.join(unknown_options))
import warnings
warnings.warn(message)
directives = dict(options['compiler_directives']) # copy mutable field
directives = dict(options['compiler_directives']) # copy mutable field
options['compiler_directives'] = directives
if 'language_level' in directives and 'language_level' not in kw:
options['language_level'] = int(directives['language_level'])
......
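
A sketch of the softened failure mode, assuming a source file mod.pyx exists;
'not_a_real_option' is intentionally bogus:

    import warnings
    from Cython.Build import cythonize

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        cythonize("mod.pyx", not_a_real_option=True)  # warned about, then ignored
    assert "unknown compilation option" in str(caught[0].message)
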
......@@ -439,13 +439,16 @@ def get_is_contig_utility(c_contig, ndim):
return utility
def copy_src_to_dst_cname():
return "__pyx_memoryview_copy_contents"
def verify_direct_dimensions(node):
for access, packing in node.type.axes:
if access != 'direct':
error(self.pos, "All dimensions must be direct")
error(node.pos, "All dimensions must be direct")
def copy_broadcast_memview_src_to_dst(src, dst, code):
"""
......@@ -662,7 +665,7 @@ def get_axes_specs(env, axes):
if entry.name in view_constant_to_access_packing:
axes_specs.append(view_constant_to_access_packing[entry.name])
else:
raise CompilerError(axis.step.pos, INVALID_ERR)
raise CompileError(axis.step.pos, INVALID_ERR)
else:
raise CompileError(axis.step.pos, INVALID_ERR)
......
......@@ -1289,6 +1289,11 @@ class CVarDefNode(StatNode):
"Non-trivial type declarators in shared declaration (e.g. mix of pointers and values). " +
"Each pointer declaration should be on its own line.", 1)
create_extern_wrapper = (self.overridable
and self.visibility == 'extern'
and env.is_module_scope)
if create_extern_wrapper:
declarator.overridable = False
if isinstance(declarator, CFuncDeclaratorNode):
name_declarator, type = declarator.analyse(base_type, env, directive_locals=self.directive_locals)
else:
......@@ -1314,6 +1319,9 @@ class CVarDefNode(StatNode):
self.entry.directive_locals = copy.copy(self.directive_locals)
if 'staticmethod' in env.directives:
type.is_static_method = True
if create_extern_wrapper:
self.entry.type.create_to_py_utility_code(env)
self.entry.create_wrapper = True
else:
if self.directive_locals:
error(self.pos, "Decorators can only be followed by functions")
......@@ -1601,7 +1609,7 @@ class FuncDefNode(StatNode, BlockNode):
if arg.name in directive_locals:
type_node = directive_locals[arg.name]
other_type = type_node.analyse_as_type(env)
elif isinstance(arg, CArgDeclNode) and arg.annotation:
elif isinstance(arg, CArgDeclNode) and arg.annotation and env.directives['annotation_typing']:
type_node = arg.annotation
other_type = arg.inject_type_from_annotations(env)
if other_type is None:
......
......@@ -1244,7 +1244,10 @@ class CConstType(BaseType):
def declaration_code(self, entity_code,
for_display = 0, dll_linkage = None, pyrex = 0):
return self.const_base_type.declaration_code("const %s" % entity_code, for_display, dll_linkage, pyrex)
if for_display or pyrex:
return "const " + self.const_base_type.declaration_code(entity_code, for_display, dll_linkage, pyrex)
else:
return self.const_base_type.declaration_code("const %s" % entity_code, for_display, dll_linkage, pyrex)
def specialize(self, values):
base_type = self.const_base_type.specialize(values)
......@@ -1539,8 +1542,10 @@ class CBIntType(CIntType):
def declaration_code(self, entity_code,
for_display = 0, dll_linkage = None, pyrex = 0):
if pyrex or for_display:
if for_display:
base_code = 'bool'
elif pyrex:
base_code = 'bint'
else:
base_code = public_decl('int', dll_linkage)
return self.base_declaration_code(base_code, entity_code)
......@@ -2410,6 +2415,10 @@ class CFuncType(CType):
return 0
if not self.same_calling_convention_as(other_type):
return 0
if self.exception_value != other_type.exception_value:
return 0
if self.exception_check != other_type.exception_check:
return 0
return 1
def compatible_signature_with(self, other_type, as_cmethod = 0):
......@@ -2444,10 +2453,14 @@ class CFuncType(CType):
return 0
if self.nogil != other_type.nogil:
return 0
if self.exception_value != other_type.exception_value:
return 0
if not self.exception_check and other_type.exception_check:
# a redundant exception check doesn't make functions incompatible, but a missing one does
return 0
self.original_sig = other_type.original_sig or other_type
return 1
def narrower_c_signature_than(self, other_type, as_cmethod = 0):
return self.narrower_c_signature_than_resolved_type(other_type.resolve(), as_cmethod)
......@@ -2471,6 +2484,11 @@ class CFuncType(CType):
return 0
if not self.return_type.subtype_of_resolved_type(other_type.return_type):
return 0
if self.exception_value != other_type.exception_value:
return 0
if not self.exception_check and other_type.exception_check:
# a redundant exception check doesn't make functions incompatible, but a missing one does
return 0
return 1
def same_calling_convention_as(self, other):
......@@ -2487,22 +2505,12 @@ class CFuncType(CType):
sc2 = other.calling_convention == '__stdcall'
return sc1 == sc2
def same_exception_signature_as(self, other_type):
return self.same_exception_signature_as_resolved_type(
other_type.resolve())
def same_exception_signature_as_resolved_type(self, other_type):
return self.exception_value == other_type.exception_value \
and self.exception_check == other_type.exception_check
def same_as_resolved_type(self, other_type, as_cmethod = 0):
return self.same_c_signature_as_resolved_type(other_type, as_cmethod) \
and self.same_exception_signature_as_resolved_type(other_type) \
and self.nogil == other_type.nogil
def pointer_assignable_from_resolved_type(self, other_type):
return self.same_c_signature_as_resolved_type(other_type) \
and self.same_exception_signature_as_resolved_type(other_type) \
and not (self.nogil and not other_type.nogil)
def declaration_code(self, entity_code,
......@@ -2649,6 +2657,74 @@ class CFuncType(CType):
assert not self.is_fused
specialize_entry(entry, cname)
def create_to_py_utility_code(self, env):
# FIXME: it seems we're trying to coerce in more cases than we should
if self.has_varargs or self.optional_arg_count:
return False
if self.to_py_function is not None:
return self.to_py_function
from .UtilityCode import CythonUtilityCode
import re
safe_typename = re.sub('[^a-zA-Z0-9]', '__', self.declaration_code("", pyrex=1))
to_py_function = "__Pyx_CFunc_%s_to_py" % safe_typename
for arg in self.args:
if not arg.type.is_pyobject and not arg.type.create_from_py_utility_code(env):
return False
if not (self.return_type.is_pyobject or self.return_type.is_void or
self.return_type.create_to_py_utility_code(env)):
return False
def declared_type(ctype):
type_displayname = str(ctype.declaration_code("", for_display=True))
if ctype.is_pyobject:
arg_ctype = type_name = type_displayname
if ctype.is_builtin_type:
arg_ctype = ctype.name
elif not ctype.is_extension_type:
type_name = 'object'
type_displayname = None
else:
type_displayname = repr(type_displayname)
elif ctype is c_bint_type:
type_name = arg_ctype = 'bint'
else:
type_name = arg_ctype = type_displayname
if ctype is c_double_type:
type_displayname = 'float'
else:
type_displayname = repr(type_displayname)
return type_name, arg_ctype, type_displayname
class Arg(object):
def __init__(self, arg_name, arg_type):
self.name = arg_name
self.type = arg_type
self.type_cname, self.ctype, self.type_displayname = declared_type(arg_type)
if self.return_type.is_void:
except_clause = 'except *'
elif self.return_type.is_pyobject:
except_clause = ''
elif self.exception_value:
except_clause = ('except? %s' if self.exception_check else 'except %s') % self.exception_value
else:
except_clause = 'except *'
context = {
'cname': to_py_function,
'args': [Arg(arg.name or 'arg%s' % ix, arg.type) for ix, arg in enumerate(self.args)],
'return_type': Arg('return', self.return_type),
'except_clause': except_clause,
}
# FIXME: directives come from first defining environment and do not adapt for reuse
env.use_utility_code(CythonUtilityCode.load(
"cfunc.to_py", "CFuncConvert.pyx",
outer_module_scope=env.global_scope(), # need access to types declared in module
context=context, compiler_directives=dict(env.directives)))
self.to_py_function = to_py_function
return True
def specialize_entry(entry, cname):
"""
......@@ -3161,7 +3237,7 @@ class CppClassType(CType):
if self == actual:
return {}
# TODO(robertwb): Actual type equality.
elif self.empty_declaration_code() == actual.template_type.declaration_code(""):
elif self.empty_declaration_code() == actual.template_type.empty_declaration_code():
return reduce(
merge_template_deductions,
[formal_param.deduce_template_params(actual_param) for (formal_param, actual_param) in zip(self.templates, actual.templates)],
......
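
The create_to_py_utility_code() addition above is what backs the changelog
entry about C functions coercing to Python functions; a user-level sketch
(names hypothetical):

    cdef double scale(double x, double factor):
        return x * factor

    def get_scaler():
        # returning the cdef function coerces it into a Python callable wrapper
        return scale

Calling get_scaler()(3.0, 2.0) would then invoke the C function through the
generated wrapper.
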
......@@ -4,10 +4,14 @@ import cython
from ..Plex.Scanners cimport Scanner
cdef get_lexicon()
cdef initial_compile_time_env()
cdef class Method:
cdef object name
cdef object __name__
@cython.final
cdef class CompileTimeScope:
cdef public dict entries
cdef public CompileTimeScope outer
......@@ -15,6 +19,7 @@ cdef class CompileTimeScope:
cdef lookup_here(self, name)
cpdef lookup(self, name)
@cython.final
cdef class PyrexScanner(Scanner):
cdef public context
cdef public list included_files
......
......@@ -5,13 +5,15 @@
from __future__ import absolute_import
import cython
cython.declare(EncodedString=object, make_lexicon=object, lexicon=object,
any_string_prefix=unicode, IDENT=unicode,
print_function=object, error=object, warning=object,
os=object, platform=object)
import os
import platform
import cython
cython.declare(EncodedString=object, any_string_prefix=unicode, IDENT=unicode,
print_function=object, error=object, warning=object)
from .. import Utils
from ..Plex.Scanners import Scanner
from ..Plex.Errors import UnrecognizedInput
......@@ -28,12 +30,14 @@ scanner_dump_file = None
lexicon = None
def get_lexicon():
global lexicon
if not lexicon:
lexicon = make_lexicon()
return lexicon
#------------------------------------------------------------------
py_reserved_words = [
......@@ -49,15 +53,17 @@ pyx_reserved_words = py_reserved_words + [
"cimport", "DEF", "IF", "ELIF", "ELSE"
]
class Method(object):
def __init__(self, name):
self.name = name
self.__name__ = name # for Plex tracing
self.__name__ = name # for Plex tracing
def __call__(self, stream, text):
return getattr(stream, self.name)(text)
#------------------------------------------------------------------
class CompileTimeScope(object):
......@@ -88,6 +94,7 @@ class CompileTimeScope(object):
else:
raise
def initial_compile_time_env():
benv = CompileTimeScope()
names = ('UNAME_SYSNAME', 'UNAME_NODENAME', 'UNAME_RELEASE',
......@@ -116,6 +123,7 @@ def initial_compile_time_env():
denv = CompileTimeScope(benv)
return denv
#------------------------------------------------------------------
class SourceDescriptor(object):
......@@ -166,6 +174,7 @@ class SourceDescriptor(object):
except AttributeError:
return False
class FileSourceDescriptor(SourceDescriptor):
"""
Represents a code source. A code source is a more generic abstraction
......@@ -235,6 +244,7 @@ class FileSourceDescriptor(SourceDescriptor):
def __repr__(self):
return "<FileSourceDescriptor:%s>" % self.filename
class StringSourceDescriptor(SourceDescriptor):
"""
Instances of this class can be used instead of a filenames if the
......@@ -275,6 +285,7 @@ class StringSourceDescriptor(SourceDescriptor):
def __repr__(self):
return "<StringSourceDescriptor:%s>" % self.name
#------------------------------------------------------------------
class PyrexScanner(Scanner):
......@@ -284,8 +295,8 @@ class PyrexScanner(Scanner):
# compile_time_eval boolean In a true conditional compilation context
# compile_time_expr boolean In a compile-time expression context
def __init__(self, file, filename, parent_scanner = None,
scope = None, context = None, source_encoding=None, parse_comments=True, initial_pos=None):
def __init__(self, file, filename, parent_scanner=None,
scope=None, context=None, source_encoding=None, parse_comments=True, initial_pos=None):
Scanner.__init__(self, get_lexicon(), file, filename, initial_pos)
if parent_scanner:
self.context = parent_scanner.context
......@@ -299,8 +310,7 @@ class PyrexScanner(Scanner):
self.compile_time_env = initial_compile_time_env()
self.compile_time_eval = 1
self.compile_time_expr = 0
if hasattr(context.options, 'compile_time_env') and \
context.options.compile_time_env is not None:
if getattr(context.options, 'compile_time_env', None):
self.compile_time_env.update(context.options.compile_time_env)
self.parse_comments = parse_comments
self.source_encoding = source_encoding
......@@ -326,11 +336,11 @@ class PyrexScanner(Scanner):
return self.indentation_stack[-1]
def open_bracket_action(self, text):
self.bracket_nesting_level = self.bracket_nesting_level + 1
self.bracket_nesting_level += 1
return text
def close_bracket_action(self, text):
self.bracket_nesting_level = self.bracket_nesting_level - 1
self.bracket_nesting_level -= 1
return text
def newline_action(self, text):
......@@ -406,6 +416,7 @@ class PyrexScanner(Scanner):
sy, systring = self.read()
except UnrecognizedInput:
self.error("Unrecognized character")
return # just a marker, error() always raises
if sy == IDENT:
if systring in self.keywords:
if systring == u'print' and print_function in self.context.future_directives:
......@@ -445,21 +456,21 @@ class PyrexScanner(Scanner):
# This method should be added to Plex
self.queue.insert(0, (token, value))
def error(self, message, pos = None, fatal = True):
def error(self, message, pos=None, fatal=True):
if pos is None:
pos = self.position()
if self.sy == 'INDENT':
err = error(pos, "Possible inconsistent indentation")
error(pos, "Possible inconsistent indentation")
err = error(pos, message)
if fatal: raise err
def expect(self, what, message = None):
def expect(self, what, message=None):
if self.sy == what:
self.next()
else:
self.expected(what, message)
def expect_keyword(self, what, message = None):
def expect_keyword(self, what, message=None):
if self.sy == IDENT and self.systring == what:
self.next()
else:
......@@ -476,12 +487,10 @@ class PyrexScanner(Scanner):
self.error("Expected '%s', found '%s'" % (what, found))
def expect_indent(self):
self.expect('INDENT',
"Expected an increase in indentation level")
self.expect('INDENT', "Expected an increase in indentation level")
def expect_dedent(self):
self.expect('DEDENT',
"Expected a decrease in indentation level")
self.expect('DEDENT', "Expected a decrease in indentation level")
def expect_newline(self, message="Expected a newline", ignore_semicolon=False):
# Expect either a newline or end of file
......
......@@ -303,7 +303,7 @@ class Scope(object):
self.name = name
self.outer_scope = outer_scope
self.parent_scope = parent_scope
mangled_name = "%d%s_" % (len(name), name)
mangled_name = "%d%s_" % (len(name), name.replace('.', '_dot_'))
qual_scope = self.qualifying_scope()
if qual_scope:
self.qualified_name = qual_scope.qualify_name(name)
......@@ -1044,15 +1044,13 @@ class ModuleScope(Scope):
def global_scope(self):
return self
def lookup(self, name):
def lookup(self, name, language_level=None):
entry = self.lookup_here(name)
if entry is not None:
return entry
if self.context is not None:
language_level = self.context.language_level
else:
language_level = 3
if language_level is None:
language_level = self.context.language_level if self.context is not None else 3
return self.outer_scope.lookup(name, language_level=language_level)
......
......@@ -23,16 +23,19 @@ from . import UtilNodes
class StringParseContext(Main.Context):
def __init__(self, name, include_directories=None):
if include_directories is None: include_directories = []
Main.Context.__init__(self, include_directories, {},
def __init__(self, name, include_directories=None, compiler_directives=None):
if include_directories is None:
include_directories = []
if compiler_directives is None:
compiler_directives = {}
Main.Context.__init__(self, include_directories, compiler_directives,
create_testscope=False)
self.module_name = name
def find_module(self, module_name, relative_to = None, pos = None, need_pxd = 1):
def find_module(self, module_name, relative_to=None, pos=None, need_pxd=1):
if module_name not in (self.module_name, 'cython'):
raise AssertionError("Not yet supporting any cimports/includes from string code snippets")
return ModuleScope(module_name, parent_module = None, context = self)
return ModuleScope(module_name, parent_module=None, context=self)
def parse_from_strings(name, code, pxds={}, level=None, initial_pos=None,
......@@ -64,7 +67,7 @@ def parse_from_strings(name, code, pxds={}, level=None, initial_pos=None,
initial_pos = (name, 1, 0)
code_source = StringSourceDescriptor(name, code)
scope = context.find_module(module_name, pos = initial_pos, need_pxd = 0)
scope = context.find_module(module_name, pos=initial_pos, need_pxd=False)
buf = StringIO(code)
......@@ -190,20 +193,27 @@ class TemplateTransform(VisitorTransform):
else:
return self.visit_Node(node)
def copy_code_tree(node):
return TreeCopier()(node)
INDENT_RE = re.compile(ur"^ *")
_match_indent = re.compile(ur"^ *").match
def strip_common_indent(lines):
"Strips empty lines and common indentation from the list of strings given in lines"
"""Strips empty lines and common indentation from the list of strings given in lines"""
# TODO: Facilitate textwrap.indent instead
lines = [x for x in lines if x.strip() != u""]
minindent = min([len(INDENT_RE.match(x).group(0)) for x in lines])
minindent = min([len(_match_indent(x).group(0)) for x in lines])
lines = [x[minindent:] for x in lines]
return lines
class TreeFragment(object):
def __init__(self, code, name="(tree fragment)", pxds={}, temps=[], pipeline=[], level=None, initial_pos=None):
def __init__(self, code, name=None, pxds={}, temps=[], pipeline=[], level=None, initial_pos=None):
if not name:
name = "(tree fragment)"
if isinstance(code, unicode):
def fmt(x): return u"\n".join(strip_common_indent(x.split(u"\n")))
......
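
For reference, a sketch of what strip_common_indent() does:

    from Cython.Compiler.TreeFragment import strip_common_indent

    lines = [u"", u"    if flag:", u"        result = 1"]
    # the empty line is dropped, then the common 4-space indent is removed
    print(strip_common_indent(lines))  # [u'if flag:', u'    result = 1']
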
......@@ -494,24 +494,22 @@ def find_spanning_type(type1, type2):
return PyrexTypes.c_double_type
return result_type
def aggressive_spanning_type(types, might_overflow, pos):
result_type = reduce(find_spanning_type, types)
def simply_type(result_type, pos):
if result_type.is_reference:
result_type = result_type.ref_base_type
if result_type.is_const:
result_type = result_type.const_base_type
if result_type.is_cpp_class:
result_type.check_nullary_constructor(pos)
if result_type.is_array:
result_type = PyrexTypes.c_ptr_type(result_type.base_type)
return result_type
def aggressive_spanning_type(types, might_overflow, pos):
return simply_type(reduce(find_spanning_type, types), pos)
def safe_spanning_type(types, might_overflow, pos):
result_type = reduce(find_spanning_type, types)
if result_type.is_const:
result_type = result_type.const_base_type
if result_type.is_reference:
result_type = result_type.ref_base_type
if result_type.is_cpp_class:
result_type.check_nullary_constructor(pos)
result_type = simply_type(reduce(find_spanning_type, types), pos)
if result_type.is_pyobject:
# In theory, any specific Python type is always safe to
# infer. However, inferring str can cause some existing code
......
......@@ -8,6 +8,8 @@ from . import Code
class NonManglingModuleScope(Symtab.ModuleScope):
cpp = False
def __init__(self, prefix, *args, **kw):
self.prefix = prefix
self.cython_scope = None
......@@ -28,12 +30,11 @@ class NonManglingModuleScope(Symtab.ModuleScope):
else:
return Symtab.ModuleScope.mangle(self, prefix)
class CythonUtilityCodeContext(StringParseContext):
scope = None
def find_module(self, module_name, relative_to = None, pos = None,
need_pxd = 1):
def find_module(self, module_name, relative_to=None, pos=None, need_pxd=True):
if module_name != self.module_name:
if module_name not in self.modules:
raise AssertionError("Only the cython cimport is supported.")
......@@ -41,10 +42,8 @@ class CythonUtilityCodeContext(StringParseContext):
return self.modules[module_name]
if self.scope is None:
self.scope = NonManglingModuleScope(self.prefix,
module_name,
parent_module=None,
context=self)
self.scope = NonManglingModuleScope(
self.prefix, module_name, parent_module=None, context=self)
return self.scope
......@@ -69,7 +68,8 @@ class CythonUtilityCode(Code.UtilityCodeBase):
is_cython_utility = True
def __init__(self, impl, name="__pyxutil", prefix="", requires=None,
file=None, from_scope=None, context=None):
file=None, from_scope=None, context=None, compiler_directives=None,
outer_module_scope=None):
# 1) We need to delay the parsing/processing, so that all modules can be
# imported without import loops
# 2) The same utility code object can be used for multiple source files;
......@@ -84,6 +84,20 @@ class CythonUtilityCode(Code.UtilityCodeBase):
self.prefix = prefix
self.requires = requires or []
self.from_scope = from_scope
self.outer_module_scope = outer_module_scope
self.compiler_directives = compiler_directives
def __eq__(self, other):
if isinstance(other, CythonUtilityCode):
return self._equality_params() == other._equality_params()
else:
return False
def _equality_params(self):
return self.impl, self.outer_module_scope, self.compiler_directives
def __hash__(self):
return hash(self.impl)
def get_tree(self, entries_only=False, cython_scope=None):
from .AnalysedTreeTransforms import AutoTestDictTransform
......@@ -93,12 +107,13 @@ class CythonUtilityCode(Code.UtilityCodeBase):
excludes = [AutoTestDictTransform]
from . import Pipeline, ParseTreeTransforms
context = CythonUtilityCodeContext(self.name)
context = CythonUtilityCodeContext(
self.name, compiler_directives=self.compiler_directives)
context.prefix = self.prefix
context.cython_scope = cython_scope
#context = StringParseContext(self.name)
tree = parse_from_strings(self.name, self.impl, context=context,
allow_struct_enum_decorator=True)
tree = parse_from_strings(
self.name, self.impl, context=context, allow_struct_enum_decorator=True)
pipeline = Pipeline.create_pipeline(context, 'pyx', exclude_classes=excludes)
if entries_only:
......@@ -126,6 +141,16 @@ class CythonUtilityCode(Code.UtilityCodeBase):
pipeline = Pipeline.insert_into_pipeline(pipeline, scope_transform,
before=transform)
if self.outer_module_scope:
# inject outer module between utility code module and builtin module
def scope_transform(module_node):
module_node.scope.outer_scope = self.outer_module_scope
return module_node
transform = ParseTreeTransforms.AnalyseDeclarationsTransform
pipeline = Pipeline.insert_into_pipeline(pipeline, scope_transform,
before=transform)
(err, tree) = Pipeline.run_pipeline(pipeline, tree, printtree=False)
assert not err, err
return tree
......
......@@ -131,6 +131,131 @@ cdef extern from *:
#Py_ssize_t PyUnicode_AsWideChar(object o, wchar_t *w, Py_ssize_t size)
# Unicode Methods
# Concat two strings giving a new Unicode string.
# Return value: New reference.
unicode PyUnicode_Concat(object left, object right)
# Split a string giving a list of Unicode strings. If sep is NULL,
# splitting will be done at all whitespace substrings. Otherwise,
# splits occur at the given separator. At most maxsplit splits will
# be done. If negative, no limit is set. Separators are not included
# in the resulting list.
# Return value: New reference.
list PyUnicode_Split(object s, object sep, Py_ssize_t maxsplit)
# Split a Unicode string at line breaks, returning a list of Unicode
# strings. CRLF is considered to be one line break. If keepend is 0,
# the line break characters are not included in the resulting strings.
# Return value: New reference.
list PyUnicode_Splitlines(object s, bint keepend)
# Translate a string by applying a character mapping table to it and
# return the resulting Unicode object.
#
# The mapping table must map Unicode ordinal integers to Unicode ordinal
# integers or None (causing deletion of the character).
#
# Mapping tables need only provide the __getitem__() interface;
# dictionaries and sequences work well. Unmapped character ordinals (ones
# which cause a LookupError) are left untouched and are copied as-is.
#
# errors has the usual meaning for codecs. It may be NULL which indicates
# to use the default error handling.
# Return value: New reference.
unicode PyUnicode_Translate(object str, object table, const char *errors)
# Join a sequence of strings using the given separator and return the
# resulting Unicode string.
# Return value: New reference.
unicode PyUnicode_Join(object separator, object seq)
# Return 1 if substr matches str[start:end] at the given tail end
# (direction == -1 means to do a prefix match, direction == 1 a
# suffix match), 0 otherwise.
# Return -1 if an error occurred.
Py_ssize_t PyUnicode_Tailmatch(object str, object substr,
Py_ssize_t start, Py_ssize_t end, int direction) except -1
# Return the first position of substr in str[start:end] using the given
# direction (direction == 1 means to do a forward search, direction == -1
# a backward search). The return value is the index of the first match;
# a value of -1 indicates that no match was found, and -2 indicates that an
# error occurred and an exception has been set.
Py_ssize_t PyUnicode_Find(object str, object substr, Py_ssize_t start, Py_ssize_t end, int direction) except -2
# Return the first position of the character ch in str[start:end] using
# the given direction (direction == 1 means to do a forward search,
# direction == -1 a backward search). The return value is the index of
# the first match; a value of -1 indicates that no match was found, and
# -2 indicates that an error occurred and an exception has been set.
# New in version 3.3.
Py_ssize_t PyUnicode_FindChar(object str, Py_UCS4 ch, Py_ssize_t start, Py_ssize_t end, int direction) except -2
# Return the number of non-overlapping occurrences of substr in
# str[start:end]. Return -1 if an error occurred.
Py_ssize_t PyUnicode_Count(object str, object substr, Py_ssize_t start, Py_ssize_t end) except -1
# Replace at most maxcount occurrences of substr in str with replstr and
# return the resulting Unicode object. maxcount == -1 means replace all
# occurrences.
# Return value: New reference.
unicode PyUnicode_Replace(object str, object substr, object replstr, Py_ssize_t maxcount)
# Compare two strings and return -1, 0, 1 for less than,
# equal, and greater than, respectively.
int PyUnicode_Compare(object left, object right) except? -1
# Compare a unicode object, uni, with string and return -1, 0, 1 for less than,
# equal, and greater than, respectively. It is best to pass only ASCII-encoded
# strings, but the function interprets the input string as ISO-8859-1 if it
# contains non-ASCII characters.
int PyUnicode_CompareWithASCIIString(object uni, char *string) except? -1
# Rich compare two unicode strings and return one of the following:
#
# NULL in case an exception was raised
# Py_True or Py_False for successful comparisons
# Py_NotImplemented in case the type combination is unknown
#
# Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in case
# the conversion of the arguments to Unicode fails with a UnicodeDecodeError.
#
# Possible values for op are Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, and Py_LE.
object PyUnicode_RichCompare(object left, object right, int op)
# Return a new string object from format and args; this is analogous to
# format % args.
# Return value: New reference.
unicode PyUnicode_Format(object format, object args)
# Check whether element is contained in container and return true or false
# accordingly.
#
# element has to coerce to a one element Unicode string. -1 is returned
# if there was an error.
int PyUnicode_Contains(object container, object element) except -1
# Intern the argument *string in place. The argument must be the address
# of a pointer variable pointing to a Python unicode string object. If
# there is an existing interned string that is the same as *string, it sets
# *string to it (decrementing the reference count of the old string object
# and incrementing the reference count of the interned string object),
# otherwise it leaves *string alone and interns it (incrementing its reference
# count). (Clarification: even though there is a lot of talk about reference
# counts, think of this function as reference-count-neutral; you own the object
# after the call if and only if you owned it before the call.)
#void PyUnicode_InternInPlace(PyObject **string)
# A combination of PyUnicode_FromString() and PyUnicode_InternInPlace(),
# returning either a new unicode string object that has been interned, or
# a new ("owned") reference to an earlier interned string object with the
# same value.
unicode PyUnicode_InternFromString(const char *v)
# Codecs
# Create a Unicode object by decoding size bytes of the encoded
......@@ -161,22 +286,22 @@ cdef extern from *:
# Create a Unicode object by decoding size bytes of the UTF-8
# encoded string s. Return NULL if an exception was raised by the
# codec.
object PyUnicode_DecodeUTF8(char *s, Py_ssize_t size, char *errors)
unicode PyUnicode_DecodeUTF8(char *s, Py_ssize_t size, char *errors)
# If consumed is NULL, behave like PyUnicode_DecodeUTF8(). If
# consumed is not NULL, trailing incomplete UTF-8 byte sequences
# will not be treated as an error. Those bytes will not be decoded
# and the number of bytes that have been decoded will be stored in
# consumed. New in version 2.4.
object PyUnicode_DecodeUTF8Stateful(char *s, Py_ssize_t size, char *errors, Py_ssize_t *consumed)
unicode PyUnicode_DecodeUTF8Stateful(char *s, Py_ssize_t size, char *errors, Py_ssize_t *consumed)
# Encode the Py_UNICODE buffer of the given size using UTF-8 and
# return a Python string object. Return NULL if an exception was
# raised by the codec.
object PyUnicode_EncodeUTF8(Py_UNICODE *s, Py_ssize_t size, char *errors)
bytes PyUnicode_EncodeUTF8(Py_UNICODE *s, Py_ssize_t size, char *errors)
# Encode a Unicode object using UTF-8 and return the result as Python string object. Error handling is ``strict''. Return NULL if an exception was raised by the codec.
object PyUnicode_AsUTF8String(object unicode)
bytes PyUnicode_AsUTF8String(object unicode)
# These are the UTF-16 codec APIs:
......@@ -198,7 +323,7 @@ cdef extern from *:
# order at the.
#
# If byteorder is NULL, the codec starts in native order mode.
object PyUnicode_DecodeUTF16(char *s, Py_ssize_t size, char *errors, int *byteorder)
unicode PyUnicode_DecodeUTF16(char *s, Py_ssize_t size, char *errors, int *byteorder)
# If consumed is NULL, behave like PyUnicode_DecodeUTF16(). If
# consumed is not NULL, PyUnicode_DecodeUTF16Stateful() will not
......@@ -206,7 +331,7 @@ cdef extern from *:
# number of bytes or a split surrogate pair) as an error. Those
# bytes will not be decoded and the number of bytes that have been
# decoded will be stored in consumed. New in version 2.4.
object PyUnicode_DecodeUTF16Stateful(char *s, Py_ssize_t size, char *errors, int *byteorder, Py_ssize_t *consumed)
unicode PyUnicode_DecodeUTF16Stateful(char *s, Py_ssize_t size, char *errors, int *byteorder, Py_ssize_t *consumed)
# Return a Python string object holding the UTF-16 encoded value
# of the Unicode data in s. If byteorder is not 0, output is
......@@ -223,13 +348,13 @@ cdef extern from *:
# If Py_UNICODE_WIDE is defined, a single Py_UNICODE value may get
# represented as a surrogate pair. If it is not defined, each
# Py_UNICODE value is interpreted as a UCS-2 character.
object PyUnicode_EncodeUTF16(Py_UNICODE *s, Py_ssize_t size, char *errors, int byteorder)
bytes PyUnicode_EncodeUTF16(Py_UNICODE *s, Py_ssize_t size, char *errors, int byteorder)
# Return a Python string using the UTF-16 encoding in native byte
# order. The string always starts with a BOM mark. Error handling
# is ``strict''. Return NULL if an exception was raised by the
# codec.
object PyUnicode_AsUTF16String(object unicode)
bytes PyUnicode_AsUTF16String(object unicode)
# These are the ``Unicode Escape'' codec APIs:
......@@ -270,17 +395,17 @@ cdef extern from *:
# Create a Unicode object by decoding size bytes of the Latin-1
# encoded string s. Return NULL if an exception was raised by the
# codec.
object PyUnicode_DecodeLatin1(char *s, Py_ssize_t size, char *errors)
unicode PyUnicode_DecodeLatin1(char *s, Py_ssize_t size, char *errors)
# Encode the Py_UNICODE buffer of the given size using Latin-1 and
# return a Python string object. Return NULL if an exception was
# return a Python bytes object. Return NULL if an exception was
# raised by the codec.
object PyUnicode_EncodeLatin1(Py_UNICODE *s, Py_ssize_t size, char *errors)
bytes PyUnicode_EncodeLatin1(Py_UNICODE *s, Py_ssize_t size, char *errors)
# Encode a Unicode object using Latin-1 and return the result as
# Python string object. Error handling is ``strict''. Return NULL
# Python bytes object. Error handling is ``strict''. Return NULL
# if an exception was raised by the codec.
object PyUnicode_AsLatin1String(object unicode)
bytes PyUnicode_AsLatin1String(object unicode)
# These are the ASCII codec APIs. Only 7-bit ASCII data is
# accepted. All other codes generate errors.
......@@ -288,17 +413,17 @@ cdef extern from *:
# Create a Unicode object by decoding size bytes of the ASCII
# encoded string s. Return NULL if an exception was raised by the
# codec.
object PyUnicode_DecodeASCII(char *s, Py_ssize_t size, char *errors)
unicode PyUnicode_DecodeASCII(char *s, Py_ssize_t size, char *errors)
# Encode the Py_UNICODE buffer of the given size using ASCII and
# return a Python string object. Return NULL if an exception was
# return a Python bytes object. Return NULL if an exception was
# raised by the codec.
object PyUnicode_EncodeASCII(Py_UNICODE *s, Py_ssize_t size, char *errors)
bytes PyUnicode_EncodeASCII(Py_UNICODE *s, Py_ssize_t size, char *errors)
# Encode a Unicode object using ASCII and return the result as
# Python string object. Error handling is ``strict''. Return NULL
# Python bytes object. Error handling is ``strict''. Return NULL
# if an exception was raised by the codec.
object PyUnicode_AsASCIIString(object o)
bytes PyUnicode_AsASCIIString(object o)
# These are the mapping codec APIs:
#
......@@ -339,6 +464,8 @@ cdef extern from *:
# Encode the Py_UNICODE buffer of the given size using the given
# mapping object and return a Python string object. Return NULL if
# an exception was raised by the codec.
#
# Deprecated since version 3.3, will be removed in version 4.0.
object PyUnicode_EncodeCharmap(Py_UNICODE *s, Py_ssize_t size, object mapping, char *errors)
# Encode a Unicode object using the given mapping object and
......@@ -359,6 +486,8 @@ cdef extern from *:
# dictionaries and sequences work well. Unmapped character
# ordinals (ones which cause a LookupError) are left untouched and
# are copied as-is.
#
# Deprecated since version 3.3, will be removed in version 4.0.
object PyUnicode_TranslateCharmap(Py_UNICODE *s, Py_ssize_t size,
object table, char *errors)
......@@ -371,21 +500,43 @@ cdef extern from *:
# Create a Unicode object by decoding size bytes of the MBCS
# encoded string s. Return NULL if an exception was raised by the
# codec.
object PyUnicode_DecodeMBCS(char *s, Py_ssize_t size, char *errors)
unicode PyUnicode_DecodeMBCS(char *s, Py_ssize_t size, char *errors)
# If consumed is NULL, behave like PyUnicode_DecodeMBCS(). If
# consumed is not NULL, PyUnicode_DecodeMBCSStateful() will not
# decode trailing lead byte and the number of bytes that have been
# decoded will be stored in consumed. New in version 2.5.
# NOTE: Python 2.x uses 'int' values for 'size' and 'consumed' (changed in 3.0)
object PyUnicode_DecodeMBCSStateful(char *s, Py_ssize_t size, char *errors, Py_ssize_t *consumed)
unicode PyUnicode_DecodeMBCSStateful(char *s, Py_ssize_t size, char *errors, Py_ssize_t *consumed)
# Encode the Py_UNICODE buffer of the given size using MBCS and
# return a Python string object. Return NULL if an exception was
# raised by the codec.
object PyUnicode_EncodeMBCS(Py_UNICODE *s, Py_ssize_t size, char *errors)
bytes PyUnicode_EncodeMBCS(Py_UNICODE *s, Py_ssize_t size, char *errors)
# Encode a Unicode object using MBCS and return the result as
# Python string object. Error handling is ``strict''. Return NULL
# if an exception was raised by the codec.
object PyUnicode_AsMBCSString(object o)
bytes PyUnicode_AsMBCSString(object o)
# Encode the Unicode object using the specified code page and return
# a Python bytes object. Return NULL if an exception was raised by the
# codec. Use CP_ACP code page to get the MBCS encoder.
#
# New in version 3.3.
bytes PyUnicode_EncodeCodePage(int code_page, object unicode, const char *errors)
# Py_UCS4 helpers (new in CPython 3.3)
# These utility functions work on strings of Py_UCS4 characters and
# otherwise behave like the C standard library functions with the same name.
size_t Py_UCS4_strlen(const Py_UCS4 *u)
Py_UCS4* Py_UCS4_strcpy(Py_UCS4 *s1, const Py_UCS4 *s2)
Py_UCS4* Py_UCS4_strncpy(Py_UCS4 *s1, const Py_UCS4 *s2, size_t n)
Py_UCS4* Py_UCS4_strcat(Py_UCS4 *s1, const Py_UCS4 *s2)
int Py_UCS4_strcmp(const Py_UCS4 *s1, const Py_UCS4 *s2)
int Py_UCS4_strncmp(const Py_UCS4 *s1, const Py_UCS4 *s2, size_t n)
Py_UCS4* Py_UCS4_strchr(const Py_UCS4 *s, Py_UCS4 c)
Py_UCS4* Py_UCS4_strrchr(const Py_UCS4 *s, Py_UCS4 c)
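
A sketch of the sharpened declarations in use from a .pyx file (function name
hypothetical):

    from cpython.unicode cimport PyUnicode_Concat, PyUnicode_AsUTF8String

    def join_and_encode(a, b):
        cdef unicode joined = PyUnicode_Concat(a, b)      # typed unicode, not object
        cdef bytes data = PyUnicode_AsUTF8String(joined)  # typed bytes, not object
        return data
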
# http://en.wikipedia.org/wiki/C_date_and_time_functions
from libc.stddef cimport wchar_t
cdef extern from "time.h" nogil:
ctypedef long clock_t
ctypedef long time_t
enum: CLOCKS_PER_SEC
clock_t clock() # CPU time
time_t time(time_t *) # wall clock time since Unix epoch
cdef struct tm:
int tm_sec
int tm_min
int tm_hour
int tm_mday
int tm_mon
int tm_year
int tm_wday
int tm_yday
int tm_isdst
char *tm_zone
long tm_gmtoff
int daylight # global state
long timezone
char *tzname[2]
void tzset()
char *asctime(const tm *)
char *asctime_r(const tm *, char *)
char *ctime(const time_t *)
char *ctime_r(const time_t *, char *)
double difftime(time_t, time_t)
tm *getdate(const char *)
tm *gmtime(const time_t *)
tm *gmtime_r(const time_t *, tm *)
tm *localtime(const time_t *)
tm *localtime_r(const time_t *, tm *)
time_t mktime(tm *)
size_t strftime(char *, size_t, const char *, const tm *)
size_t wcsftime(wchar_t *str, size_t cnt, const wchar_t *fmt, tm *time)
# POSIX not stdC
char *strptime(const char *, const char *, tm *)
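
A sketch of the relocated declarations in use from a .pyx file (function name
hypothetical):

    from libc.time cimport time, time_t, tm, localtime, strftime

    def timestamp():
        cdef time_t now = time(NULL)
        cdef tm* parts = localtime(&now)
        cdef char buf[32]
        cdef size_t n = strftime(buf, sizeof(buf), b"%Y-%m-%d %H:%M:%S", parts)
        return buf[:n].decode('ascii')
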
# http://pubs.opengroup.org/onlinepubs/009695399/basedefs/sys/resource.h.html
from posix.sys_time cimport timeval
from posix.time cimport timeval
from posix.types cimport id_t
cdef extern from "sys/resource.h" nogil:
......
# http://pubs.opengroup.org/onlinepubs/009695399/basedefs/sys/time.h.html
from posix.types cimport suseconds_t, time_t
cdef extern from "sys/time.h" nogil:
enum: ITIMER_REAL
enum: ITIMER_VIRTUAL
enum: ITIMER_PROF
cdef struct timezone:
int tz_minuteswest
int dsttime
cdef struct timeval:
time_t tv_sec
suseconds_t tv_usec
cdef struct itimerval:
timeval it_interval
timeval it_value
int getitimer(int, itimerval *)
int gettimeofday(timeval *tp, timezone *tzp)
int setitimer(int, const itimerval *, itimerval *)
# http://pubs.opengroup.org/onlinepubs/009695399/basedefs/sys/time.h.html
from posix.types cimport suseconds_t, time_t, clockid_t, timer_t
from posix.signal cimport sigevent
from posix.types cimport clock_t, clockid_t, suseconds_t, time_t, timer_t
cdef extern from "time.h" nogil:
enum: CLOCKS_PER_SEC
cdef extern from "sys/time.h" nogil:
enum: CLOCK_PROCESS_CPUTIME_ID
enum: CLOCK_THREAD_CPUTIME_ID
......@@ -33,55 +31,44 @@ cdef extern from "time.h" nogil:
enum: CLOCK_REALTIME_ALARM
enum: CLOCK_BOOTTIME_ALARM
enum: ITIMER_REAL
enum: ITIMER_VIRTUAL
enum: ITIMER_PROF
cdef struct timezone:
int tz_minuteswest
int dsttime
cdef struct timeval:
time_t tv_sec
suseconds_t tv_usec
cdef struct timespec:
time_t tv_sec
long tv_nsec
cdef struct itimerval:
timeval it_interval
timeval it_value
cdef struct itimerspec:
timespec it_interval
timespec it_value
cdef struct tm:
int tm_sec
int tm_min
int tm_hour
int tm_mday
int tm_mon
int tm_year
int tm_wday
int tm_yday
int tm_isdst
char *tm_zone
long tm_gmtoff
int nanosleep(const timespec *, timespec *)
int getitimer(int, itimerval *)
int gettimeofday(timeval *tp, timezone *tzp)
int setitimer(int, const itimerval *, itimerval *)
char *asctime(const tm *)
char *asctime_r(const tm *, char *)
clock_t clock()
int clock_getcpuclockid(pid_t, clockid_t *)
int clock_getres(clockid_t, timespec *)
int clock_gettime(clockid_t, timespec *)
int clock_nanosleep(clockid_t, int, const timespec *, timespec *)
int clock_settime(clockid_t, const timespec *)
char *ctime(const time_t *)
char *ctime_r(const time_t *, char *)
double difftime(time_t, time_t)
tm *getdate(const char *)
tm *gmtime(const time_t *)
tm *gmtime_r(const time_t *, tm *)
tm *localtime(const time_t *)
tm *localtime_r(const time_t *, tm *)
time_t mktime(tm *)
int nanosleep(const timespec *, timespec *)
size_t strftime(char *, size_t, const char *, const tm *)
char *strptime(const char *, const char *, tm *)
time_t time(time_t *)
int timer_create(clockid_t, sigevent *, timer_t *)
int timer_delete(timer_t)
int timer_gettime(timer_t, itimerspec *)
int timer_getoverrun(timer_t)
int timer_settime(timer_t, int, const itimerspec *, itimerspec *)
void tzset()
int clock_getcpuclockid(pid_t, clockid_t *)
int clock_getres(clockid_t, timespec *)
int clock_gettime(clockid_t, timespec *)
int clock_nanosleep(clockid_t, int, const timespec *, timespec *)
int clock_settime(clockid_t, const timespec *)
int daylight
long timezone
char *tzname[2]
int timer_create(clockid_t, sigevent *, timer_t *)
int timer_delete(timer_t)
int timer_gettime(timer_t, itimerspec *)
int timer_getoverrun(timer_t)
int timer_settime(timer_t, int, const itimerspec *, itimerspec *)
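
A sketch of these declarations in use, assuming the post-merge layout keeps
timeval and gettimeofday() in posix/time.pxd (as the sys/resource.pxd change
above suggests):

    from posix.time cimport gettimeofday, timeval

    def wall_clock():
        cdef timeval tv
        gettimeofday(&tv, NULL)  # NULL: no timezone info requested
        return tv.tv_sec + tv.tv_usec / 1000000.0
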
cdef extern from "sys/types.h":
ctypedef long blkcnt_t
ctypedef long blksize_t
ctypedef long clock_t
ctypedef long clockid_t
ctypedef long dev_t
ctypedef long gid_t
......
......@@ -7,98 +7,101 @@
#=======================================================================
class Action(object):
def perform(self, token_stream, text):
pass # abstract
def perform(self, token_stream, text):
pass # abstract
def same_as(self, other):
return self is other
def same_as(self, other):
return self is other
class Return(Action):
"""
Internal Plex action which causes |value| to
be returned as the value of the associated token
"""
"""
Internal Plex action which causes |value| to
be returned as the value of the associated token
"""
def __init__(self, value):
self.value = value
def __init__(self, value):
self.value = value
def perform(self, token_stream, text):
return self.value
def perform(self, token_stream, text):
return self.value
def same_as(self, other):
return isinstance(other, Return) and self.value == other.value
def same_as(self, other):
return isinstance(other, Return) and self.value == other.value
def __repr__(self):
return "Return(%s)" % repr(self.value)
def __repr__(self):
return "Return(%s)" % repr(self.value)
class Call(Action):
"""
Internal Plex action which causes a function to be called.
"""
"""
Internal Plex action which causes a function to be called.
"""
def __init__(self, function):
self.function = function
def __init__(self, function):
self.function = function
def perform(self, token_stream, text):
return self.function(token_stream, text)
def perform(self, token_stream, text):
return self.function(token_stream, text)
def __repr__(self):
return "Call(%s)" % self.function.__name__
def __repr__(self):
return "Call(%s)" % self.function.__name__
def same_as(self, other):
return isinstance(other, Call) and self.function is other.function
def same_as(self, other):
return isinstance(other, Call) and self.function is other.function
class Begin(Action):
"""
Begin(state_name) is a Plex action which causes the Scanner to
enter the state |state_name|. See the docstring of Plex.Lexicon
for more information.
"""
"""
Begin(state_name) is a Plex action which causes the Scanner to
enter the state |state_name|. See the docstring of Plex.Lexicon
for more information.
"""
def __init__(self, state_name):
self.state_name = state_name
def __init__(self, state_name):
self.state_name = state_name
def perform(self, token_stream, text):
token_stream.begin(self.state_name)
def perform(self, token_stream, text):
token_stream.begin(self.state_name)
def __repr__(self):
return "Begin(%s)" % self.state_name
def __repr__(self):
return "Begin(%s)" % self.state_name
def same_as(self, other):
return isinstance(other, Begin) and self.state_name == other.state_name
def same_as(self, other):
return isinstance(other, Begin) and self.state_name == other.state_name
class Ignore(Action):
"""
IGNORE is a Plex action which causes its associated token
to be ignored. See the docstring of Plex.Lexicon for more
information.
"""
def perform(self, token_stream, text):
return None
"""
IGNORE is a Plex action which causes its associated token
to be ignored. See the docstring of Plex.Lexicon for more
information.
"""
def perform(self, token_stream, text):
return None
def __repr__(self):
return "IGNORE"
def __repr__(self):
return "IGNORE"
IGNORE = Ignore()
#IGNORE.__doc__ = Ignore.__doc__
class Text(Action):
"""
TEXT is a Plex action which causes the text of a token to
be returned as the value of the token. See the docstring of
Plex.Lexicon for more information.
"""
"""
TEXT is a Plex action which causes the text of a token to
be returned as the value of the token. See the docstring of
Plex.Lexicon for more information.
"""
def perform(self, token_stream, text):
return text
def perform(self, token_stream, text):
return text
def __repr__(self):
return "TEXT"
def __repr__(self):
return "TEXT"
TEXT = Text()
#TEXT.__doc__ = Text.__doc__
......
......@@ -13,147 +13,152 @@ from .Machines import LOWEST_PRIORITY
from .Transitions import TransitionMap
def nfa_to_dfa(old_machine, debug=None):
"""
Given a nondeterministic Machine, return a new equivalent
Machine which is deterministic.
"""
# We build a new machine whose states correspond to sets of states
# in the old machine. Initially we add a new state corresponding to
# the epsilon-closure of each initial old state. Then we give transitions
# to each new state which are the union of all transitions out of any
# of the corresponding old states. The new state reached on a given
# character is the one corresponding to the set of states reachable
# on that character from any of the old states. As new combinations of
# old states are created, new states are added as needed until closure
# is reached.
new_machine = Machines.FastMachine()
state_map = StateMap(new_machine)
# Seed the process using the initial states of the old machine.
# Make the corresponding new states into initial states of the new
# machine with the same names.
for (key, old_state) in old_machine.initial_states.iteritems():
new_state = state_map.old_to_new(epsilon_closure(old_state))
new_machine.make_initial_state(key, new_state)
# Tricky bit here: we add things to the end of this list while we're
# iterating over it. The iteration stops when closure is achieved.
for new_state in new_machine.states:
transitions = TransitionMap()
for old_state in state_map.new_to_old(new_state):
for event, old_target_states in old_state.transitions.iteritems():
if event and old_target_states:
transitions.add_set(event, set_epsilon_closure(old_target_states))
for event, old_states in transitions.iteritems():
new_machine.add_transitions(new_state, event, state_map.old_to_new(old_states))
if debug:
debug.write("\n===== State Mapping =====\n")
state_map.dump(debug)
return new_machine
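The comment block above is the classical subset construction. A self-contained toy sketch of the same loop on plain dicts (a hypothetical representation, not the Machines/TransitionMap types used here) makes the closure step easier to follow:

def toy_nfa_to_dfa(nfa, starts):
    # nfa: {state: {symbol: iterable_of_states}}; the symbol '' marks epsilon moves.
    def closure(states):
        result, stack = set(states), list(states)
        while stack:
            for target in nfa.get(stack.pop(), {}).get('', ()):
                if target not in result:
                    result.add(target)
                    stack.append(target)
        return frozenset(result)

    initial = closure(starts)
    dfa, worklist = {}, [initial]
    while worklist:  # the worklist grows until no new state sets appear
        current = worklist.pop()
        if current in dfa:
            continue
        moves = {}
        for state in current:
            for symbol, targets in nfa.get(state, {}).items():
                if symbol:  # real transitions only; epsilon is folded in by closure()
                    moves.setdefault(symbol, set()).update(closure(targets))
        dfa[current] = dict((sym, frozenset(t)) for sym, t in moves.items())
        worklist.extend(dfa[current].values())
    return initial, dfa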
def set_epsilon_closure(state_set):
"""
Given a set of states, return the union of the epsilon
closures of its member states.
"""
result = {}
for state1 in state_set:
for state2 in epsilon_closure(state1):
result[state2] = 1
return result
"""
Given a set of states, return the union of the epsilon
closures of its member states.
"""
result = {}
for state1 in state_set:
for state2 in epsilon_closure(state1):
result[state2] = 1
return result
def epsilon_closure(state):
"""
Return the set of states reachable from the given state
by epsilon moves.
"""
# Cache the result
result = state.epsilon_closure
if result is None:
result = {}
state.epsilon_closure = result
add_to_epsilon_closure(result, state)
return result
"""
Return the set of states reachable from the given state
by epsilon moves.
"""
# Cache the result
result = state.epsilon_closure
if result is None:
result = {}
state.epsilon_closure = result
add_to_epsilon_closure(result, state)
return result
def add_to_epsilon_closure(state_set, state):
"""
Recursively add to |state_set| states reachable from the given state
by epsilon moves.
"""
if not state_set.get(state, 0):
state_set[state] = 1
state_set_2 = state.transitions.get_epsilon()
if state_set_2:
for state2 in state_set_2:
add_to_epsilon_closure(state_set, state2)
class StateMap(object):
    """
    Helper class used by nfa_to_dfa() to map back and forth between
    sets of states from the old machine and states of the new machine.
    """
new_machine = None # Machine
old_to_new_dict = None # {(old_state,...) : new_state}
new_to_old_dict = None # {id(new_state) : old_state_set}
def __init__(self, new_machine):
self.new_machine = new_machine
self.old_to_new_dict = {}
self.new_to_old_dict = {}
def old_to_new(self, old_state_set):
"""
Return the state of the new machine corresponding to the
        set of old machine states represented by |old_state_set|. A new
state will be created if necessary. If any of the old states
are accepting states, the new state will be an accepting state
with the highest priority action from the old states.
"""
key = self.make_key(old_state_set)
new_state = self.old_to_new_dict.get(key, None)
if not new_state:
action = self.highest_priority_action(old_state_set)
new_state = self.new_machine.new_state(action)
self.old_to_new_dict[key] = new_state
self.new_to_old_dict[id(new_state)] = old_state_set
#for old_state in old_state_set.keys():
#new_state.merge_actions(old_state)
return new_state
def highest_priority_action(self, state_set):
best_action = None
best_priority = LOWEST_PRIORITY
for state in state_set:
priority = state.action_priority
if priority > best_priority:
best_action = state.action
best_priority = priority
return best_action
# def old_to_new_set(self, old_state_set):
# """
# Return the new state corresponding to a set of old states as
# a singleton set.
# """
# return {self.old_to_new(old_state_set):1}
def new_to_old(self, new_state):
"""Given a new state, return a set of corresponding old states."""
return self.new_to_old_dict[id(new_state)]
def make_key(self, state_set):
"""
Convert a set of states into a uniquified
sorted tuple suitable for use as a dictionary key.
"""
lst = list(state_set)
lst.sort()
return tuple(lst)
def dump(self, file):
from .Transitions import state_set_str
for new_state in self.new_machine.states:
old_state_set = self.new_to_old_dict[id(new_state)]
file.write(" State %s <-- %s\n" % (
new_state['number'], state_set_str(old_state_set)))
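A worked example of the key construction (with plain integers standing in for Node objects): make_key() only needs the states to be sortable, which is why Node defines __lt__ below.

state_set = {3: 1, 1: 1, 2: 1}  # old states are dict keys, as nfa_to_dfa() builds them
lst = list(state_set)           # arbitrary order, e.g. [3, 1, 2]
lst.sort()
key = tuple(lst)                # always (1, 2, 3), so equal sets share one DFA state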
......@@ -6,45 +6,49 @@
#
#=======================================================================
class PlexError(Exception):
message = ""
message = ""
class PlexTypeError(PlexError, TypeError):
pass
class PlexValueError(PlexError, ValueError):
pass
class InvalidRegex(PlexError):
pass
class InvalidToken(PlexError):
def __init__(self, token_number, message):
PlexError.__init__(self, "Token number %d: %s" % (token_number, message))
class InvalidScanner(PlexError):
pass
class AmbiguousAction(PlexError):
message = "Two tokens with different actions can match the same string"
def __init__(self):
pass
class UnrecognizedInput(PlexError):
scanner = None
position = None
state_name = None
def __init__(self, scanner, state_name):
self.scanner = scanner
self.position = scanner.get_position()
self.state_name = state_name
def __str__(self):
return ("'%s', line %d, char %d: Token not recognised in state %r" % (
self.position + (self.state_name,)))
......@@ -22,177 +22,179 @@ DUMP_DFA = 2
class State(object):
    """
    This class is used as part of a Plex.Lexicon specification to
    introduce a user-defined state.

    Constructor:

       State(name, token_specifications)
    """
    name = None
    tokens = None

    def __init__(self, name, tokens):
        self.name = name
        self.tokens = tokens
class Lexicon(object):
"""
Lexicon(specification) builds a lexical analyser from the given
|specification|. The specification consists of a list of
specification items. Each specification item may be either:
1) A token definition, which is a tuple:
(pattern, action)
The |pattern| is a regular axpression built using the
constructors defined in the Plex module.
The |action| is the action to be performed when this pattern
is recognised (see below).
2) A state definition:
State(name, tokens)
where |name| is a character string naming the state,
and |tokens| is a list of token definitions as
above. The meaning and usage of states is described
below.
Actions
-------
The |action| in a token specication may be one of three things:
1) A function, which is called as follows:
function(scanner, text)
where |scanner| is the relevant Scanner instance, and |text|
is the matched text. If the function returns anything
other than None, that value is returned as the value of the
token. If it returns None, scanning continues as if the IGNORE
action were specified (see below).
2) One of the following special actions:
IGNORE means that the recognised characters will be treated as
white space and ignored. Scanning will continue until
the next non-ignored token is recognised before returning.
TEXT causes the scanned text itself to be returned as the
value of the token.
3) Any other value, which is returned as the value of the token.
States
------
At any given time, the scanner is in one of a number of states.
Associated with each state is a set of possible tokens. When scanning,
only tokens associated with the current state are recognised.
There is a default state, whose name is the empty string. Token
definitions which are not inside any State definition belong to
the default state.
The initial state of the scanner is the default state. The state can
be changed in one of two ways:
1) Using Begin(state_name) as the action of a token.
2) Calling the begin(state_name) method of the Scanner.
To change back to the default state, use '' as the state name.
"""
machine = None # Machine
tables = None # StateTableMachine
def __init__(self, specifications, debug = None, debug_flags = 7, timings = None):
if type(specifications) != types.ListType:
raise Errors.InvalidScanner("Scanner definition is not a list")
if timings:
from .Timing import time
total_time = 0.0
time1 = time()
nfa = Machines.Machine()
default_initial_state = nfa.new_initial_state('')
token_number = 1
for spec in specifications:
if isinstance(spec, State):
user_initial_state = nfa.new_initial_state(spec.name)
for token in spec.tokens:
self.add_token_to_machine(
nfa, user_initial_state, token, token_number)
token_number = token_number + 1
elif type(spec) == types.TupleType:
self.add_token_to_machine(
nfa, default_initial_state, spec, token_number)
token_number = token_number + 1
else:
raise Errors.InvalidToken(
token_number,
"Expected a token definition (tuple) or State instance")
if timings:
time2 = time()
total_time = total_time + (time2 - time1)
time3 = time()
if debug and (debug_flags & 1):
debug.write("\n============= NFA ===========\n")
nfa.dump(debug)
dfa = DFA.nfa_to_dfa(nfa, debug = (debug_flags & 3) == 3 and debug)
if timings:
time4 = time()
total_time = total_time + (time4 - time3)
if debug and (debug_flags & 2):
debug.write("\n============= DFA ===========\n")
dfa.dump(debug)
if timings:
timings.write("Constructing NFA : %5.2f\n" % (time2 - time1))
timings.write("Converting to DFA: %5.2f\n" % (time4 - time3))
timings.write("TOTAL : %5.2f\n" % total_time)
self.machine = dfa
def add_token_to_machine(self, machine, initial_state, token_spec, token_number):
try:
(re, action_spec) = self.parse_token_definition(token_spec)
# Disabled this -- matching empty strings can be useful
#if re.nullable:
# raise Errors.InvalidToken(
# token_number, "Pattern can match 0 input symbols")
if isinstance(action_spec, Actions.Action):
action = action_spec
else:
"""
Lexicon(specification) builds a lexical analyser from the given
|specification|. The specification consists of a list of
specification items. Each specification item may be either:
1) A token definition, which is a tuple:
(pattern, action)
    The |pattern| is a regular expression built using the
constructors defined in the Plex module.
The |action| is the action to be performed when this pattern
is recognised (see below).
2) A state definition:
State(name, tokens)
where |name| is a character string naming the state,
and |tokens| is a list of token definitions as
above. The meaning and usage of states is described
below.
Actions
-------
    The |action| in a token specification may be one of three things:
1) A function, which is called as follows:
function(scanner, text)
where |scanner| is the relevant Scanner instance, and |text|
is the matched text. If the function returns anything
other than None, that value is returned as the value of the
token. If it returns None, scanning continues as if the IGNORE
action were specified (see below).
2) One of the following special actions:
IGNORE means that the recognised characters will be treated as
white space and ignored. Scanning will continue until
the next non-ignored token is recognised before returning.
TEXT causes the scanned text itself to be returned as the
value of the token.
3) Any other value, which is returned as the value of the token.
States
------
At any given time, the scanner is in one of a number of states.
Associated with each state is a set of possible tokens. When scanning,
only tokens associated with the current state are recognised.
There is a default state, whose name is the empty string. Token
definitions which are not inside any State definition belong to
the default state.
The initial state of the scanner is the default state. The state can
be changed in one of two ways:
1) Using Begin(state_name) as the action of a token.
2) Calling the begin(state_name) method of the Scanner.
To change back to the default state, use '' as the state name.
"""
machine = None # Machine
tables = None # StateTableMachine
def __init__(self, specifications, debug=None, debug_flags=7, timings=None):
if type(specifications) != types.ListType:
raise Errors.InvalidScanner("Scanner definition is not a list")
if timings:
from .Timing import time
total_time = 0.0
time1 = time()
nfa = Machines.Machine()
default_initial_state = nfa.new_initial_state('')
token_number = 1
for spec in specifications:
if isinstance(spec, State):
user_initial_state = nfa.new_initial_state(spec.name)
for token in spec.tokens:
self.add_token_to_machine(
nfa, user_initial_state, token, token_number)
token_number += 1
elif type(spec) == types.TupleType:
self.add_token_to_machine(
nfa, default_initial_state, spec, token_number)
token_number += 1
else:
raise Errors.InvalidToken(
token_number,
"Expected a token definition (tuple) or State instance")
if timings:
time2 = time()
total_time = total_time + (time2 - time1)
time3 = time()
if debug and (debug_flags & 1):
debug.write("\n============= NFA ===========\n")
nfa.dump(debug)
dfa = DFA.nfa_to_dfa(nfa, debug=(debug_flags & 3) == 3 and debug)
if timings:
time4 = time()
total_time = total_time + (time4 - time3)
if debug and (debug_flags & 2):
debug.write("\n============= DFA ===========\n")
dfa.dump(debug)
if timings:
timings.write("Constructing NFA : %5.2f\n" % (time2 - time1))
timings.write("Converting to DFA: %5.2f\n" % (time4 - time3))
timings.write("TOTAL : %5.2f\n" % total_time)
self.machine = dfa
def add_token_to_machine(self, machine, initial_state, token_spec, token_number):
try:
(re, action_spec) = self.parse_token_definition(token_spec)
# Disabled this -- matching empty strings can be useful
#if re.nullable:
# raise Errors.InvalidToken(
# token_number, "Pattern can match 0 input symbols")
if isinstance(action_spec, Actions.Action):
action = action_spec
else:
try:
action_spec.__call__
except AttributeError:
action = Actions.Return(action_spec)
else:
action = Actions.Call(action_spec)
final_state = machine.new_state()
re.build_machine(machine, initial_state, final_state,
match_bol=1, nocase=0)
final_state.set_action(action, priority=-token_number)
except Errors.PlexError, e:
raise e.__class__("Token number %d: %s" % (token_number, e))
def parse_token_definition(self, token_spec):
if type(token_spec) != types.TupleType:
raise Errors.InvalidToken("Token definition is not a tuple")
if len(token_spec) != 2:
raise Errors.InvalidToken("Wrong number of items in token definition")
pattern, action = token_spec
if not isinstance(pattern, Regexps.RE):
raise Errors.InvalidToken("Pattern is not an RE instance")
return (pattern, action)
def get_initial_state(self, name):
return self.machine.get_initial_state(name)
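A minimal sketch of a specification this constructor accepts (assuming the usual Cython.Plex exports; the token values are illustrative):

from Cython.Plex import (Lexicon, State, Str, Any, AnyChar, Rep1,
                         Begin, IGNORE)

letters = Any("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
digits = Any("0123456789")

lexicon = Lexicon([
    (Rep1(letters), 'ident'),        # token definition: (pattern, action)
    (Rep1(digits), 'int'),
    (Rep1(Any(" \t\n")), IGNORE),
    (Str("#"), Begin('comment')),    # action switches to the 'comment' state
    State('comment', [               # state definition: State(name, tokens)
        (Str("\n"), Begin('')),      # '' switches back to the default state
        (AnyChar, IGNORE),
    ]),
])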
......@@ -16,244 +16,245 @@ LOWEST_PRIORITY = -sys.maxint
class Machine(object):
"""A collection of Nodes representing an NFA or DFA."""
states = None # [Node]
next_state_number = 1
initial_states = None # {(name, bol): Node}
def __init__(self):
self.states = []
self.initial_states = {}
def __del__(self):
#print "Destroying", self ###
for state in self.states:
state.destroy()
def new_state(self):
"""Add a new state to the machine and return it."""
s = Node()
n = self.next_state_number
self.next_state_number = n + 1
s.number = n
self.states.append(s)
return s
def new_initial_state(self, name):
state = self.new_state()
self.make_initial_state(name, state)
return state
def make_initial_state(self, name, state):
self.initial_states[name] = state
def get_initial_state(self, name):
return self.initial_states[name]
def dump(self, file):
file.write("Plex.Machine:\n")
if self.initial_states is not None:
file.write(" Initial states:\n")
for (name, state) in self.initial_states.iteritems():
file.write(" '%s': %d\n" % (name, state.number))
for s in self.states:
s.dump(file)
"""A collection of Nodes representing an NFA or DFA."""
states = None # [Node]
next_state_number = 1
initial_states = None # {(name, bol): Node}
def __init__(self):
self.states = []
self.initial_states = {}
def __del__(self):
#print "Destroying", self ###
for state in self.states:
state.destroy()
def new_state(self):
"""Add a new state to the machine and return it."""
s = Node()
n = self.next_state_number
self.next_state_number = n + 1
s.number = n
self.states.append(s)
return s
def new_initial_state(self, name):
state = self.new_state()
self.make_initial_state(name, state)
return state
def make_initial_state(self, name, state):
self.initial_states[name] = state
def get_initial_state(self, name):
return self.initial_states[name]
def dump(self, file):
file.write("Plex.Machine:\n")
if self.initial_states is not None:
file.write(" Initial states:\n")
for (name, state) in self.initial_states.iteritems():
file.write(" '%s': %d\n" % (name, state.number))
for s in self.states:
s.dump(file)
class Node(object):
"""A state of an NFA or DFA."""
transitions = None # TransitionMap
action = None # Action
action_priority = None # integer
number = 0 # for debug output
epsilon_closure = None # used by nfa_to_dfa()
def __init__(self):
# Preinitialise the list of empty transitions, because
# the nfa-to-dfa algorithm needs it
#self.transitions = {'':[]}
self.transitions = TransitionMap()
self.action_priority = LOWEST_PRIORITY
def destroy(self):
#print "Destroying", self ###
self.transitions = None
self.action = None
self.epsilon_closure = None
def add_transition(self, event, new_state):
self.transitions.add(event, new_state)
def link_to(self, state):
"""Add an epsilon-move from this state to another state."""
self.add_transition('', state)
def set_action(self, action, priority):
"""Make this an accepting state with the given action. If
there is already an action, choose the action with highest
priority."""
if priority > self.action_priority:
self.action = action
self.action_priority = priority
def get_action(self):
return self.action
def get_action_priority(self):
return self.action_priority
def is_accepting(self):
return self.action is not None
def __str__(self):
return "State %d" % self.number
def dump(self, file):
# Header
file.write(" State %d:\n" % self.number)
# Transitions
# self.dump_transitions(file)
self.transitions.dump(file)
# Action
action = self.action
priority = self.action_priority
if action is not None:
file.write(" %s [priority %d]\n" % (action, priority))
def __lt__(self, other):
return self.number < other.number
"""A state of an NFA or DFA."""
transitions = None # TransitionMap
action = None # Action
action_priority = None # integer
number = 0 # for debug output
epsilon_closure = None # used by nfa_to_dfa()
def __init__(self):
# Preinitialise the list of empty transitions, because
# the nfa-to-dfa algorithm needs it
#self.transitions = {'':[]}
self.transitions = TransitionMap()
self.action_priority = LOWEST_PRIORITY
def destroy(self):
#print "Destroying", self ###
self.transitions = None
self.action = None
self.epsilon_closure = None
def add_transition(self, event, new_state):
self.transitions.add(event, new_state)
def link_to(self, state):
"""Add an epsilon-move from this state to another state."""
self.add_transition('', state)
def set_action(self, action, priority):
"""Make this an accepting state with the given action. If
there is already an action, choose the action with highest
priority."""
if priority > self.action_priority:
self.action = action
self.action_priority = priority
def get_action(self):
return self.action
def get_action_priority(self):
return self.action_priority
def is_accepting(self):
return self.action is not None
def __str__(self):
return "State %d" % self.number
def dump(self, file):
# Header
file.write(" State %d:\n" % self.number)
# Transitions
# self.dump_transitions(file)
self.transitions.dump(file)
# Action
action = self.action
priority = self.action_priority
if action is not None:
file.write(" %s [priority %d]\n" % (action, priority))
def __lt__(self, other):
return self.number < other.number
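A small sketch of how Machine and Node cooperate (import paths assumed as in the Cython tree; the transitions mirror what RE.build_machine() emits for 'ab', plus one epsilon move):

import sys
from Cython.Plex.Machines import Machine
from Cython.Plex.Actions import TEXT

m = Machine()
start = m.new_initial_state('')                      # entry for the default scanner state
mid = m.new_state()
final = m.new_state()
start.add_transition((ord('a'), ord('a') + 1), mid)  # single-character code range for 'a'
mid.add_transition((ord('b'), ord('b') + 1), final)  # ... and for 'b'
mid.link_to(final)                                   # epsilon move: 'a' alone accepts too
final.set_action(TEXT, priority=-1)                  # accepting state returns matched text
m.dump(sys.stdout)                                   # lists every state with its transitions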
class FastMachine(object):
"""
FastMachine is a deterministic machine represented in a way that
allows fast scanning.
"""
initial_states = None # {state_name:state}
states = None # [state]
# where state = {event:state, 'else':state, 'action':Action}
next_number = 1 # for debugging
new_state_template = {
'':None, 'bol':None, 'eol':None, 'eof':None, 'else':None
}
def __init__(self, old_machine = None):
self.initial_states = initial_states = {}
self.states = []
if old_machine:
self.old_to_new = old_to_new = {}
for old_state in old_machine.states:
new_state = self.new_state()
old_to_new[old_state] = new_state
for name, old_state in old_machine.initial_states.iteritems():
initial_states[name] = old_to_new[old_state]
for old_state in old_machine.states:
new_state = old_to_new[old_state]
for event, old_state_set in old_state.transitions.iteritems():
if old_state_set:
new_state[event] = old_to_new[old_state_set.keys()[0]]
else:
new_state[event] = None
new_state['action'] = old_state.action
def __del__(self):
for state in self.states:
state.clear()
def new_state(self, action = None):
number = self.next_number
self.next_number = number + 1
result = self.new_state_template.copy()
result['number'] = number
result['action'] = action
self.states.append(result)
return result
def make_initial_state(self, name, state):
self.initial_states[name] = state
def add_transitions(self, state, event, new_state, maxint=sys.maxint):
if type(event) is tuple:
code0, code1 = event
if code0 == -maxint:
state['else'] = new_state
elif code1 != maxint:
while code0 < code1:
state[chr(code0)] = new_state
code0 = code0 + 1
else:
state[event] = new_state
def get_initial_state(self, name):
return self.initial_states[name]
def dump(self, file):
file.write("Plex.FastMachine:\n")
file.write(" Initial states:\n")
for name, state in self.initial_states.iteritems():
file.write(" %s: %s\n" % (repr(name), state['number']))
for state in self.states:
self.dump_state(state, file)
def dump_state(self, state, file):
# Header
file.write(" State %d:\n" % state['number'])
# Transitions
self.dump_transitions(state, file)
# Action
action = state['action']
if action is not None:
file.write(" %s\n" % action)
def dump_transitions(self, state, file):
chars_leading_to_state = {}
special_to_state = {}
for (c, s) in state.iteritems():
if len(c) == 1:
chars = chars_leading_to_state.get(id(s), None)
if chars is None:
chars = []
chars_leading_to_state[id(s)] = chars
chars.append(c)
elif len(c) <= 4:
special_to_state[c] = s
ranges_to_state = {}
for state in self.states:
char_list = chars_leading_to_state.get(id(state), None)
if char_list:
ranges = self.chars_to_ranges(char_list)
ranges_to_state[ranges] = state
ranges_list = ranges_to_state.keys()
ranges_list.sort()
for ranges in ranges_list:
key = self.ranges_to_string(ranges)
state = ranges_to_state[ranges]
file.write(" %s --> State %d\n" % (key, state['number']))
for key in ('bol', 'eol', 'eof', 'else'):
state = special_to_state.get(key, None)
if state:
file.write(" %s --> State %d\n" % (key, state['number']))
def chars_to_ranges(self, char_list):
char_list.sort()
i = 0
n = len(char_list)
result = []
while i < n:
c1 = ord(char_list[i])
c2 = c1
i = i + 1
while i < n and ord(char_list[i]) == c2 + 1:
i = i + 1
c2 = c2 + 1
result.append((chr(c1), chr(c2)))
return tuple(result)
def ranges_to_string(self, range_list):
return ','.join(map(self.range_to_string, range_list))
def range_to_string(self, range_tuple):
(c1, c2) = range_tuple
if c1 == c2:
return repr(c1)
else:
return "%s..%s" % (repr(c1), repr(c2))
"""
FastMachine is a deterministic machine represented in a way that
allows fast scanning.
"""
initial_states = None # {state_name:state}
states = None # [state] where state = {event:state, 'else':state, 'action':Action}
next_number = 1 # for debugging
new_state_template = {
'': None, 'bol': None, 'eol': None, 'eof': None, 'else': None
}
def __init__(self, old_machine=None):
self.initial_states = initial_states = {}
self.states = []
if old_machine:
self.old_to_new = old_to_new = {}
for old_state in old_machine.states:
new_state = self.new_state()
old_to_new[old_state] = new_state
for name, old_state in old_machine.initial_states.iteritems():
initial_states[name] = old_to_new[old_state]
for old_state in old_machine.states:
new_state = old_to_new[old_state]
for event, old_state_set in old_state.transitions.iteritems():
if old_state_set:
new_state[event] = old_to_new[old_state_set.keys()[0]]
else:
new_state[event] = None
new_state['action'] = old_state.action
def __del__(self):
for state in self.states:
state.clear()
def new_state(self, action=None):
number = self.next_number
self.next_number = number + 1
result = self.new_state_template.copy()
result['number'] = number
result['action'] = action
self.states.append(result)
return result
def make_initial_state(self, name, state):
self.initial_states[name] = state
def add_transitions(self, state, event, new_state, maxint=sys.maxint):
if type(event) is tuple:
code0, code1 = event
if code0 == -maxint:
state['else'] = new_state
elif code1 != maxint:
while code0 < code1:
state[unichr(code0)] = new_state
code0 += 1
else:
state[event] = new_state
def get_initial_state(self, name):
return self.initial_states[name]
def dump(self, file):
file.write("Plex.FastMachine:\n")
file.write(" Initial states:\n")
for name, state in self.initial_states.iteritems():
file.write(" %s: %s\n" % (repr(name), state['number']))
for state in self.states:
self.dump_state(state, file)
def dump_state(self, state, file):
# Header
file.write(" State %d:\n" % state['number'])
# Transitions
self.dump_transitions(state, file)
# Action
action = state['action']
if action is not None:
file.write(" %s\n" % action)
def dump_transitions(self, state, file):
chars_leading_to_state = {}
special_to_state = {}
for (c, s) in state.iteritems():
if len(c) == 1:
chars = chars_leading_to_state.get(id(s), None)
if chars is None:
chars = []
chars_leading_to_state[id(s)] = chars
chars.append(c)
elif len(c) <= 4:
special_to_state[c] = s
ranges_to_state = {}
for state in self.states:
char_list = chars_leading_to_state.get(id(state), None)
if char_list:
ranges = self.chars_to_ranges(char_list)
ranges_to_state[ranges] = state
ranges_list = ranges_to_state.keys()
ranges_list.sort()
for ranges in ranges_list:
key = self.ranges_to_string(ranges)
state = ranges_to_state[ranges]
file.write(" %s --> State %d\n" % (key, state['number']))
for key in ('bol', 'eol', 'eof', 'else'):
state = special_to_state.get(key, None)
if state:
file.write(" %s --> State %d\n" % (key, state['number']))
def chars_to_ranges(self, char_list):
char_list.sort()
i = 0
n = len(char_list)
result = []
while i < n:
c1 = ord(char_list[i])
c2 = c1
i += 1
while i < n and ord(char_list[i]) == c2 + 1:
i += 1
c2 += 1
result.append((chr(c1), chr(c2)))
return tuple(result)
def ranges_to_string(self, range_list):
return ','.join(map(self.range_to_string, range_list))
def range_to_string(self, range_tuple):
(c1, c2) = range_tuple
if c1 == c2:
return repr(c1)
else:
return "%s..%s" % (repr(c1), repr(c2))
......@@ -42,14 +42,15 @@ def chars_to_ranges(s):
while i < n:
code1 = ord(char_list[i])
code2 = code1 + 1
i += 1
while i < n and code2 >= ord(char_list[i]):
code2 += 1
i += 1
result.append(code1)
result.append(code2)
return result
def uppercase_range(code1, code2):
"""
If the range of characters from code1 to code2-1 includes any
......@@ -63,6 +64,7 @@ def uppercase_range(code1, code2):
else:
return None
def lowercase_range(code1, code2):
"""
If the range of characters from code1 to code2-1 includes any
......@@ -76,6 +78,7 @@ def lowercase_range(code1, code2):
else:
return None
def CodeRanges(code_list):
"""
Given a list of codes as returned by chars_to_ranges, return
......@@ -86,6 +89,7 @@ def CodeRanges(code_list):
re_list.append(CodeRange(code_list[i], code_list[i + 1]))
return Alt(*re_list)
def CodeRange(code1, code2):
"""
CodeRange(code1, code2) is an RE which matches any character
......@@ -93,11 +97,12 @@ def CodeRange(code1, code2):
"""
if code1 <= nl_code < code2:
return Alt(RawCodeRange(code1, nl_code),
RawNewline,
RawCodeRange(nl_code + 1, code2))
else:
return RawCodeRange(code1, code2)
#
# Abstract classes
#
......@@ -110,12 +115,12 @@ class RE(object):
re1 | re2 is an RE which matches either |re1| or |re2|
"""
nullable = 1 # True if this RE can match 0 input symbols
match_nl = 1 # True if this RE can match a string ending with '\n'
str = None # Set to a string to override the class's __str__ result
def build_machine(self, machine, initial_state, final_state,
match_bol, nocase):
"""
This method should add states to |machine| to implement this
RE, starting at |initial_state| and ending at |final_state|.
......@@ -124,7 +129,7 @@ class RE(object):
letters should be treated as equivalent.
"""
raise NotImplementedError("%s.build_machine not implemented" %
self.__class__.__name__)
def build_opt(self, m, initial_state, c):
"""
......@@ -160,18 +165,18 @@ class RE(object):
self.check_string(num, value)
if len(value) != 1:
raise Errors.PlexValueError("Invalid value for argument %d of Plex.%s."
"Expected a string of length 1, got: %s" % (
num, self.__class__.__name__, repr(value)))
"Expected a string of length 1, got: %s" % (
num, self.__class__.__name__, repr(value)))
def wrong_type(self, num, value, expected):
if type(value) == types.InstanceType:
got = "%s.%s instance" % (
value.__class__.__module__, value.__class__.__name__)
got = "%s.%s instance" % (
value.__class__.__module__, value.__class__.__name__)
else:
got = type(value).__name__
raise Errors.PlexTypeError("Invalid type for argument %d of Plex.%s "
"(expected %s, got %s" % (
num, self.__class__.__name__, expected, got))
"(expected %s, got %s" % (
num, self.__class__.__name__, expected, got))
#
# Primitive RE constructors
......@@ -211,6 +216,7 @@ class RE(object):
## def calc_str(self):
## return "Char(%s)" % repr(self.char)
def Char(c):
"""
Char(c) is an RE which matches the character |c|.
......@@ -222,6 +228,7 @@ def Char(c):
result.str = "Char(%s)" % repr(c)
return result
class RawCodeRange(RE):
"""
RawCodeRange(code1, code2) is a low-level RE which matches any character
......@@ -230,9 +237,9 @@ class RawCodeRange(RE):
"""
nullable = 0
match_nl = 0
range = None # (code, code)
uppercase_range = None # (code, code) or None
lowercase_range = None # (code, code) or None
def __init__(self, code1, code2):
self.range = (code1, code2)
......@@ -252,6 +259,7 @@ class RawCodeRange(RE):
def calc_str(self):
return "CodeRange(%d,%d)" % (self.code1, self.code2)
class _RawNewline(RE):
"""
RawNewline is a low-level RE which matches a newline character.
......@@ -266,6 +274,7 @@ class _RawNewline(RE):
s = self.build_opt(m, initial_state, EOL)
s.add_transition((nl_code, nl_code + 1), final_state)
RawNewline = _RawNewline()
......@@ -304,7 +313,7 @@ class Seq(RE):
i = len(re_list)
match_nl = 0
while i:
i -= 1
re = re_list[i]
if re.match_nl:
match_nl = 1
......@@ -354,7 +363,7 @@ class Alt(RE):
non_nullable_res.append(re)
if re.match_nl:
match_nl = 1
i += 1
self.nullable_res = nullable_res
self.non_nullable_res = non_nullable_res
self.nullable = nullable
......@@ -411,7 +420,7 @@ class SwitchCase(RE):
def build_machine(self, m, initial_state, final_state, match_bol, nocase):
self.re.build_machine(m, initial_state, final_state, match_bol,
self.nocase)
def calc_str(self):
if self.nocase:
......@@ -434,6 +443,7 @@ Empty.__doc__ = \
"""
Empty.str = "Empty"
def Str1(s):
"""
Str1(s) is an RE which matches the literal string |s|.
......@@ -442,6 +452,7 @@ def Str1(s):
result.str = "Str(%s)" % repr(s)
return result
def Str(*strs):
"""
Str(s) is an RE which matches the literal string |s|.
......@@ -454,6 +465,7 @@ def Str(*strs):
result.str = "Str(%s)" % ','.join(map(repr, strs))
return result
def Any(s):
"""
Any(s) is an RE which matches any character in the string |s|.
......@@ -463,6 +475,7 @@ def Any(s):
result.str = "Any(%s)" % repr(s)
return result
def AnyBut(s):
"""
AnyBut(s) is an RE which matches any character (including
......@@ -475,6 +488,7 @@ def AnyBut(s):
result.str = "AnyBut(%s)" % repr(s)
return result
AnyChar = AnyBut("")
AnyChar.__doc__ = \
"""
......@@ -482,7 +496,8 @@ AnyChar.__doc__ = \
"""
AnyChar.str = "AnyChar"
def Range(s1, s2=None):
"""
Range(c1, c2) is an RE which matches any single character in the range
|c1| to |c2| inclusive.
......@@ -495,11 +510,12 @@ def Range(s1, s2 = None):
else:
ranges = []
for i in range(0, len(s1), 2):
ranges.append(CodeRange(ord(s1[i]), ord(s1[i + 1]) + 1))
result = Alt(*ranges)
result.str = "Range(%s)" % repr(s1)
return result
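Both call forms in a quick sketch:

lower = Range("a", "z")   # any single character from 'a' to 'z' inclusive
word = Range("azAZ09")    # pairs of bounds: lower case, upper case, digits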
def Opt(re):
"""
Opt(re) is an RE which matches either |re| or the empty string.
......@@ -508,6 +524,7 @@ def Opt(re):
result.str = "Opt(%s)" % re
return result
def Rep(re):
"""
Rep(re) is an RE which matches zero or more repetitions of |re|.
......@@ -516,12 +533,14 @@ def Rep(re):
result.str = "Rep(%s)" % re
return result
def NoCase(re):
"""
NoCase(re) is an RE which matches the same strings as RE, but treating
upper and lower case letters as equivalent.
"""
return SwitchCase(re, nocase=1)
def Case(re):
"""
......@@ -529,7 +548,7 @@ def Case(re):
upper and lower case letters as distinct, i.e. it cancels the effect
of any enclosing NoCase().
"""
return SwitchCase(re, nocase=0)
#
# RE Constants
......
......@@ -31,7 +31,7 @@ cdef class Scanner:
@cython.locals(input_state=long)
cdef next_char(self)
@cython.locals(action=Action)
cpdef tuple read(self)
cdef tuple scan_a_token(self)
cdef tuple position(self)
......
......@@ -10,6 +10,7 @@
from __future__ import absolute_import
import cython
cython.declare(BOL=object, EOL=object, EOF=object, NOT_FOUND=object)
from . import Errors
......@@ -19,317 +20,318 @@ NOT_FOUND = object()
class Scanner(object):
"""
A Scanner is used to read tokens from a stream of characters
using the token set specified by a Plex.Lexicon.
Constructor:
Scanner(lexicon, stream, name = '')
"""
A Scanner is used to read tokens from a stream of characters
using the token set specified by a Plex.Lexicon.
See the docstring of the __init__ method for details.
Constructor:
Methods:
Scanner(lexicon, stream, name = '')
See the docstrings of the individual methods for more
information.
See the docstring of the __init__ method for details.
read() --> (value, text)
Reads the next lexical token from the stream.
Methods:
position() --> (name, line, col)
Returns the position of the last token read using the
read() method.
See the docstrings of the individual methods for more
information.
begin(state_name)
Causes scanner to change state.
read() --> (value, text)
Reads the next lexical token from the stream.
produce(value [, text])
Causes return of a token value to the caller of the
Scanner.
position() --> (name, line, col)
Returns the position of the last token read using the
read() method.
"""
begin(state_name)
Causes scanner to change state.
# lexicon = None # Lexicon
# stream = None # file-like object
# name = ''
# buffer = ''
# buf_start_pos = 0 # position in input of start of buffer
# next_pos = 0 # position in input of next char to read
# cur_pos = 0 # position in input of current char
# cur_line = 1 # line number of current char
# cur_line_start = 0 # position in input of start of current line
# start_pos = 0 # position in input of start of token
# start_line = 0 # line number of start of token
# start_col = 0 # position in line of start of token
# text = None # text of last token read
# initial_state = None # Node
# state_name = '' # Name of initial state
# queue = None # list of tokens to be returned
# trace = 0
produce(value [, text])
Causes return of a token value to the caller of the
Scanner.
def __init__(self, lexicon, stream, name = '', initial_pos = None):
"""
Scanner(lexicon, stream, name = '')
|lexicon| is a Plex.Lexicon instance specifying the lexical tokens
to be recognised.
|stream| can be a file object or anything which implements a
compatible read() method.
|name| is optional, and may be the name of the file being
scanned or any other identifying string.
"""
self.trace = 0
self.buffer = u''
self.buf_start_pos = 0
self.next_pos = 0
self.cur_pos = 0
self.cur_line = 1
self.start_pos = 0
self.start_line = 0
self.start_col = 0
self.text = None
self.state_name = None
self.lexicon = lexicon
self.stream = stream
self.name = name
self.queue = []
self.initial_state = None
self.begin('')
self.next_pos = 0
self.cur_pos = 0
self.cur_line_start = 0
self.cur_char = BOL
self.input_state = 1
if initial_pos is not None:
self.cur_line, self.cur_line_start = initial_pos[1], -initial_pos[2]
def read(self):
"""
Read the next lexical token from the stream and return a
tuple (value, text), where |value| is the value associated with
the token as specified by the Lexicon, and |text| is the actual
string read from the stream. Returns (None, '') on end of file.
"""
queue = self.queue
while not queue:
self.text, action = self.scan_a_token()
if action is None:
self.produce(None)
self.eof()
else:
value = action.perform(self, self.text)
if value is not None:
self.produce(value)
result = queue[0]
del queue[0]
return result
def scan_a_token(self):
"""
Read the next input sequence recognised by the machine
and return (text, action). Returns ('', None) on end of
file.
"""
self.start_pos = self.cur_pos
self.start_line = self.cur_line
self.start_col = self.cur_pos - self.cur_line_start
action = self.run_machine_inlined()
if action is not None:
if self.trace:
print("Scanner: read: Performing %s %d:%d" % (
action, self.start_pos, self.cur_pos))
text = self.buffer[self.start_pos - self.buf_start_pos :
self.cur_pos - self.buf_start_pos]
return (text, action)
else:
if self.cur_pos == self.start_pos:
if self.cur_char is EOL:
self.next_char()
if self.cur_char is None or self.cur_char is EOF:
return (u'', None)
raise Errors.UnrecognizedInput(self, self.state_name)
def run_machine_inlined(self):
"""
Inlined version of run_machine for speed.
"""
state = self.initial_state
cur_pos = self.cur_pos
cur_line = self.cur_line
cur_line_start = self.cur_line_start
cur_char = self.cur_char
input_state = self.input_state
next_pos = self.next_pos
buffer = self.buffer
buf_start_pos = self.buf_start_pos
buf_len = len(buffer)
b_action, b_cur_pos, b_cur_line, b_cur_line_start, b_cur_char, b_input_state, b_next_pos = \
None, 0, 0, 0, u'', 0, 0
trace = self.trace
while 1:
if trace: #TRACE#
print("State %d, %d/%d:%s -->" % ( #TRACE#
state['number'], input_state, cur_pos, repr(cur_char))) #TRACE#
# Begin inlined self.save_for_backup()
#action = state.action #@slow
action = state['action'] #@fast
if action is not None:
# lexicon = None # Lexicon
# stream = None # file-like object
# name = ''
# buffer = ''
# buf_start_pos = 0 # position in input of start of buffer
# next_pos = 0 # position in input of next char to read
# cur_pos = 0 # position in input of current char
# cur_line = 1 # line number of current char
# cur_line_start = 0 # position in input of start of current line
# start_pos = 0 # position in input of start of token
# start_line = 0 # line number of start of token
# start_col = 0 # position in line of start of token
# text = None # text of last token read
# initial_state = None # Node
# state_name = '' # Name of initial state
# queue = None # list of tokens to be returned
# trace = 0
def __init__(self, lexicon, stream, name='', initial_pos=None):
"""
Scanner(lexicon, stream, name = '')
|lexicon| is a Plex.Lexicon instance specifying the lexical tokens
to be recognised.
|stream| can be a file object or anything which implements a
compatible read() method.
|name| is optional, and may be the name of the file being
scanned or any other identifying string.
"""
self.trace = 0
self.buffer = u''
self.buf_start_pos = 0
self.next_pos = 0
self.cur_pos = 0
self.cur_line = 1
self.start_pos = 0
self.start_line = 0
self.start_col = 0
self.text = None
self.state_name = None
self.lexicon = lexicon
self.stream = stream
self.name = name
self.queue = []
self.initial_state = None
self.begin('')
self.next_pos = 0
self.cur_pos = 0
self.cur_line_start = 0
self.cur_char = BOL
self.input_state = 1
if initial_pos is not None:
self.cur_line, self.cur_line_start = initial_pos[1], -initial_pos[2]
def read(self):
"""
Read the next lexical token from the stream and return a
tuple (value, text), where |value| is the value associated with
the token as specified by the Lexicon, and |text| is the actual
string read from the stream. Returns (None, '') on end of file.
"""
queue = self.queue
while not queue:
self.text, action = self.scan_a_token()
if action is None:
self.produce(None)
self.eof()
else:
value = action.perform(self, self.text)
if value is not None:
self.produce(value)
result = queue[0]
del queue[0]
return result
def scan_a_token(self):
"""
Read the next input sequence recognised by the machine
and return (text, action). Returns ('', None) on end of
file.
"""
self.start_pos = self.cur_pos
self.start_line = self.cur_line
self.start_col = self.cur_pos - self.cur_line_start
action = self.run_machine_inlined()
if action is not None:
if self.trace:
print("Scanner: read: Performing %s %d:%d" % (
action, self.start_pos, self.cur_pos))
text = self.buffer[
self.start_pos - self.buf_start_pos:
self.cur_pos - self.buf_start_pos]
return (text, action)
else:
if self.cur_pos == self.start_pos:
if self.cur_char is EOL:
self.next_char()
if self.cur_char is None or self.cur_char is EOF:
return (u'', None)
raise Errors.UnrecognizedInput(self, self.state_name)
def run_machine_inlined(self):
"""
Inlined version of run_machine for speed.
"""
state = self.initial_state
cur_pos = self.cur_pos
cur_line = self.cur_line
cur_line_start = self.cur_line_start
cur_char = self.cur_char
input_state = self.input_state
next_pos = self.next_pos
buffer = self.buffer
buf_start_pos = self.buf_start_pos
buf_len = len(buffer)
b_action, b_cur_pos, b_cur_line, b_cur_line_start, b_cur_char, b_input_state, b_next_pos = \
None, 0, 0, 0, u'', 0, 0
trace = self.trace
while 1:
if trace: #TRACE#
print("State %d, %d/%d:%s -->" % ( #TRACE#
state['number'], input_state, cur_pos, repr(cur_char))) #TRACE#
# Begin inlined self.save_for_backup()
#action = state.action #@slow
action = state['action'] #@fast
if action is not None:
b_action, b_cur_pos, b_cur_line, b_cur_line_start, b_cur_char, b_input_state, b_next_pos = \
action, cur_pos, cur_line, cur_line_start, cur_char, input_state, next_pos
# End inlined self.save_for_backup()
c = cur_char
#new_state = state.new_state(c) #@slow
new_state = state.get(c, NOT_FOUND) #@fast
if new_state is NOT_FOUND: #@fast
new_state = c and state.get('else') #@fast
if new_state:
if trace: #TRACE#
print("State %d" % new_state['number']) #TRACE#
state = new_state
# Begin inlined: self.next_char()
if input_state == 1:
cur_pos = next_pos
# Begin inlined: c = self.read_char()
buf_index = next_pos - buf_start_pos
if buf_index < buf_len:
c = buffer[buf_index]
next_pos += 1
else:
discard = self.start_pos - buf_start_pos
data = self.stream.read(0x1000)
buffer = self.buffer[discard:] + data
self.buffer = buffer
buf_start_pos += discard
self.buf_start_pos = buf_start_pos
buf_len = len(buffer)
buf_index -= discard
if data:
c = buffer[buf_index]
next_pos += 1
else:
c = u''
# End inlined: c = self.read_char()
if c == u'\n':
cur_char = EOL
input_state = 2
elif not c:
cur_char = EOL
input_state = 4
else:
cur_char = c
elif input_state == 2:
cur_char = u'\n'
input_state = 3
elif input_state == 3:
cur_line += 1
cur_line_start = cur_pos = next_pos
cur_char = BOL
input_state = 1
elif input_state == 4:
cur_char = EOF
input_state = 5
else: # input_state = 5
cur_char = u''
# End inlined self.next_char()
else: # not new_state
if trace: #TRACE#
print("blocked") #TRACE#
# Begin inlined: action = self.back_up()
if b_action is not None:
(action, cur_pos, cur_line, cur_line_start,
cur_char, input_state, next_pos) = \
(b_action, b_cur_pos, b_cur_line, b_cur_line_start,
b_cur_char, b_input_state, b_next_pos)
else:
action = None
break # while 1
# End inlined: action = self.back_up()
self.cur_pos = cur_pos
self.cur_line = cur_line
self.cur_line_start = cur_line_start
self.cur_char = cur_char
self.input_state = input_state
self.next_pos = next_pos
if trace: #TRACE#
if action is not None: #TRACE#
print("Doing %s" % action) #TRACE#
return action
def next_char(self):
input_state = self.input_state
if self.trace:
print("Scanner: next: %s [%d] %d" % (" " * 20, input_state, self.cur_pos))
if input_state == 1:
self.cur_pos = self.next_pos
c = self.read_char()
if c == u'\n':
self.cur_char = EOL
self.input_state = 2
elif not c:
self.cur_char = EOL
self.input_state = 4
else:
self.cur_char = c
elif input_state == 2:
self.cur_char = u'\n'
self.input_state = 3
elif input_state == 3:
self.cur_line += 1
self.cur_line_start = self.cur_pos = self.next_pos
self.cur_char = BOL
self.input_state = 1
elif input_state == 4:
self.cur_char = EOF
self.input_state = 5
else: # input_state = 5
self.cur_char = u''
if self.trace:
print("--> [%d] %d %s" % (input_state, self.cur_pos, repr(self.cur_char)))
def position(self):
"""
Return a tuple (name, line, col) representing the location of
the last token read using the read() method. |name| is the
name that was provided to the Scanner constructor; |line|
is the line number in the stream (1-based); |col| is the
position within the line of the first character of the token
(0-based).
"""
return (self.name, self.start_line, self.start_col)
def get_position(self):
"""Python accessible wrapper around position(), only for error reporting.
"""
return self.position()
def begin(self, state_name):
"""Set the current state of the scanner to the named state."""
self.initial_state = (
self.lexicon.get_initial_state(state_name))
self.state_name = state_name
def produce(self, value, text=None):
"""
Called from an action procedure, causes |value| to be returned
as the token value from read(). If |text| is supplied, it is
returned in place of the scanned text.
produce() can be called more than once during a single call to an action
procedure, in which case the tokens are queued up and returned one
at a time by subsequent calls to read(), until the queue is empty,
whereupon scanning resumes.
"""
if text is None:
text = self.text
self.queue.append((value, text))
def eof(self):
"""
Override this method if you want something to be done at
end of file.
"""
......@@ -13,147 +13,146 @@ from .Errors import PlexError
class RegexpSyntaxError(PlexError):
pass
def re(s):
"""
Convert traditional string representation of regular expression |s|
into Plex representation.
"""
return REParser(s).parse_re()
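For example, two equivalences as implemented by the parser below (sketch):

re("ab+")      # same machine as Seq(Char('a'), Rep1(Char('b')))
re("[^0-9]")   # same machine as AnyBut("0123456789")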
"""
Convert traditional string representation of regular expression |s|
into Plex representation.
"""
return REParser(s).parse_re()
class REParser(object):
def __init__(self, s):
self.s = s
self.i = -1
self.end = 0
self.next()
def parse_re(self):
re = self.parse_alt()
if not self.end:
self.error("Unexpected %s" % repr(self.c))
return re
def parse_alt(self):
"""Parse a set of alternative regexps."""
re = self.parse_seq()
if self.c == '|':
re_list = [re]
while self.c == '|':
self.next()
re_list.append(self.parse_seq())
re = Alt(*re_list)
return re
def parse_seq(self):
"""Parse a sequence of regexps."""
re_list = []
while not self.end and not self.c in "|)":
re_list.append(self.parse_mod())
return Seq(*re_list)
def parse_mod(self):
"""Parse a primitive regexp followed by *, +, ? modifiers."""
re = self.parse_prim()
while not self.end and self.c in "*+?":
if self.c == '*':
re = Rep(re)
elif self.c == '+':
re = Rep1(re)
else: # self.c == '?'
re = Opt(re)
self.next()
return re
def parse_prim(self):
"""Parse a primitive regexp."""
c = self.get()
re = Char(c)
return re
def parse_charset(self):
"""Parse a charset. Does not include the surrounding []."""
char_list = []
invert = 0
if self.c == '^':
invert = 1
self.next()
if self.c == ']':
char_list.append(']')
self.next()
while not self.end and self.c != ']':
c1 = self.get()
if self.c == '-' and self.lookahead(1) != ']':
if c == '.':
re = AnyBut("\n")
elif c == '^':
re = Bol
elif c == '$':
re = Eol
elif c == '(':
re = self.parse_alt()
self.expect(')')
elif c == '[':
re = self.parse_charset()
self.expect(']')
else:
if c == '\\':
c = self.get()
re = Char(c)
return re
def parse_charset(self):
"""Parse a charset. Does not include the surrounding []."""
char_list = []
invert = 0
if self.c == '^':
invert = 1
self.next()
if self.c == ']':
char_list.append(']')
self.next()
while not self.end and self.c != ']':
c1 = self.get()
if self.c == '-' and self.lookahead(1) != ']':
self.next()
c2 = self.get()
for a in xrange(ord(c1), ord(c2) + 1):
char_list.append(chr(a))
else:
char_list.append(c1)
chars = ''.join(char_list)
if invert:
return AnyBut(chars)
else:
return Any(chars)
def next(self):
"""Advance to the next char."""
s = self.s
i = self.i = self.i + 1
if i < len(s):
self.c = s[i]
else:
self.c = ''
self.end = 1
def get(self):
if self.end:
self.error("Premature end of string")
c = self.c
self.next()
c2 = self.get()
for a in xrange(ord(c1), ord(c2) + 1):
char_list.append(chr(a))
else:
char_list.append(c1)
chars = ''.join(char_list)
if invert:
return AnyBut(chars)
else:
return Any(chars)
def next(self):
"""Advance to the next char."""
s = self.s
i = self.i = self.i + 1
if i < len(s):
self.c = s[i]
else:
self.c = ''
self.end = 1
def get(self):
if self.end:
self.error("Premature end of string")
c = self.c
self.next()
return c
def lookahead(self, n):
"""Look ahead n chars."""
j = self.i + n
if j < len(self.s):
return self.s[j]
else:
return ''
def expect(self, c):
"""
Expect to find character |c| at current position.
Raises an exception otherwise.
"""
if self.c == c:
self.next()
else:
self.error("Missing %s" % repr(c))
def error(self, mess):
"""Raise exception to signal syntax error in regexp."""
raise RegexpSyntaxError("Syntax error in regexp %s at position %d: %s" % (
repr(self.s), self.i, mess))
return c
def lookahead(self, n):
"""Look ahead n chars."""
j = self.i + n
if j < len(self.s):
return self.s[j]
else:
return ''
def expect(self, c):
"""
Expect to find character |c| at current position.
Raises an exception otherwise.
"""
if self.c == c:
self.next()
else:
self.error("Missing %s" % repr(c))
def error(self, mess):
"""Raise exception to signal syntax error in regexp."""
raise RegexpSyntaxError("Syntax error in regexp %s at position %d: %s" % (
repr(self.s), self.i, mess))
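A hedged usage sketch of the parser above (the pattern is illustrative and assumes the Plex node constructors ``Seq``, ``Rep1`` and ``Any`` are in scope, as they are in this module):

    # Hypothetical: convert a traditional regexp string into Plex form.
    node = re("[a-c]+")
    # parse_charset() expands the range to 'abc'; parse_mod() applies '+',
    # so the result is Seq(Rep1(Any('abc'))).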
#
# Plex - Transition Maps
#
# This version represents state sets directly as dicts for speed.
#
from __future__ import absolute_import
......@@ -10,229 +10,231 @@ from sys import maxint as maxint
class TransitionMap(object):
    """
    A TransitionMap maps an input event to a set of states.
    An input event is one of: a range of character codes,
    the empty string (representing an epsilon move), or one
    of the special symbols BOL, EOL, EOF.

    For characters, this implementation compactly represents
    the map by means of a list:

        [code_0, states_0, code_1, states_1, code_2, states_2,
            ..., code_n-1, states_n-1, code_n]

    where |code_i| is a character code, and |states_i| is a
    set of states corresponding to characters with codes |c|
    in the range |code_i| <= |c| <= |code_i+1|.

    The following invariants hold:
        n >= 1
        code_0 == -maxint
        code_n == maxint
        code_i < code_i+1 for i in 0..n-1
        states_0 == states_n-1

    Mappings for the special events '', BOL, EOL, EOF are
    kept separately in a dictionary.
    """

    map = None       # The list of codes and states
    special = None   # Mapping for special events

    def __init__(self, map=None, special=None):
        if not map:
            map = [-maxint, {}, maxint]
        if not special:
            special = {}
        self.map = map
        self.special = special
        #self.check() ###

    def add(self, event, new_state,
            TupleType=tuple):
        """
        Add transition to |new_state| on |event|.
        """
        if type(event) is TupleType:
            code0, code1 = event
            i = self.split(code0)
            j = self.split(code1)
            map = self.map
            while i < j:
                map[i + 1][new_state] = 1
                i += 2
        else:
            self.get_special(event)[new_state] = 1

    def add_set(self, event, new_set,
                TupleType=tuple):
        """
        Add transitions to the states in |new_set| on |event|.
        """
        if type(event) is TupleType:
            code0, code1 = event
            i = self.split(code0)
            j = self.split(code1)
            map = self.map
            while i < j:
                map[i + 1].update(new_set)
                i += 2
        else:
            self.get_special(event).update(new_set)

    def get_epsilon(self,
                    none=None):
        """
        Return the mapping for epsilon, or None.
        """
        return self.special.get('', none)

    def iteritems(self,
                  len=len):
        """
        Return the mapping as an iterable of ((code1, code2), state_set) and
        (special_event, state_set) pairs.
        """
        result = []
        map = self.map
        else_set = map[1]
        i = 0
        n = len(map) - 1
        code0 = map[0]
        while i < n:
            set = map[i + 1]
            code1 = map[i + 2]
            if set or else_set:
                result.append(((code0, code1), set))
            code0 = code1
            i += 2
        for event, set in self.special.iteritems():
            if set:
                result.append((event, set))
        return iter(result)

    items = iteritems

    # ------------------- Private methods --------------------

    def split(self, code,
              len=len, maxint=maxint):
        """
        Search the list for the position of the split point for |code|,
        inserting a new split point if necessary. Returns index |i| such
        that |code| == |map[i]|.
        """
        # We use a funky variation on binary search.
        map = self.map
        hi = len(map) - 1
        # Special case: code == map[-1]
        if code == maxint:
            return hi
        # General case
        lo = 0
        # loop invariant: map[lo] <= code < map[hi] and hi - lo >= 2
        while hi - lo >= 4:
            # Find midpoint truncated to even index
            mid = ((lo + hi) // 2) & ~1
            if code < map[mid]:
                hi = mid
            else:
                lo = mid
        # map[lo] <= code < map[hi] and hi - lo == 2
        if map[lo] == code:
            return lo
        else:
            map[hi:hi] = [code, map[hi - 1].copy()]
            #self.check() ###
            return hi

    def get_special(self, event):
        """
        Get state set for special event, adding a new entry if necessary.
        """
        special = self.special
        set = special.get(event, None)
        if not set:
            set = {}
            special[event] = set
        return set

    # --------------------- Conversion methods -----------------------

    def __str__(self):
        map_strs = []
        map = self.map
        n = len(map)
        i = 0
        while i < n:
            code = map[i]
            if code == -maxint:
                code_str = "-inf"
            elif code == maxint:
                code_str = "inf"
            else:
                code_str = str(code)
            map_strs.append(code_str)
            i += 1
            if i < n:
                map_strs.append(state_set_str(map[i]))
                i += 1
        special_strs = {}
        for event, set in self.special.iteritems():
            special_strs[event] = state_set_str(set)
        return "[%s]+%s" % (
            ','.join(map_strs),
            special_strs
        )

    # --------------------- Debugging methods -----------------------

    def check(self):
        """Check data structure integrity."""
        if not self.map[-3] < self.map[-1]:
            print(self)
            assert 0

    def dump(self, file):
        map = self.map
        i = 0
        n = len(map) - 1
        while i < n:
            self.dump_range(map[i], map[i + 2], map[i + 1], file)
            i += 2
        for event, set in self.special.iteritems():
            if set:
                if not event:
                    event = 'empty'
                self.dump_trans(event, set, file)

    def dump_range(self, code0, code1, set, file):
        if set:
            if code0 == -maxint:
                if code1 == maxint:
                    k = "any"
                else:
                    k = "< %s" % self.dump_char(code1)
            elif code1 == maxint:
                k = "> %s" % self.dump_char(code0 - 1)
            elif code0 == code1 - 1:
                k = self.dump_char(code0)
            else:
                k = "%s..%s" % (self.dump_char(code0),
                                self.dump_char(code1 - 1))
            self.dump_trans(k, set, file)

    def dump_char(self, code):
        if 0 <= code <= 255:
            return repr(chr(code))
        else:
            return "chr(%d)" % code

    def dump_trans(self, key, set, file):
        file.write("  %s --> %s\n" % (key, self.dump_set(set)))

    def dump_set(self, set):
        return state_set_str(set)
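A hedged sketch of the map in action, runnable under Python 2 (matching the module's use of ``sys.maxint``); plain integers stand in for state objects here, which is enough for ``add()`` and ``items()`` — the string/debug methods additionally expect a ``.number`` attribute on states:

    # Hypothetical: state 1 is reachable on 'a'..'z', state 2 on an epsilon move.
    tm = TransitionMap()
    tm.add((ord('a'), ord('z') + 1), 1)   # range event: splits the code list at 97/123
    tm.add('', 2)                         # epsilon event: stored in the special dict
    assert sorted(tm.items()) == [('', {2: 1}), ((97, 123), {1: 1})]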
#
# State set manipulation functions
......@@ -243,4 +245,4 @@ class TransitionMap(object):
# set1[state] = 1
def state_set_str(set):
    return "[%s]" % ','.join(["S%d" % state.number for state in set])
# cython.* namespace for pure mode.
__version__ = "0.21"
__version__ = "0.21.1pre"
# BEGIN shameless copy from Cython/minivect/minitypes.py
......
#################### cfunc.to_py ####################
@cname("{{cname}}")
cdef object {{cname}}({{return_type.ctype}} (*f)({{ ', '.join(arg.type_cname for arg in args) }}) {{except_clause}}):
    def wrap({{ ', '.join('{arg.ctype} {arg.name}'.format(arg=arg) for arg in args) }}):
        """wrap({{', '.join(('{arg.name}: {arg.type_displayname}'.format(arg=arg) if arg.type_displayname else arg.name) for arg in args)}}){{if return_type.type_displayname}} -> {{return_type.type_displayname}}{{endif}}"""
        {{'' if return_type.type.is_void else 'return '}}f({{ ', '.join(arg.name for arg in args) }})
    return wrap
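For orientation, here is roughly what the template above expands to for a one-argument C function; every name below is a hypothetical placeholder (the real generator emits an internal cname), so treat this as a sketch, not the emitted code:

    # Hedged sketch of the generated coercion for: cdef double square_c(double x)
    cdef object __pyx_square_c_to_py(double (*f)(double)):
        def wrap(double x):
            """wrap(x: float) -> float"""
            return f(x)   # the closure keeps the C function pointer alive
        return wrap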
......@@ -545,13 +545,12 @@ static PyObject *__Pyx_CyFunction_descr_get(PyObject *func, PyObject *obj, PyObj
if (m->flags & __Pyx_CYFUNCTION_CLASSMETHOD) {
if (type == NULL)
type = (PyObject *)(Py_TYPE(obj));
return __Pyx_PyMethod_New(func, type, (PyObject *)(Py_TYPE(type)));
}
if (obj == Py_None)
obj = NULL;
return __Pyx_PyMethod_New(func, obj, type);
}
static PyObject*
......
......@@ -213,6 +213,13 @@ static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject
PyErr_SetObject(type, value);
if (tb) {
#if CYTHON_COMPILING_IN_PYPY
PyObject *tmp_type, *tmp_value, *tmp_tb;
PyErr_Fetch(&tmp_type, &tmp_value, &tmp_tb);
Py_INCREF(tb);
PyErr_Restore(tmp_type, tmp_value, tb);
Py_XDECREF(tmp_tb);
#else
PyThreadState *tstate = PyThreadState_GET();
PyObject* tmp_tb = tstate->curexc_traceback;
if (tb != tmp_tb) {
......@@ -220,6 +227,7 @@ static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject
tstate->curexc_traceback = tb;
Py_XDECREF(tmp_tb);
}
#endif
}
bad:
......
......@@ -62,9 +62,6 @@
#if PY_MAJOR_VERSION >= 3
#define Py_TPFLAGS_CHECKTYPES 0
#define Py_TPFLAGS_HAVE_INDEX 0
#define Py_TPFLAGS_HAVE_NEWBUFFER 0
#endif
......@@ -158,6 +155,12 @@
#define PyBoolObject PyLongObject
#endif
#if PY_MAJOR_VERSION >= 3 && CYTHON_COMPILING_IN_PYPY
#ifndef PyUnicode_InternFromString
#define PyUnicode_InternFromString(s) PyUnicode_FromString(s)
#endif
#endif
#if PY_VERSION_HEX < 0x030200A4
typedef long Py_hash_t;
#define __Pyx_PyInt_FromHash_t PyInt_FromLong
......@@ -168,7 +171,9 @@
#endif
#if PY_MAJOR_VERSION >= 3
#define __Pyx_PyMethod_New(func, self, klass) ((self) ? PyMethod_New(func, self) : PyInstanceMethod_New(func))
#else
#define __Pyx_PyMethod_New(func, self, klass) PyMethod_New(func, self, klass)
#endif
/* inline attribute */
......
import os
import sys

from distutils.core import setup
from distutils.extension import Extension
from Cython.Build import cythonize

# For demo purposes, we build our own tiny library.
......@@ -12,20 +13,19 @@ try:
    assert os.system("ar rcs libmymath.a mymath.o") == 0
except:
    if not os.path.exists("libmymath.a"):
        print("Error building external library, please create libmymath.a manually.")
        sys.exit(1)

# Here is how to use the library built above.
ext_modules = cythonize([
    Extension("call_mymath",
              sources=["call_mymath.pyx"],
              include_dirs=[os.getcwd()],  # path to .h file(s)
              library_dirs=[os.getcwd()],  # path to .a or .so file(s)
              libraries=['mymath'])
])

setup(
    name='Demos',
    ext_modules=ext_modules,
)
......@@ -5,8 +5,7 @@ include pylintrc
include setup.py
include setupegg.py
include bin/*
include cython.py cythonize.py cygdb.py
recursive-include Cython *.pyx *.pxd
include Doc/*
......
def primes(kmax):
    result = []
    if kmax > 1000:
        kmax = 1000
    p = [0] * 1000
    k = 0
    n = 2
    while k < kmax:
        i = 0
        while i < k and n % p[i] != 0:
            i += 1
        if i == k:
            p[k] = n
            k += 1
            result.append(n)
        n += 1
    return result
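For reference, a quick doctest-style check of the demo above (expected values verified by hand):

    >>> primes(5)
    [2, 3, 5, 7, 11]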
......@@ -78,6 +78,8 @@ You can show Cython's code analysis by passing the ``--annotate`` option::
%%cython --annotate
...
.. figure:: ipython.png
Using the Sage notebook
-----------------------
......
......@@ -137,10 +137,14 @@ together into :file:`rect.so`, which you can then import in Python using
``import rect`` (if you forget to link the :file:`Rectangle.o`, you will
get missing symbols while importing the library in Python).
Note that the ``language`` option has no effect on user provided Extension
objects that are passed into ``cythonize()``. It is only used for modules
found by file name (as in the example above).
The options can also be passed directly from the source file, which is
often preferable (and overrides any global option). Starting with
version 0.17, Cython also allows passing external source files into the
``cythonize()`` command this way. Here is a simplified setup.py file::
from distutils.core import setup
from Cython.Build import cythonize
......
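To make the scope of the ``language`` option concrete, here is a hedged sketch (module names are illustrative): the option applies only to sources given by name or pattern, while an explicit ``Extension`` keeps its own settings::

    from distutils.core import setup
    from distutils.extension import Extension
    from Cython.Build import cythonize

    setup(
        ext_modules=cythonize(
            ["rect.pyx",                          # compiled as C++
             Extension("other", ["other.pyx"])],  # keeps its own language setting
            language='c++',
        )
    )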
......@@ -1951,6 +1951,8 @@ def runtests(options, cmd_args, coverage=None):
try:
import jedi
if list(map(int, re.findall('[0-9]+', jedi.__version__))) < [0, 8, 1]:
raise ImportError
except ImportError:
exclude_selectors.append(RegExSelector('Jedi'))
......
......@@ -190,13 +190,13 @@ def acquire_nonbuffer1(first, second=None):
"""
>>> acquire_nonbuffer1(3) # doctest: +ELLIPSIS
Traceback (most recent call last):
TypeError:... 'int'...
>>> acquire_nonbuffer1(type) # doctest: +ELLIPSIS
Traceback (most recent call last):
TypeError:... 'type'...
>>> acquire_nonbuffer1(None, 2) # doctest: +ELLIPSIS
Traceback (most recent call last):
TypeError:... 'int'...
"""
cdef object[int] buf
buf = first
......
......@@ -10,13 +10,11 @@ from Cython.Build.Dependencies import cythonize
from distutils.core import setup
setup(
ext_modules = cythonize("*.pyx"),
ext_modules = cythonize("*.pyx", language='c++'),
)
######## a.pyx ########
from libcpp.vector cimport vector
def use_vector(L):
......
cdef int wrong_args(int x, long y)
cdef long wrong_return_type(int x, int y)
cdef int wrong_exception_check(int x, int y) except 0
cdef int wrong_exception_value(int x, int y) except 0
cdef int wrong_exception_value_check(int x, int y) except 0
cdef int inherit_exception_value(int x, int y) except 0
cdef int inherit_exception_check(int x, int y) except *
# mode: error
# tag: pxd
cdef int wrong_args(int x, int y):
return 2
cdef int wrong_return_type(int x, int y):
return 2
cdef int wrong_exception_check(int x, int y) except? 0:
return 2
cdef int wrong_exception_value(int x, int y) except 1:
return 2
cdef int wrong_exception_value_check(int x, int y) except? 1:
return 2
cdef int inherit_exception_value(int x, int y):
return 2
cdef int inherit_exception_check(int x, int y):
return 2
_ERRORS = """
4:5: Function signature does not match previous declaration
7:5: Function signature does not match previous declaration
10:5: Function signature does not match previous declaration
13:5: Function signature does not match previous declaration
16:5: Function signature does not match previous declaration
19:5: Function signature does not match previous declaration
22:5: Function signature does not match previous declaration
"""
......@@ -18,7 +18,7 @@ def unused_result():
return r
def unused_nested():
def _unused_one():
pass
def unused_class():
......@@ -53,7 +53,7 @@ _ERRORS = """
9:9: Unused entry 'b'
12:15: Unused argument 'arg'
16:6: Unused result in 'r'
21:4: Unused entry '_unused_one'
25:4: Unused entry 'Unused'
35:16: Unused entry 'foo'
36:13: Unused entry 'i'
......
......@@ -14,7 +14,6 @@ from cython.parallel cimport prange, parallel
import gc
import sys
if sys.version_info[0] < 3:
import __builtin__ as builtins
......@@ -26,9 +25,6 @@ __test__ = {}
def testcase(func):
doctest = func.__doc__
if sys.version_info >= (3, 0):
_u = str
else:
......@@ -162,22 +158,22 @@ def acquire_failure3():
@testcase
def acquire_nonbuffer1(first, second=None):
"""
>>> acquire_nonbuffer1(3) # doctest: +ELLIPSIS
Traceback (most recent call last):
...
TypeError:... 'int'...
>>> acquire_nonbuffer1(type) # doctest: +ELLIPSIS
Traceback (most recent call last):
...
TypeError:... 'type'...
>>> acquire_nonbuffer1(None, 2) # doctest: +ELLIPSIS
Traceback (most recent call last):
...
TypeError:... 'int'...
>>> acquire_nonbuffer1(4, object()) # doctest: +ELLIPSIS
Traceback (most recent call last):
...
TypeError:... 'int'...
"""
cdef int[:] buf
buf = first
......
# mode: run
# cython: always_allow_keywords=True
cimport cython
from libc.math cimport sqrt
cdef void empty_cfunc():
print "here"
# same signature
cdef void another_empty_cfunc():
print "there"
def call_empty_cfunc():
"""
>>> call_empty_cfunc()
here
there
"""
cdef object py_func = empty_cfunc
py_func()
cdef object another_py_func = another_empty_cfunc
another_py_func()
cdef double square_c(double x):
return x * x
def call_square_c(x):
"""
>>> call_square_c(2)
4.0
>>> call_square_c(-7)
49.0
"""
cdef object py_func = square_c
return py_func(x)
def return_square_c():
"""
>>> square_c = return_square_c()
>>> square_c(5)
25.0
>>> square_c(x=4)
16.0
>>> square_c.__doc__ # FIXME: try to make original C function name available
'wrap(x: float) -> float'
"""
return square_c
def return_libc_sqrt():
"""
>>> sqrt = return_libc_sqrt()
>>> sqrt(9)
3.0
>>> sqrt(x=9)
3.0
>>> sqrt.__doc__
'wrap(x: float) -> float'
"""
return sqrt
global_csqrt = sqrt
def test_global():
"""
>>> global_csqrt(9)
3.0
>>> global_csqrt.__doc__
'wrap(x: float) -> float'
>>> test_global()
double (double) nogil
Python object
"""
print cython.typeof(sqrt)
print cython.typeof(global_csqrt)
cdef long long rad(long long x):
cdef long long rad = 1
for p in range(2, <long long>sqrt(x) + 1):
if x % p == 0:
rad *= p
while x % p == 0:
x //= p
if x == 1:
break
return rad
cdef bint abc(long long a, long long b, long long c) except -1:
if a + b != c:
raise ValueError("Not a valid abc candidate: (%s, %s, %s)" % (a, b, c))
return rad(a*b*c) < c
def call_abc(a, b, c):
"""
>>> call_abc(2, 3, 5)
False
>>> call_abc(1, 63, 64)
True
>>> call_abc(2, 3**10 * 109, 23**5)
True
>>> call_abc(a=2, b=3**10 * 109, c=23**5)
True
>>> call_abc(1, 1, 1)
Traceback (most recent call last):
...
ValueError: Not a valid abc candidate: (1, 1, 1)
"""
cdef object py_func = abc
return py_func(a, b, c)
def return_abc():
"""
>>> abc = return_abc()
>>> abc(2, 3, 5)
False
>>> abc.__doc__
"wrap(a: 'long long', b: 'long long', c: 'long long') -> bool"
"""
return abc
ctypedef double foo
cdef foo test_typedef_cfunc(foo x):
return x
def test_typedef(x):
"""
>>> test_typedef(100)
100.0
"""
return (<object>test_typedef_cfunc)(x)
cdef union my_union:
int a
double b
cdef struct my_struct:
int which
my_union y
cdef my_struct c_struct_builder(int which, int a, double b):
cdef my_struct value
value.which = which
if which:
value.y.a = a
else:
value.y.b = b
return value
def return_struct_builder():
"""
>>> make = return_struct_builder()
>>> d = make(0, 1, 2)
>>> d['which']
0
>>> d['y']['b']
2.0
>>> d = make(1, 1, 2)
>>> d['which']
1
>>> d['y']['a']
1
>>> make.__doc__
"wrap(which: 'int', a: 'int', b: float) -> 'my_struct'"
"""
return c_struct_builder
cdef object test_object_params_cfunc(a, b):
return a, b
def test_object_params(a, b):
"""
>>> test_object_params(1, 'a')
(1, 'a')
"""
return (<object>test_object_params_cfunc)(a, b)
cdef tuple test_builtin_params_cfunc(list a, dict b):
return a, b
def test_builtin_params(a, b):
"""
>>> test_builtin_params([], {})
([], {})
>>> test_builtin_params(1, 2)
Traceback (most recent call last):
...
TypeError: Argument 'a' has incorrect type (expected list, got int)
"""
return (<object>test_builtin_params_cfunc)(a, b)
def return_builtin_params_cfunc():
"""
>>> cfunc = return_builtin_params_cfunc()
>>> cfunc([1, 2], {'a': 3})
([1, 2], {'a': 3})
>>> cfunc.__doc__
'wrap(a: list, b: dict) -> tuple'
"""
return test_builtin_params_cfunc
cdef class A:
def __repr__(self):
return self.__class__.__name__
cdef class B(A):
pass
cdef A test_cdef_class_params_cfunc(A a, B b):
return b
def test_cdef_class_params(a, b):
"""
>>> test_cdef_class_params(A(), B())
B
>>> test_cdef_class_params(B(), A())
Traceback (most recent call last):
...
TypeError: Argument 'b' has incorrect type (expected cfunc_convert.B, got cfunc_convert.A)
"""
return (<object>test_cdef_class_params_cfunc)(a, b)
# cython: c_string_type=str
# cython: c_string_encoding=ascii
cdef extern from "math.h":
cpdef double pxd_sqrt "sqrt"(double)
# cython: c_string_type=str
# cython: c_string_encoding=ascii
__doc__ = """
>>> sqrt(1)
1.0
>>> pyx_sqrt(4)
2.0
>>> pxd_sqrt(9)
3.0
>>> log(10)
Traceback (most recent call last):
...
NameError: name 'log' is not defined
>>> strchr('abcabc', ord('c'))
'cabc'
"""
cdef extern from "math.h":
cpdef double sqrt(double)
cpdef double pyx_sqrt "sqrt"(double)
cdef double log(double) # not wrapped
cdef extern from "string.h":
# signature must be exact in C++, disagrees with C
cpdef const char* strchr(const char *haystack, int needle)
# tag: posix
from libc.stdlib cimport getenv
from posix.stdlib cimport setenv, unsetenv
from libc.time cimport *
def test_time():
"""
>>> test_time()
"""
cdef time_t t1, t2
t1 = time(NULL)
assert t1 != 0
t1 = time(&t2)
assert t1 == t2
def test_mktime():
"""
>>> test_mktime() # doctest:+ELLIPSIS
(986138177, ...'Sun Apr 1 15:16:17 2001\\n')
"""
cdef tm t, gmt
cdef time_t tt
cdef char *ct
cdef char *tz
tz = getenv("TZ")
setenv("TZ", "UTC", 1)
tzset()
t.tm_sec = 17
t.tm_min = 16
t.tm_hour = 15
t.tm_year = 101
t.tm_mon = 3
t.tm_mday = 1
t.tm_isdst = 0
tt = mktime(&t)
assert tt != -1
ct = ctime(&tt)
assert ct != NULL
if tz:
setenv("TZ", tz, 1)
else:
unsetenv("TZ")
tzset()
return tt, ct
# tag: posix
from posix.sys_time cimport *
def test_itimer(sec, usec):
"""
>>> test_itimer(10, 2)
(10, 2)
"""
cdef itimerval t, gtime
t.it_interval.tv_sec = sec
t.it_interval.tv_usec = usec
t.it_value.tv_sec = sec
t.it_value.tv_usec = usec
ret = setitimer(ITIMER_REAL, &t, NULL)
assert ret == 0
ret = getitimer(ITIMER_REAL, &gtime)
assert ret == 0
t.it_interval.tv_sec = 0
t.it_interval.tv_usec = 0
t.it_value.tv_sec = 0
t.it_value.tv_usec = 0
ret = setitimer(ITIMER_REAL, &t, NULL)
return gtime.it_interval.tv_sec, gtime.it_interval.tv_usec
def test_gettimeofday():
"""
>>> test_gettimeofday()
"""
cdef timeval t
ret = gettimeofday(&t, NULL)
assert ret == 0
# tag: posix

from posix.time cimport *


def test_itimer(sec, usec):
    """
    >>> test_itimer(10, 2)
    (10, 2)
    """
    cdef itimerval t, gtime
    t.it_interval.tv_sec = sec
    t.it_interval.tv_usec = usec
    t.it_value.tv_sec = sec
    t.it_value.tv_usec = usec
    ret = setitimer(ITIMER_REAL, &t, NULL)
    assert ret == 0
    ret = getitimer(ITIMER_REAL, &gtime)
    assert ret == 0
    t.it_interval.tv_sec = 0
    t.it_interval.tv_usec = 0
    t.it_value.tv_sec = 0
    t.it_value.tv_usec = 0
    ret = setitimer(ITIMER_REAL, &t, NULL)
    return gtime.it_interval.tv_sec, gtime.it_interval.tv_usec


def test_gettimeofday():
    """
    >>> test_gettimeofday()
    """
    cdef timeval t
    ret = gettimeofday(&t, NULL)
    assert ret == 0
......@@ -496,6 +496,32 @@ def safe_c_functions():
assert typeof(f) == 'int (*)(int)', typeof(f)
assert 2 == f(1)
@infer_types(None)
def ptr_types():
"""
>>> ptr_types()
"""
cdef int a
a_ptr = &a
assert typeof(a_ptr) == "int *", typeof(a_ptr)
a_ptr_ptr = &a_ptr
assert typeof(a_ptr_ptr) == "int **", typeof(a_ptr_ptr)
cdef int[1] b
b_ref = b
assert typeof(b_ref) == "int *", typeof(b_ref)
ptr = &a
ptr = b
assert typeof(ptr) == "int *", typeof(ptr)
def const_types(const double x, double y, double& z):
"""
>>> const_types(1, 1, 1)
"""
a = x
a = y
a = z
assert typeof(a) == "double", typeof(a)
@infer_types(None)
def args_tuple_keywords(*args, **kwargs):
"""
......