Commit 4f57d524 authored by Julian Rüth's avatar Julian Rüth

Relevant options/directives must go into the cache fingerprint

I made the section that drops options quite verbose because it took me a while to
figure out whether an option should be included or not. I hope I got everything
right.
parent 97d42bce
...@@ -616,15 +616,28 @@ class DependencyTree(object): ...@@ -616,15 +616,28 @@ class DependencyTree(object):
def newest_dependency(self, filename): def newest_dependency(self, filename):
return max([self.extract_timestamp(f) for f in self.all_dependencies(filename)]) return max([self.extract_timestamp(f) for f in self.all_dependencies(filename)])
def transitive_fingerprint(self, filename, extra=None): def transitive_fingerprint(self, filename, module, compilation_options):
r"""
Return a fingerprint of a cython file that is about to be cythonized.
Fingerprints are looked up in future compilations. If the fingerprint
is found, the cythonization can be skipped. The fingerprint must
incorporate everything that has an influence on the generated code.
"""
try: try:
m = hashlib.md5(__version__.encode('UTF-8')) m = hashlib.md5(__version__.encode('UTF-8'))
m.update(file_hash(filename).encode('UTF-8')) m.update(file_hash(filename).encode('UTF-8'))
for x in sorted(self.all_dependencies(filename)): for x in sorted(self.all_dependencies(filename)):
if os.path.splitext(x)[1] not in ('.c', '.cpp', '.h'): if os.path.splitext(x)[1] not in ('.c', '.cpp', '.h'):
m.update(file_hash(x).encode('UTF-8')) m.update(file_hash(x).encode('UTF-8'))
if extra is not None: # Include the module attributes that change the compilation result
m.update(str(extra).encode('UTF-8')) # in the fingerprint. We do not iterate over module.__dict__ and
# include almost everything here as users might extend Extension
# with arbitrary (random) attributes that would lead to cache
# misses.
m.update(str((module.language, module.py_limited_api, module.np_pythran)).encode('UTF-8'))
m.update(compilation_options.get_fingerprint().encode('UTF-8'))
return m.hexdigest() return m.hexdigest()
except IOError: except IOError:
return None return None
...@@ -881,8 +894,6 @@ def cythonize(module_list, exclude=None, nthreads=0, aliases=None, quiet=False, ...@@ -881,8 +894,6 @@ def cythonize(module_list, exclude=None, nthreads=0, aliases=None, quiet=False,
if 'include_path' not in options: if 'include_path' not in options:
options['include_path'] = ['.'] options['include_path'] = ['.']
if 'common_utility_include_dir' in options: if 'common_utility_include_dir' in options:
if options.get('cache'):
raise NotImplementedError("common_utility_include_dir does not yet work with caching")
safe_makedirs(options['common_utility_include_dir']) safe_makedirs(options['common_utility_include_dir'])
pythran_options = None pythran_options = None
...@@ -973,8 +984,7 @@ def cythonize(module_list, exclude=None, nthreads=0, aliases=None, quiet=False, ...@@ -973,8 +984,7 @@ def cythonize(module_list, exclude=None, nthreads=0, aliases=None, quiet=False,
else: else:
print("Compiling %s because it depends on %s." % (source, dep)) print("Compiling %s because it depends on %s." % (source, dep))
if not force and options.cache: if not force and options.cache:
extra = m.language fingerprint = deps.transitive_fingerprint(source, m, options)
fingerprint = deps.transitive_fingerprint(source, extra)
else: else:
fingerprint = None fingerprint = None
to_compile.append(( to_compile.append((
......
...@@ -78,3 +78,23 @@ class TestInline(CythonTest): ...@@ -78,3 +78,23 @@ class TestInline(CythonTest):
self.fresh_cythonize(a_pyx, cache=self.cache_dir) self.fresh_cythonize(a_pyx, cache=self.cache_dir)
for output in expected: for output in expected:
self.assertTrue(os.path.exists(output), output) self.assertTrue(os.path.exists(output), output)
def test_options_invalidation(self):
hash_pyx = os.path.join(self.src_dir, 'options.pyx')
hash_c = hash_pyx[:-len('.pyx')] + '.c'
open(hash_pyx, 'w').write('pass')
self.fresh_cythonize(hash_pyx, cache=self.cache_dir, cplus=False)
self.assertEqual(1, len(self.cache_files('options.c*')))
open(hash_pyx, 'w').write('pass')
self.fresh_cythonize(hash_pyx, cache=self.cache_dir, cplus=True)
self.assertEqual(2, len(self.cache_files('options.c*')))
open(hash_pyx, 'w').write('pass')
self.fresh_cythonize(hash_pyx, cache=self.cache_dir, cplus=False, show_version=False)
self.assertEqual(2, len(self.cache_files('options.c*')))
open(hash_pyx, 'w').write('pass')
self.fresh_cythonize(hash_pyx, cache=self.cache_dir, cplus=False, show_version=True)
self.assertEqual(2, len(self.cache_files('options.c*')))
...@@ -519,29 +519,11 @@ class CompilationSource(object): ...@@ -519,29 +519,11 @@ class CompilationSource(object):
class CompilationOptions(object): class CompilationOptions(object):
r"""
See default_options at the end of this module for a list of all possible
options and CmdLine.usage and CmdLine.parse_command_line() for their
meaning.
""" """
Options to the Cython compiler:
show_version boolean Display version number
use_listing_file boolean Generate a .lis file
errors_to_stderr boolean Echo errors to stderr when using .lis
include_path [string] Directories to search for include files
output_file string Name of generated .c file
generate_pxi boolean Generate .pxi file for public declarations
capi_reexport_cincludes
boolean Add cincluded headers to any auto-generated
header files.
timestamps boolean Only compile changed source files.
verbose boolean Always print source names being compiled
compiler_directives dict Overrides for pragma options (see Options.py)
embedded_metadata dict Metadata to embed in the C file as json.
evaluate_tree_assertions boolean Test support: evaluate parse tree assertions
language_level integer The Python language level: 2 or 3
formal_grammar boolean Parse the file with the formal grammar
cplus boolean Compile as c++ code
"""
def __init__(self, defaults=None, **kw): def __init__(self, defaults=None, **kw):
self.include_path = [] self.include_path = []
if defaults: if defaults:
...@@ -595,6 +577,80 @@ class CompilationOptions(object): ...@@ -595,6 +577,80 @@ class CompilationOptions(object):
return Context(self.include_path, self.compiler_directives, return Context(self.include_path, self.compiler_directives,
self.cplus, self.language_level, options=self) self.cplus, self.language_level, options=self)
def get_fingerprint(self):
r"""
Return a string that contains all the options that are relevant for cache invalidation.
"""
data = {}
for key in self.__dict__:
if key in ['show_version', 'errors_to_stderr', 'verbose', 'quiet']:
# verbosity flags have no influence on the compilation result
continue
elif key in ['output_file', 'output_dir']:
# ignore the exact name of the output file
continue
elif key in ['timestamps']:
# the cache cares about the content of files, not about the timestamps of sources
continue
elif key in ['cache']:
# hopefully caching has no influence on the compilation result
continue
elif key in ['compiler_directives']:
# directives passed on to the C compiler do not influence the generated C code
continue
elif key in ['include_path']:
# this path changes which headers are tracked as dependencies,
# it has no influence on the generated C code
continue
elif key in ['working_path']:
# this path changes where modules and pxd files are found;
# their content is part of the fingerprint anyway, their
# absolute path does not matter
continue
elif key in ['create_extension']:
# create_extension() has already mangled the options, e.g.,
# embedded_metadata, when the fingerprint is computed so we
# ignore it here.
continue
elif key in ['use_listing_file', 'generate_pxi', 'annotate', 'annotate_coverage_xml']:
# all output files are contained in the cache so the types of
# files generated must be part of the fingerprint
pass
elif key in ['formal_grammar', 'evaluate_tree_assertions']:
# these bits can change whether compilation to C passes/fails
pass
elif key in ['embedded_metadata', 'emit_linenums', 'c_line_in_traceback', 'gdb_debug', 'relative_path_in_code_position_comments']:
# the generated code contains additional bits when these are set
pass
elif key in ['cplus', 'language_level', 'compile_time_env', 'np_pythran']:
# assorted bits that, e.g., influence the parser
pass
elif key == ['capi_reexport_cincludes']:
if self.capi_reexport_cincludes:
# our caching implementation does not yet include fingerprints of all the header files
raise NotImplementedError('capi_reexport_cincludes is not compatible with Cython caching')
elif key == ['common_utility_include_dir']:
if self.common_utility_include_dir:
raise NotImplementedError('common_utility_include_dir is not compatible with Cython caching yet')
else:
# any unexpected option should go into the fingerprint; it's better
# to recompile than to return incorrect results from the cache.
pass
data[key] = self.__dict__[key]
def to_fingerprint(item):
r"""
Recursively turn item into a string, turning dicts into lists with
deterministic ordering.
"""
if isinstance(item, dict):
item = sorted([(repr(key), to_fingerprint(item[key])) for key in item])
return repr(item)
return to_fingerprint(data)
class CompilationResult(object): class CompilationResult(object):
""" """
...@@ -769,7 +825,6 @@ default_options = dict( ...@@ -769,7 +825,6 @@ default_options = dict(
compile_time_env = None, compile_time_env = None,
common_utility_include_dir = None, common_utility_include_dir = None,
output_dir=None, output_dir=None,
build_dir=None,
cache=None, cache=None,
create_extension=None, create_extension=None,
np_pythran=False np_pythran=False
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment