From 5703194d0ed3112ce78a67014e495ff1887c9e68 Mon Sep 17 00:00:00 2001 From: Stefan Behnel <stefan_ml@behnel.de> Date: Mon, 11 Sep 2017 17:11:37 +0200 Subject: [PATCH] Prevent character escape sequences from being resolved in raw f-strings (fr"..."). Also fix some error reporting issues along the way. Update test_fstring.py test file from Py3.7. --- CHANGES.rst | 3 +++ Cython/Compiler/Parsing.py | 46 +++++++++++++++++++++----------------- tests/run/test_fstring.pyx | 46 ++++++++++++++++++++++++++++---------- 3 files changed, 62 insertions(+), 33 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index ad0f9ddbb..744a7fae6 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -80,6 +80,9 @@ Bugs fixed * Compile time evaluations of (partially) constant f-strings could show incorrect results. +* Escape sequences in raw f-strings (``fr'...'``) were resolved instead of passing + them through as expected. + * Some ref-counting issues in buffer error handling have been resolved. Other changes diff --git a/Cython/Compiler/Parsing.py b/Cython/Compiler/Parsing.py index bf8c5a2a6..5d6b11f81 100644 --- a/Cython/Compiler/Parsing.py +++ b/Cython/Compiler/Parsing.py @@ -11,8 +11,8 @@ cython.declare(Nodes=object, ExprNodes=object, EncodedString=object, bytes_literal=object, StringEncoding=object, FileSourceDescriptor=object, lookup_unicodechar=object, unicode_category=object, Future=object, Options=object, error=object, warning=object, - Builtin=object, ModuleNode=object, Utils=object, - re=object, sys=object, _parse_escape_sequences=object, _unicode=object, _bytes=object, + Builtin=object, ModuleNode=object, Utils=object, _unicode=object, _bytes=object, + re=object, sys=object, _parse_escape_sequences=object, _parse_escape_sequences_raw=object, partial=object, reduce=object, _IS_PY3=cython.bint, _IS_2BYTE_UNICODE=cython.bint) from io import StringIO @@ -1013,22 +1013,25 @@ def _append_escape_sequence(kind, builder, escape_sequence, s): builder.append(escape_sequence) 
-_parse_escape_sequences = re.compile( +_parse_escape_sequences_raw, _parse_escape_sequences = [re.compile(( # escape sequences: - br'(\\(?:' - br'[\\abfnrtv"\'{]|' - br'[0-7]{2,3}|' - br'N\{[^}]*\}|' - br'x[0-9a-fA-F]{2}|' - br'u[0-9a-fA-F]{4}|' - br'U[0-9a-fA-F]{8}|' - br'[NuU]|' # detect invalid escape sequences that do not match above + br'(\\(?:' + + (br'\\?' if is_raw else ( + br'[\\abfnrtv"\'{]|' + br'[0-7]{2,3}|' + br'N\{[^}]*\}|' + br'x[0-9a-fA-F]{2}|' + br'u[0-9a-fA-F]{4}|' + br'U[0-9a-fA-F]{8}|' + br'[NxuU]|' # detect invalid escape sequences that do not match above + )) + br')?|' # non-escape sequences: br'\{\{?|' br'\}\}?|' - br'[^\\{}]+)'.decode('us-ascii') -).match + br'[^\\{}]+)' + ).decode('us-ascii')).match + for is_raw in (True, False)] def p_f_string(s, unicode_value, pos, is_raw): @@ -1038,13 +1041,15 @@ def p_f_string(s, unicode_value, pos, is_raw): next_start = 0 size = len(unicode_value) builder = StringEncoding.UnicodeLiteralBuilder() + error_pos = list(pos) # [src, line, column] + _parse_seq = _parse_escape_sequences_raw if is_raw else _parse_escape_sequences while next_start < size: end = next_start - match = _parse_escape_sequences(unicode_value, next_start) + error_pos[2] = pos[2] + end # FIXME: handle newlines in string + match = _parse_seq(unicode_value, next_start) if match is None: - error_pos = (pos[0], pos[1] + end, pos[2]) # FIXME: handle newlines in string - error(error_pos, "Invalid escape sequence") + error(tuple(error_pos), "Invalid escape sequence") next_start = match.end() part = match.group() @@ -1068,8 +1073,7 @@ def p_f_string(s, unicode_value, pos, is_raw): if part == '}}': builder.append('}') else: - error_pos = (pos[0], pos[1] + end, pos[2]) # FIXME: handle newlines in string - s.error("f-string: single '}' is not allowed", pos=error_pos) + s.error("f-string: single '}' is not allowed", pos=tuple(error_pos)) else: builder.append(part) @@ -1134,12 +1138,12 @@ def p_f_string_expr(s, unicode_value, pos, starting_index, 
is_raw): expr_pos = (pos[0], pos[1], pos[2] + starting_index + 2) # TODO: find exact code position (concat, multi-line, ...) if not expr_str.strip(): - error(pos, "empty expression not allowed in f-string") + error(expr_pos, "empty expression not allowed in f-string") if terminal_char == '!': i += 1 if i + 2 > size: - error(pos, "invalid conversion char at end of string") + error(expr_pos, "invalid conversion char at end of string") else: conversion_char = unicode_value[i] i += 1 @@ -1152,7 +1156,7 @@ def p_f_string_expr(s, unicode_value, pos, starting_index, is_raw): start_format_spec = i + 1 while True: if i >= size: - s.error("missing '}' in format specifier") + s.error("missing '}' in format specifier", pos=expr_pos) c = unicode_value[i] if not in_triple_quotes and not in_string: if c == '{': diff --git a/tests/run/test_fstring.pyx b/tests/run/test_fstring.pyx index 8c46aea0a..309696c28 100644 --- a/tests/run/test_fstring.pyx +++ b/tests/run/test_fstring.pyx @@ -35,9 +35,10 @@ class TestCase(CythonTest): if exception_type is SyntaxError: try: self.fragment(str) - assert held_errors(), "Invalid Cython code failed to raise SyntaxError: %s" % str except CompileError: assert True + else: + assert held_errors(), "Invalid Cython code failed to raise SyntaxError: %r" % str finally: release_errors(ignore=True) else: @@ -46,7 +47,7 @@ class TestCase(CythonTest): except exception_type: assert True else: - assert False, "Invalid Cython code failed to raise %s: %s" % (exception_type, str) + assert False, "Invalid Cython code failed to raise %s: %r" % (exception_type, str) finally: if error_stack: release_errors(ignore=True) @@ -141,18 +142,9 @@ f'{a * x()}'""" self.assertTrue(g.__doc__ is None) def __test_literal_eval(self): - # With no expressions, an f-string is okay. - self.assertEqual(ast.literal_eval("f'x'"), 'x') - self.assertEqual(ast.literal_eval("f'x' 'y'"), 'xy') - - # But this should raise an error. 
with self.assertRaisesRegex(ValueError, 'malformed node or string'): ast.literal_eval("f'x'") - # As should this, which uses a different ast node - with self.assertRaisesRegex(ValueError, 'malformed node or string'): - ast.literal_eval("f'{3}'") - def __test_ast_compile_time_concat(self): x = [''] @@ -354,6 +346,10 @@ f'{a * x()}'""" "f'{10:{ }}'", "f' { } '", + # The Python parser also ignores the following + # whitespace characters in addition to a space. + "f'''{\t\f\r\n}'''", + # Catch the empty expression before the # invalid conversion. "f'{!x}'", @@ -374,6 +370,12 @@ f'{a * x()}'""" "f'{:x'", ]) + # Different error message is raised for other whitespace characters. + self.assertAllRaise(SyntaxError, 'invalid character in identifier', + ["f'''{\xa0}'''", + #"\xa0", + ]) + def test_parens_in_expressions(self): self.assertEqual(f'{3,}', '(3,)') @@ -435,6 +437,20 @@ f'{a * x()}'""" self.assertEqual(f'2\x203', '2 3') self.assertEqual(f'\x203', ' 3') + #with self.assertWarns(DeprecationWarning): # invalid escape sequence + # value = cy_eval(r"f'\{6*7}'") + #self.assertEqual(value, '\\42') + self.assertEqual(f'\\{6*7}', '\\42') + self.assertEqual(fr'\{6*7}', '\\42') + + AMPERSAND = 'spam' + # Get the right unicode character (&), or pick up local variable + # depending on the number of backslashes. + self.assertEqual(f'\N{AMPERSAND}', '&') + self.assertEqual(f'\\N{AMPERSAND}', '\\Nspam') + self.assertEqual(fr'\N{AMPERSAND}', '\\Nspam') + self.assertEqual(f'\\\N{AMPERSAND}', '\\&') + def test_misformed_unicode_character_name(self): # These test are needed because unicode names are parsed # differently inside f-strings. 
@@ -808,7 +824,8 @@ f'{a * x()}'""" def test_errors(self): # see issue 26287 - self.assertAllRaise((TypeError, ValueError), 'non-empty', # TypeError in Py3.4+ + exc = ValueError if sys.version_info < (3, 4) else TypeError + self.assertAllRaise(exc, 'unsupported', [r"f'{(lambda: 0):x}'", r"f'{(0,):x}'", ]) @@ -832,6 +849,11 @@ f'{a * x()}'""" self.assertEqual(f'{d["foo"]}', 'bar') self.assertEqual(f"{d['foo']}", 'bar') + def __test_backslash_char(self): + # Check eval of a backslash followed by a control char. + # See bpo-30682: this used to raise an assert in pydebug mode. + self.assertEqual(cy_eval('f"\\\n"'), '') + self.assertEqual(cy_eval('f"\\\r"'), '') if __name__ == '__main__': unittest.main() -- 2.30.9