Commit 091ce1d8 authored by Stefan Behnel

repair issues found by CPython f-string tests

parent 5451bab2
...@@ -898,7 +898,7 @@ def p_string_literal(s, kind_override=None): ...@@ -898,7 +898,7 @@ def p_string_literal(s, kind_override=None):
else: else:
if kind_override is not None and kind_override in 'ub': if kind_override is not None and kind_override in 'ub':
kind = kind_override kind = kind_override
if kind in {'u', 'f'}: # f-strings are scanned exactly like Unicode literals, but are parsed further later if kind in ('u', 'f'): # f-strings are scanned exactly like Unicode literals, but are parsed further later
chars = StringEncoding.UnicodeLiteralBuilder() chars = StringEncoding.UnicodeLiteralBuilder()
elif kind == '': elif kind == '':
chars = StringEncoding.StrLiteralBuilder(s.source_encoding) chars = StringEncoding.StrLiteralBuilder(s.source_encoding)
...@@ -998,17 +998,17 @@ def p_f_string(s, unicode_value, pos): ...@@ -998,17 +998,17 @@ def p_f_string(s, unicode_value, pos):
current_literal_start = 0 current_literal_start = 0
while i < size: while i < size:
c = unicode_value[i] c = unicode_value[i]
if c in ('{', '}'): if c in '{}':
if i + 1 < size and unicode_value[i + 1] == c: if i + 1 < size and unicode_value[i + 1] == c:
encoded_str = EncodedString(unicode_value[current_literal_start:i + 1]) encoded_str = EncodedString(unicode_value[current_literal_start:i + 1])
values.append(ExprNodes.UnicodeNode(pos, value = encoded_str)) values.append(ExprNodes.UnicodeNode(pos, value=encoded_str))
i += 2 i += 2
current_literal_start = i current_literal_start = i
elif c == '}': elif c == '}':
s.error("single '}' encountered in format string") s.error("single '}' encountered in format string")
else: else:
encoded_str = EncodedString(unicode_value[current_literal_start:i]) encoded_str = EncodedString(unicode_value[current_literal_start:i])
values.append(ExprNodes.UnicodeNode(pos, value = encoded_str)) values.append(ExprNodes.UnicodeNode(pos, value=encoded_str))
i, expr_node = p_f_string_expr(s, unicode_value, pos, i + 1) i, expr_node = p_f_string_expr(s, unicode_value, pos, i + 1)
current_literal_start = i current_literal_start = i
values.append(expr_node) values.append(expr_node)
...@@ -1016,7 +1016,7 @@ def p_f_string(s, unicode_value, pos): ...@@ -1016,7 +1016,7 @@ def p_f_string(s, unicode_value, pos):
i += 1 i += 1
encoded_str = EncodedString(unicode_value[current_literal_start:]) encoded_str = EncodedString(unicode_value[current_literal_start:])
values.append(ExprNodes.UnicodeNode(pos, value = encoded_str)) values.append(ExprNodes.UnicodeNode(pos, value=encoded_str))
return values return values
...@@ -1069,8 +1069,12 @@ def p_f_string_expr(s, unicode_value, pos, starting_index): ...@@ -1069,8 +1069,12 @@ def p_f_string_expr(s, unicode_value, pos, starting_index):
break break
i += 1 i += 1
# the expression is parsed as if it is surrounded by parentheses # normalise line endings as the parser expects that
expr_str = u'(%s)' % unicode_value[starting_index:i] expr_str = unicode_value[starting_index:i].replace('\r\n', '\n').replace('\r', '\n')
expr_pos = (pos[0], pos[1], pos[2] + starting_index + 2) # TODO: find exact code position (concat, multi-line, ...)
if not expr_str.strip():
s.error("empty expression not allowed in f-string")
if terminal_char == '!': if terminal_char == '!':
i += 1 i += 1
...@@ -1085,21 +1089,30 @@ def p_f_string_expr(s, unicode_value, pos, starting_index): ...@@ -1085,21 +1089,30 @@ def p_f_string_expr(s, unicode_value, pos, starting_index):
terminal_char = unicode_value[i] terminal_char = unicode_value[i]
if terminal_char == ':': if terminal_char == ':':
in_triple_quotes = False
in_string = False
nested_depth = 0 nested_depth = 0
start_format_spec = i + 1 start_format_spec = i + 1
while True: while True:
if i >= size: if i >= size:
s.error("missing '}' in format specifier") s.error("missing '}' in format specifier")
c = unicode_value[i] c = unicode_value[i]
if c == '{': if not in_triple_quotes and not in_string:
if nested_depth >= 1: if c == '{':
s.error("nesting of '{' in format specifier is not allowed") if nested_depth >= 1:
nested_depth += 1 s.error("nesting of '{' in format specifier is not allowed")
elif c == '}' and nested_depth == 0: nested_depth += 1
terminal_char = c elif c == '}' and nested_depth == 0:
break terminal_char = c
elif c == '}': break
nested_depth -= 1 elif c == '}':
nested_depth -= 1
if c in '\'"':
if not in_string and i + 2 < size and unicode_value[i + 1] == c and unicode_value[i + 2] == c:
in_triple_quotes = not in_triple_quotes
i += 2
elif not in_triple_quotes:
in_string = not in_string
i += 1 i += 1
format_spec_str = unicode_value[start_format_spec:i] format_spec_str = unicode_value[start_format_spec:i]
...@@ -1107,11 +1120,9 @@ def p_f_string_expr(s, unicode_value, pos, starting_index): ...@@ -1107,11 +1120,9 @@ def p_f_string_expr(s, unicode_value, pos, starting_index):
if terminal_char != '}': if terminal_char != '}':
s.error("missing '}' in format string expression'") s.error("missing '}' in format string expression'")
# parse the expression # parse the expression as if it was surrounded by parentheses
name = 'format string expression' buf = StringIO('(%s)' % expr_str)
code_source = StringSourceDescriptor(name, expr_str) scanner = PyrexScanner(buf, expr_pos[0], parent_scanner=s, source_encoding=s.source_encoding, initial_pos=expr_pos)
buf = StringIO(expr_str)
scanner = PyrexScanner(buf, code_source, parent_scanner=s, source_encoding=s.source_encoding)
expr = p_testlist(scanner) # TODO is testlist right here? expr = p_testlist(scanner) # TODO is testlist right here?
# validate the conversion char # validate the conversion char
...@@ -1120,13 +1131,13 @@ def p_f_string_expr(s, unicode_value, pos, starting_index): ...@@ -1120,13 +1131,13 @@ def p_f_string_expr(s, unicode_value, pos, starting_index):
# the format spec is itself treated like an f-string # the format spec is itself treated like an f-string
if format_spec_str is not None: if format_spec_str is not None:
format_spec = ExprNodes.JoinedStrNode(pos, values = p_f_string(s, format_spec_str, pos)) format_spec = ExprNodes.JoinedStrNode(pos, values=p_f_string(s, format_spec_str, pos))
else: else:
format_spec = None format_spec = None
return i + 1, ExprNodes.FormattedValueNode( return i + 1, ExprNodes.FormattedValueNode(
s.position(), value = expr, conversion_char = conversion_char, s.position(), value=expr, conversion_char=conversion_char,
format_spec = format_spec) format_spec=format_spec)
# since PEP 448: # since PEP 448:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment