Commit b549149c authored by pombredanne's avatar pombredanne

Now supporting semicolon and hash comments.

parent 318ef243
......@@ -74,42 +74,34 @@ class MissingSectionHeaderError(ParsingError):
self.lineno = lineno
self.line = line
# This regex captures either plain sections headers with optional trailing
# comment separated by a semicolon or a pound sign OR ...
# new style section headers with an expression and optional trailing comment
# that then can be only separated by a pound sign.
# This second case could require complex parsing as expressions and comments
# can contain brackets and # signs that would need at least to balance brackets
# This regex captures section headers with an optional trailing comment
# separated by a semicolon or a hash. Section headers can have an optional
# expression. Expressions and comments can contain brackets but no verbatim '#'
# and ';' : these need to be escaped.
# A title line with an expression has the general form:
# [section_name: some Python expression] # some comment
# [section_name: some Python expression] #; some comment
# This regex leverages the fact that the following is a valid Python expression:
# [some Python expression] # some comment
# and that section headers are always delimited by [brackets] which are also
# the delimiters for Python [lists]
# So instead of doing complex parsing to balance brackets, we capture just
# enough from a header line to collect then remove the section_name and colon
# expression separator keeping only a list-enclosed expression and optional
# comments. Therefore the parsing and validation of this resulting Python
# expression can be entirely delegated to the built-in Python eval compiler.
# The result of the evaluated expression is the always returned wrapped in a
# list with a single item that contains the original expression
# and that section headers are also delimited by [brackets] that are also [list]
# delimiters.
# So instead of doing complex parsing to balance brackets in an expression, we
# capture just enough from a header line to collect then remove the section_name
# and colon expression separator keeping only a list-enclosed expression and
# optional comments. The parsing and validation of this Python expression can be
# entirely delegated to Python's eval. The result of the evaluated expression is
# always returned wrapped in a list with a single item that contains the
# original expression
section_header = re.compile(
r'(?P<head>\[)' # opening bracket [ starts a section title line
r'(?P<head>\[)'
r'\s*'
r'(?P<name>[^\s#[\]:;{}]+)'
r'\s*'
r'(?P<name>[^\s[\]:{}]+)' # section name
r'(:(?P<expression>[^#;]*))?'
r'\s*'
r'('
r']' # closing bracket ]
r'\s*'
r'([#;].*)?$' # optional trailing comment marked by '#' or ';'
r'|' # OR
r':' # optional ':' separator for expression
r'\s*'
r'(?P<tail>.*' # optional arbitrary Python expression
r']' # closing bracket ]
r'\s*'
r'\#?.*)$' # optional trailing comment marked by '#'
r')'
r'(?P<tail>]'
r'\s*'
r'([#;].*)?$)'
).match
option_start = re.compile(
......@@ -129,13 +121,13 @@ def parse(fp, fpname, exp_globals=None):
leading whitespace. Blank lines, lines beginning with a '#',
and just about everything else are ignored.
The title line is in the form [name] followed an optional a trailing
comment separated by a semicolon ';' or a pound `#' sign.
The title line is in the form [name] followed by an optional trailing
comment separated by a semicolon `;' or a hash `#' character.
Optionally the title line can have the form [name:expression] where
Optionally the title line can have the form `[name:expression]' where
expression is an arbitrary Python expression. Sections with an expression
that evaluates to False are ignored. In this form, the optional trailing
comment can only be marked by a pound # sign (semi-colon ; is not valid)
that evaluates to False are ignored. Semicolon `;' and hash `#' characters
must be string-escaped in expression literals.
exp_globals is a callable returning a mapping of defaults used as globals
during the evaluation of a section conditional expression.
......@@ -179,21 +171,21 @@ def parse(fp, fpname, exp_globals=None):
sectname = header.group('name')
head = header.group('head') # the starting [
tail = header.group('tail') # closing ], expression and comment
if tail:
expression = header.group('expression')
tail = header.group('tail') # closing ]and comment
if expression:
# normalize tail comments to Python style
tail = tail.replace(';', '#') if tail else ''
# un-escape literal # and ; . Do not use a string-escape decode
expr = expression.replace(r'\x23','#').replace(r'x3b', ';')
# rebuild a valid Python expression wrapped in a list
expr = head + expr + tail
# lazily populate context only when there is an expression
if not context:
context = exp_globals() if exp_globals else {}
# rebuild a valid Python expression wrapped in a list
expression = head + tail
# by design and construction, the evaluated expression
# is always the first element of a wrapping list
# so we get the first element
section_condition = eval(expression, context)[0]
# ignore section when an expression evaluates to false
# evaluated expression is in list: get first element
section_condition = eval(expr, context)[0]
# finally, ignore section when an expression evaluates to false
if not section_condition:
logger.debug('Ignoring section %(sectname)r with [expression]: %(expression)r' % locals())
continue
......
......@@ -99,13 +99,13 @@ conditional exclusion of sections::
[s1: 2 + 2 == 4] # this expression is true [therefore "this section" _will_ be NOT skipped
a = 1
[ s2 : 2 + 2 == 5 ] # comment: this expression is false, so this section will be ignored
[ s2 : 2 + 2 == 5 ] # comment: this expression is false, so this section will be ignored]
long = a
[ s2 : 41 + 1 == 42 ] # a comment: this expression is true, so this section will be kept
[ s2 : 41 + 1 == 42 ] # a comment: this expression is [true], so this section will be kept
long = b
[s3:2 in map(lambda i:i*2, [i for i in range(10)])] # Complex expressions are [possible!];, though they should not be (abused:)
[s3:2 in map(lambda i:i*2, [i for i in range(10)])] ;# Complex expressions are [possible!];, though they should not be (abused:)
# this section will not be skipped
long = c
......@@ -119,10 +119,13 @@ conditional exclusion of sections::
{'s1': {'a': '1'}, 's2': {'long': 'b'}, 's3': {'long': 'c'}}
The title line can contain an optional trailing comment separated by a pound
sign. The expression and the comment can contain arbitrary characters, including
brackets that are also used to mark the end of a section header and that may be
ambiguous to recognize in some cases. For example, valid sections lines include::
Title line optional trailing comments are separated by a hash '#' or semicolon
';' character. The expression is an arbitrary expression with one restriction:
it cannot contain a literal hash '#' or semicolon ';' character: these need to be
string-escaped.
The comment can contain arbitrary characters, including brackets that are also
used to mark the end of a section header and may be ambiguous to recognize in
some cases. For example, valid sections lines include::
[ a ]
a=1
......@@ -142,9 +145,18 @@ ambiguous to recognize in some cases. For example, valid sections lines include:
[ f ] # ]
f = 1
[g:2 in map(lambda i:i*2, ['''#;)'''] + [i for i in range(10)] + list('#[]][;#'))] # Complex #expressions; ][are [possible!]
[g:2 in map(lambda i:i*2, ['''\x23\x3b)'''] + [i for i in range(10)] + list('\x23[]][\x3b\x23'))] # Complex #expressions; ][are [possible!] and can us escaped # and ; in literals
g = 1
[ h : True ] ; ]
h =1
[ i : True] ; []
i=1
[j:2 in map(lambda i:i*2, ['''\x23\x3b)'''] + [i for i in range(10)] + list('\x23[]][\x3b\x23'))] ; Complex #expressions; ][are [possible!] and can us escaped # and ; in literals
j = 1
.. -> text
>>> try: import StringIO
......@@ -158,28 +170,31 @@ ambiguous to recognize in some cases. For example, valid sections lines include:
'd': {'d': '1'},
'e': {'e': '1'},
'f': {'f': '1'},
'g': {'g': '1'}}
'g': {'g': '1'},
'h': {'h': '1'},
'i': {'i': '1'},
'j': {'j': '1'}}
A title line optional trailing comment may also be separated by a comma
-- for backward compatibility -- if and only if the title line does not contain
an expression. The following are valid::
A title line's optional trailing comment can be separated by a hash or semicolon
character. The following are valid semicolon-separated comments::
[ a ] ;comma comment are supported for lines without expressions ]
[ a ] ;semicolon comment are supported for lines without expressions ]
a = 1
# this comma separated comment is valid because this section does not contain an expression
[ b ] ; []
b = 1
# this comma separated comment is valid because this section does not contain an expression
[ c ] ; ]
c = 1
# this comma separated comment is valid because this section does not contain an expression
[ d ] ; [
d = 1
[ e: True ] ;semicolon comments are supported for lines with expressions ]
e = 1
.. -> text
>>> try: import StringIO
......@@ -187,30 +202,88 @@ an expression. The following are valid::
>>> import pprint, zc.buildout.configparser
>>> pprint.pprint(zc.buildout.configparser.parse(StringIO.StringIO(
... text), 'test'))
{'a': {'a': '1'}, 'b': {'b': '1'}, 'c': {'c': '1'}, 'd': {'d': '1'}}
{'a': {'a': '1'},
'b': {'b': '1'},
'c': {'c': '1'},
'd': {'d': '1'},
'e': {'e': '1'}}
And the following is invalid and will trigger an error::
The following sections with hash comment separators are valid too::
[ d: True ] ;comma comment are not supported for lines with expressions ]
[ a ] #hash comment ] are supported for lines without expressions ]
a = 1
[ b ] # []
b = 1
[ c ] # ]
c = 1
[ d ] # [
d = 1
[ e: True ] #hash comments] are supported for lines with expressions ]
e = 1
.. -> text
>>> try: import StringIO
... except ImportError: import io as StringIO
>>> import pprint, zc.buildout.configparser
>>> pprint.pprint(zc.buildout.configparser.parse(StringIO.StringIO(
... text), 'test'))
{'a': {'a': '1'},
'b': {'b': '1'},
'c': {'c': '1'},
'd': {'d': '1'},
'e': {'e': '1'}}
However, explicit semicolon and hash characters are invalid in expressions and
must be escaped or this triggers an error. In the rare case where a hash '#' or
semicolon ';' would be needed in an expression literal, you can use the
string-escaped representation of these characters: use '\x23' for hash '#' and
'\x3b' for semicolon ';' to avoid evaluation errors.
These expressions are valid and use escaped hash and semicolons in literals::
[a:2 in map(lambda i:i*2, ['''\x23\x3b)'''] + [i for i in range(10)] + list('\x23[]][\x3b\x23'))] # Complex #expressions; ][are [possible!] and can us escaped # and ; in literals
a = 1
[b:2 in map(lambda i:i*2, ['''\x23\x3b)'''] + [i for i in range(10)] + list('\x23[]][\x3b\x23'))] ; Complex #expressions; ][are [possible!] and can us escaped # and ; in literals
b = 1
.. -> text
>>> try: import StringIO
... except ImportError: import io as StringIO
>>> import pprint, zc.buildout.configparser
>>> pprint.pprint(zc.buildout.configparser.parse(StringIO.StringIO(
... text), 'test'))
{'a': {'a': '1'}, 'b': {'b': '1'}}
And using unescaped semicolon and hash characters in expressions triggers an error::
[a:'#' in '#;'] # this is not a supported expression
a = 1
.. -> text
>>> try: import StringIO
... except ImportError: import io as StringIO
>>> import zc.buildout.configparser
>>> try: zc.buildout.configparser.parse(StringIO.StringIO(text), 'test')
... except SyntaxError: pass # success
... except zc.buildout.configparser.MissingSectionHeaderError: pass # success
One of the typical usage is to have buildout parts that are operating system or
platform specific. The configparser.parse function has an optional
exp_globals argument. This is a callable returning a mapping of objects made
available to the evaluation context of the expression. Here we add the
platform and sys modules to the evaluation context, so we can access platform
and sys functions and objects in our expressions ::
One typical use of expressions is to have buildout parts that are
operating system or platform-specific. The configparser.parse function has an
optional exp_globals argument. This is a callable returning a mapping of
objects made available to the evaluation context of the expression. Here we add
the platform and sys modules to the evaluation context, so we can access
platform and sys modules functions and objects in our expressions ::
[s1: platform.python_version_tuple()[0] in ('2', '3',)] # this expression is true, the major versions of python are either 2 or 3
a = 1
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment