Now supporting semicolon and hash comments.

b549149c · pombredanne · 318ef243 · b549149c · b549149c
Commit b549149c authored Mar 10, 2013 by pombredanne
Hide whitespace changes
Inline Side-by-side

Showing with 138 additions and 73 deletions

src/zc/buildout/configparser.py src/zc/buildout/configparser.py +39 -47

src/zc/buildout/configparser.test src/zc/buildout/configparser.test +99 -26

No files found.
--- a/src/zc/buildout/configparser.py
+++ b/src/zc/buildout/configparser.py
@@ -74,42 +74,34 @@ class MissingSectionHeaderError(ParsingError):
        self.lineno = lineno
        self.line = line

-# This regex captures either plain sections headers with optional trailing 
-# comment separated by a semicolon or a pound sign OR ...
-# new style section headers with an expression and optional trailing comment 
-# that then can be only separated by a pound sign.
-# This second case could require complex parsing as expressions and comments
-# can contain brackets and # signs that would need at least to balance brackets
+# This regex captures section headers with an optional trailing comment
+# separated by a semicolon or a hash.  Section headers can have an optional
+# expression. Expressions and comments can contain brackets but no verbatim '#'
+# and ';' : these need to be escaped.
 # A title line with an expression has the general form:
-#  [section_name: some Python expression] # some comment
+#  [section_name: some Python expression] #; some comment
 # This regex leverages the fact that the following is a valid Python expression:
 #  [some Python expression] # some comment
-# and that section headers are always delimited by [brackets] which are also 
-# the delimiters for Python [lists]
-# So instead of doing complex parsing to balance brackets, we capture just 
-# enough from a header line to collect then remove the section_name and colon 
-# expression separator keeping only a list-enclosed expression and optional
-# comments. Therefore the parsing and validation of this resulting Python 
-# expression can be entirely delegated to the built-in Python eval compiler. 
-# The result of the evaluated expression is the always returned wrapped in a 
-# list with a single item that contains the original expression
+# and that section headers are also delimited by [brackets] that are also [list]
+# delimiters.
+# So instead of doing complex parsing to balance brackets in an expression, we
+# capture just enough from a header line to collect then remove the section_name
+# and colon expression separator keeping only a list-enclosed expression and
+# optional comments. The parsing and validation of this Python expression can be
+# entirely delegated to Python's eval. The result of the evaluated expression is
+# then always returned wrapped in a list with a single item that contains the
+# original expression
+
 section_header  = re.compile(
-    r'(?P<head>\[)'            # opening bracket [ starts a section title line
+    r'(?P<head>\[)'
+    r'\s*'
+    r'(?P<name>[^\s#[\]:;{}]+)'
    r'\s*'
-    r'(?P<name>[^\s[\]:{}]+)'  # section name
+    r'(:(?P<expression>[^#;]*))?'
    r'\s*'
-    r'('
-     r']'                      # closing bracket ] 
-     r'\s*'
-     r'([#;].*)?$'             # optional trailing comment marked by '#' or ';'
-    r'|'                       # OR
-     r':'                      # optional ':' separator for expression
-     r'\s*'
-     r'(?P<tail>.*'            # optional arbitrary Python expression
-     r']'                      # closing bracket ] 
-     r'\s*'
-     r'\#?.*)$'                # optional trailing comment marked by '#'
-    r')'
+    r'(?P<tail>]'
+    r'\s*'
+    r'([#;].*)?$)'
    ).match

 option_start = re.compile(
@@ -129,13 +121,13 @@ def parse(fp, fpname, exp_globals=None):
    leading whitespace.  Blank lines, lines beginning with a '#',
    and just about everything else are ignored.

-    The title line is in the form [name] followed an optional a trailing 
-    comment separated by a semicolon ';' or a pound `#' sign. 
+    The title line is in the form [name] followed by an optional trailing 
+    comment separated by a semicolon `;' or a hash `#' character. 
    
-    Optionally the title line can have the form [name:expression] where 
+    Optionally the title line can have the form `[name:expression]' where 
    expression is an arbitrary Python expression. Sections with an expression 
-    that evaluates to False are ignored. In this form, the optional trailing 
-    comment can only be marked by a pound # sign (semi-colon ; is not valid)
+    that evaluates to False are ignored. Semicolon `;' and hash `#' characters
+    must be string-escaped in expression literals.

    exp_globals is a callable returning a mapping of defaults used as globals 
    during the evaluation of a section conditional expression.
@@ -179,21 +171,21 @@ def parse(fp, fpname, exp_globals=None):
                sectname = header.group('name')

                head = header.group('head') # the starting [
-                tail = header.group('tail') # closing ], expression and comment
-                if tail:
+                expression = header.group('expression')
+                tail = header.group('tail') # closing ]and comment
+                if expression:
+                    # normalize tail comments to Python style
+                    tail = tail.replace(';', '#') if tail else ''
+                    # un-escape literal # and ; . Do not use a string-escape decode
+                    expr = expression.replace(r'\x23','#').replace(r'x3b', ';') 
+                    # rebuild a valid Python expression wrapped in a list
+                    expr = head + expr + tail
                    # lazily populate context only expression
                    if not context:
                        context = exp_globals() if exp_globals else {}
-
-                    # rebuild a valid Python expression wrapped in a list
-                    expression = head + tail
-
-                    # by design and construction, the evaluated  expression 
-                    # is always the first element of a wrapping list
-                    # so we get the first element 
-                    section_condition = eval(expression, context)[0]
-
-                    # ignore section when an expression evaluates to false
+                    # evaluated expression is in list: get first element
+                    section_condition = eval(expr, context)[0]
+                    # finally, ignore section when an expression evaluates to false
                    if not section_condition:
                        logger.debug('Ignoring section %(sectname)r with [expression]: %(expression)r' % locals())
                        continue

--- a/src/zc/buildout/configparser.test
+++ b/src/zc/buildout/configparser.test
@@ -99,13 +99,13 @@ conditional exclusion of sections::
  [s1: 2 + 2 == 4] # this expression is true [therefore "this section" _will_ be NOT skipped
  a = 1

-  [   s2 : 2 + 2 == 5  ]         # comment: this expression is false, so this section will be ignored
+  [   s2 : 2 + 2 == 5  ]         # comment: this expression is false, so this section will be ignored]
  long = a

-  [   s2 : 41 + 1 == 42  ]  # a comment: this expression is true, so this section will be kept
+  [   s2 : 41 + 1 == 42  ]  # a comment: this expression is [true], so this section will be kept
  long = b

-  [s3:2 in map(lambda i:i*2, [i for i in range(10)])] # Complex expressions are [possible!];, though they should not be (abused:) 
+  [s3:2 in map(lambda i:i*2, [i for i in range(10)])] ;# Complex expressions are [possible!];, though they should not be (abused:) 
  # this section will not be skipped
  long = c

@@ -119,10 +119,13 @@ conditional exclusion of sections::
    {'s1': {'a': '1'}, 's2': {'long': 'b'}, 's3': {'long': 'c'}}


-The title line can contain an optional trailing comment separated by a pound 
-sign. The expression and the comment can contain arbitrary characters, including
-brackets that are also used to mark the end of a section header and  that may be 
-ambiguous to recognize in some cases. For example, valid sections lines include::
+Title line optional trailing comments are separated by a hash '#' or semicolon
+';' character.  The expression is an arbitrary expression with one restriction: 
+it cannot contain a literal hash '#' or semicolon ';' character: these need to be 
+string-escaped.
+The comment can contain arbitrary characters, including brackets that are also 
+used to mark the end of a section header and may be ambiguous to recognize in 
+some cases. For example, valid sections lines include::

  [ a ]
  a=1
@@ -142,9 +145,18 @@ ambiguous to recognize in some cases. For example, valid sections lines include:
  [ f ]  # ]
  f = 1

-  [g:2 in map(lambda i:i*2, ['''#;)'''] + [i for i in range(10)] + list('#[]][;#'))] # Complex #expressions; ][are [possible!]
+  [g:2 in map(lambda i:i*2, ['''\x23\x3b)'''] + [i for i in range(10)] + list('\x23[]][\x3b\x23'))] # Complex #expressions; ][are [possible!] and can us escaped # and ; in literals
  g = 1

+  [ h : True ]  ; ]
+  h =1
+
+  [ i :  True]  ; []
+  i=1
+
+  [j:2 in map(lambda i:i*2, ['''\x23\x3b)'''] + [i for i in range(10)] + list('\x23[]][\x3b\x23'))] ; Complex #expressions; ][are [possible!] and can us escaped # and ; in literals
+  j = 1
+
 .. -> text

    >>> try: import StringIO
@@ -158,28 +170,31 @@ ambiguous to recognize in some cases. For example, valid sections lines include:
     'd': {'d': '1'},
     'e': {'e': '1'},
     'f': {'f': '1'},
-     'g': {'g': '1'}}
+     'g': {'g': '1'},
+     'h': {'h': '1'},
+     'i': {'i': '1'},
+     'j': {'j': '1'}}
+


-A title line optional trailing comment may also be separated by a comma 
-- for backward compatibility -- if and only if the title line does not contain
-an expression. The following are valid::
+A title line optional trailing comment may be separated by a hash or semicolon
+character. The following are valid semicolon-separated comments::

-  [ a ]  ;comma comment are supported for lines without expressions ]
+  [ a ]  ;semicolon comment are supported for lines without expressions ]
  a = 1

-  # this comma separated comment is valid because this section does not contain an expression
  [ b ]  ; []
  b = 1

-  # this comma separated comment is valid because this section does not contain an expression
  [ c ]  ; ]
  c = 1

-  # this comma separated comment is valid because this section does not contain an expression
  [ d ]  ; [
  d = 1

+  [ e: True ]  ;semicolon comments are supported for lines with expressions ]
+  e = 1
+
 .. -> text

    >>> try: import StringIO
@@ -187,30 +202,88 @@ an expression. The following are valid::
    >>> import pprint, zc.buildout.configparser
    >>> pprint.pprint(zc.buildout.configparser.parse(StringIO.StringIO(
    ...     text), 'test'))
-    {'a': {'a': '1'}, 'b': {'b': '1'}, 'c': {'c': '1'}, 'd': {'d': '1'}}
+    {'a': {'a': '1'},
+     'b': {'b': '1'},
+     'c': {'c': '1'},
+     'd': {'d': '1'},
+     'e': {'e': '1'}}



-And the following is invalid and will trigger an error::
+The following sections with hash comment separators are valid too::

-  [ d: True ]  ;comma comment are not supported for lines with expressions ]
+  [ a ]  #hash comment ] are supported for lines without expressions ]
+  a = 1
+
+  [ b ]  # []
+  b = 1
+
+  [ c ]  # ]
+  c = 1
+
+  [ d ]  # [
  d = 1

+  [ e: True ]  #hash comments] are supported for lines with expressions ]
+  e = 1
+
+.. -> text
+
+    >>> try: import StringIO
+    ... except ImportError: import io as StringIO
+    >>> import pprint, zc.buildout.configparser
+    >>> pprint.pprint(zc.buildout.configparser.parse(StringIO.StringIO(
+    ...     text), 'test'))
+    {'a': {'a': '1'},
+     'b': {'b': '1'},
+     'c': {'c': '1'},
+     'd': {'d': '1'},
+     'e': {'e': '1'}}
+
+
+However, explicit semicolon and hash characters are invalid in expressions and 
+must be escaped or this triggers an error. In the rare case where a hash '#' or
+semicolon ';' would be needed in an expression literal, you can use the 
+string-escaped representation of these characters:  use '\x23' for hash '#' and 
+'\x3b' for semicolon ';' to avoid evaluation errors.
+These expressions are valid and use escaped hash and semicolons in literals::
+
+  [a:2 in map(lambda i:i*2, ['''\x23\x3b)'''] + [i for i in range(10)] + list('\x23[]][\x3b\x23'))] # Complex #expressions; ][are [possible!] and can us escaped # and ; in literals
+  a = 1
+
+  [b:2 in map(lambda i:i*2, ['''\x23\x3b)'''] + [i for i in range(10)] + list('\x23[]][\x3b\x23'))] ; Complex #expressions; ][are [possible!] and can us escaped # and ; in literals
+  b = 1
+
+.. -> text
+
+    >>> try: import StringIO
+    ... except ImportError: import io as StringIO
+    >>> import pprint, zc.buildout.configparser
+    >>> pprint.pprint(zc.buildout.configparser.parse(StringIO.StringIO(
+    ...     text), 'test'))
+    {'a': {'a': '1'}, 'b': {'b': '1'}}
+
+
+And using unescaped semicolon and hash characters in expressions triggers an error::
+
+  [a:'#' in '#;'] # this is not a supported expression
+  a = 1
+
 .. -> text

    >>> try: import StringIO
    ... except ImportError: import io as StringIO
    >>> import zc.buildout.configparser
    >>> try: zc.buildout.configparser.parse(StringIO.StringIO(text), 'test')
-    ... except SyntaxError: pass # success
+    ... except zc.buildout.configparser.MissingSectionHeaderError: pass # success


-One of the typical usage is to have buildout parts that are operating system or
-platform specific.  The configparser.parse function has an optional 
-exp_globals argument.  This is a callable returning a mapping of objects made 
-available to the evaluation context of the expression. Here we add the 
-platform and sys modules to the evaluation context, so we can access platform 
-and sys functions and objects in our expressions ::
+One typical use of expressions is to have buildout parts that are
+operating system or platform-specific.  The configparser.parse function has an 
+optional exp_globals argument.  This is a callable returning a mapping of 
+objects made available to the evaluation context of the expression. Here we add 
+the platform and sys modules to the evaluation context, so we can access 
+platform and sys modules functions and objects in our expressions ::

  [s1: platform.python_version_tuple()[0] in ('2', '3',)] # this expression is true, the major versions of python are either 2 or 3
  a = 1