Commit 5984bc11 authored by john's avatar john

bug fixes to ST.py and Html.py, added basic_parser.py

parent e6dd0bb5
#!/usr/bin/env/python #!/usr/bin/env/python
##############################################################################
#
# Zope Public License (ZPL) Version 1.0
# -------------------------------------
#
# Copyright (c) Digital Creations. All rights reserved.
#
# This license has been certified as Open Source(tm).
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions in source code must retain the above copyright
# notice, this list of conditions, and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions, and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# 3. Digital Creations requests that attribution be given to Zope
# in any manner possible. Zope includes a "Powered by Zope"
# button that is installed by default. While it is not a license
# violation to remove this button, it is requested that the
# attribution remain. A significant investment has been put
# into Zope, and this effort will continue if the Zope community
# continues to grow. This is one way to assure that growth.
#
# 4. All advertising materials and documentation mentioning
# features derived from or use of this software must display
# the following acknowledgement:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# In the event that the product being advertised includes an
# intact Zope distribution (with copyright and license included)
# then this clause is waived.
#
# 5. Names associated with Zope or Digital Creations must not be used to
# endorse or promote products derived from this software without
# prior written permission from Digital Creations.
#
# 6. Modified redistributions of any form whatsoever must retain
# the following acknowledgment:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# Intact (re-)distributions of any official Zope release do not
# require an external acknowledgement.
#
# 7. Modifications are encouraged but must be packaged separately as
# patches to official Zope releases. Distributions that do not
# clearly separate the patches from the original work must be clearly
# labeled as unofficial distributions. Modifications which do not
# carry the name Zope may be packaged in any form, as long as they
# conform to all of the clauses above.
#
#
# Disclaimer
#
# THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
#
# This software consists of contributions made by Digital Creations and
# many individuals on behalf of Digital Creations. Specific
# attributions are listed in the accompanying credits file.
#
##############################################################################
from string import replace,split from string import replace,split
import re import re
...@@ -95,6 +11,7 @@ class HTML: ...@@ -95,6 +11,7 @@ class HTML:
self.olevels = [] self.olevels = []
self.ulevels = [] self.ulevels = []
self.par = "off" self.par = "off"
self.endofpar = re.compile("\n\s*\n").search
self.self_par = ["header","order_list", "unorder_list"] self.self_par = ["header","order_list", "unorder_list"]
self.types = {"header":self.header, self.types = {"header":self.header,
"named_link":self.named_link, "named_link":self.named_link,
...@@ -296,6 +213,10 @@ class HTML: ...@@ -296,6 +213,10 @@ class HTML:
self.string = self.string + result self.string = self.string + result
def inner_link(self,object): def inner_link(self,object):
"""
The object's string should be a string, nothing more
"""
result = "" result = ""
for x in object.string(): for x in object.string():
result = result + x result = result + x
......
...@@ -112,7 +112,8 @@ def runner(struct,top,level,numbers): ...@@ -112,7 +112,8 @@ def runner(struct,top,level,numbers):
for x in numbers: for x in numbers:
if level > x: if level > x:
tmp.append(x) tmp.append(x)
numbers = tmp
numbers = tmp
numbers.append(level) numbers.append(level)
if len(numbers) == 1: if len(numbers) == 1:
...@@ -178,7 +179,7 @@ def StructuredText(paragraphs): ...@@ -178,7 +179,7 @@ def StructuredText(paragraphs):
StructuredText accepts paragraphs, which is a list of StructuredText accepts paragraphs, which is a list of
lines to be parsed. StructuredText creates a structure lines to be parsed. StructuredText creates a structure
which mimics the structure of the paragraphs. which mimics the structure of the paragraphs.
Structure => [raw_paragraph,parsed_paragraph,[sub-paragraphs]] Structure => [paragraph,[sub-paragraphs]]
""" """
current_level = 0 current_level = 0
...@@ -194,36 +195,36 @@ def StructuredText(paragraphs): ...@@ -194,36 +195,36 @@ def StructuredText(paragraphs):
if not paragraphs: if not paragraphs:
result = ["",[]] result = ["",[]]
return result return result
for paragraph in paragraphs: for paragraph in paragraphs:
if paragraph == '\n': if paragraph == '\n':
ind.append([-1, paragraph]) ind.append([-1, paragraph])
else : else:
ind.append([indention(paragraph), strip(paragraph)+"\n"]) ind.append([indention(paragraph), strip(paragraph)+"\n"])
current_indent = indention(paragraphs[0]) current_indent = indention(paragraphs[0])
levels[0] = current_indent levels[0] = current_indent
for indent,paragraph in ind : for indent,paragraph in ind :
if indent == 0: if indent == 0:
struct.append([paragraph,[]]) struct.append([paragraph,[]])
current_level = 0 current_level = 0
current_indent = 0 current_indent = 0
numbers = [0] numbers = [0]
levels = {0:0} levels = {0:0}
top = top + 1 top = top + 1
elif indent == current_indent: elif indent == current_indent:
run,numbers = runner(struct,top,current_level,numbers)
run.append([paragraph,[]]) run.append([paragraph,[]])
elif indent > current_indent: elif indent > current_indent:
current_level = current_level + 1 current_level = current_level + 1
current_indent = indent current_indent = indent
numbers.append(current_level)
levels[current_level] = indent levels[current_level] = indent
run,numbers = runner(struct,top,current_level,numbers) run,numbers = runner(struct,top,current_level,numbers)
run.append([paragraph,[]]) run.append([paragraph,[]])
levels[current_level] = indent levels[current_level] = indent
elif indent < current_indent: elif indent < current_indent:
l = parent_level(levels,current_level) l = parent_level(levels,current_level)
if indent > 0 and indent < levels[0]: if indent > 0 and indent < levels[0]:
levels[0] = indent levels[0] = indent
current_indent = indent current_indent = indent
...@@ -253,8 +254,8 @@ def StructuredText(paragraphs): ...@@ -253,8 +254,8 @@ def StructuredText(paragraphs):
current_level = i current_level = i
current_indent = indent current_indent = indent
run,numbers = runner(struct,top,current_level,numbers) run,numbers = runner(struct,top,current_level,numbers)
levels = tmp levels = tmp
run.append([paragraph,[]]) run.append([paragraph,[]])
return struct return struct
class doc_text: class doc_text:
...@@ -288,7 +289,7 @@ class doc_header: ...@@ -288,7 +289,7 @@ class doc_header:
""" """
def __init__(self,str=''): def __init__(self,str=''):
self.expr = re.compile('[ a-zA-Z0-9.:/,-_*<>?]+').match self.expr = re.compile('[ a-zA-Z0-9.:/,-_*<>\?\'\"]+').match
self.str = [str] # list things within this instance self.str = [str] # list things within this instance
self.typ = "header" # what type is this expresion self.typ = "header" # what type is this expresion
self.start = 0 # start position of expr self.start = 0 # start position of expr
...@@ -482,7 +483,7 @@ class doc_examples: ...@@ -482,7 +483,7 @@ class doc_examples:
class doc_emphasize: class doc_emphasize:
def __init__(self,str=''): def __init__(self,str=''):
self.expr = re.compile('\s*\*[ \na-zA-Z0-9.:/;,]+\*(?!\*|-)').search self.expr = re.compile('\s*\*[ \na-zA-Z0-9.:/;,\'\"\?]+\*(?!\*|-)').search
self.str = [str] self.str = [str]
self.typ = "emphasize" self.typ = "emphasize"
...@@ -518,7 +519,7 @@ class doc_emphasize: ...@@ -518,7 +519,7 @@ class doc_emphasize:
class doc_strong: class doc_strong:
def __init__(self,str=''): def __init__(self,str=''):
self.expr = re.compile('\s*\*\*[ \na-zA-Z0-9.:/;\-,!]+\*\*').search self.expr = re.compile('\s*\*\*[ \na-zA-Z0-9.:/;\-,!\?\'\"]+\*\*').search
self.str = [str] self.str = [str]
self.typ = "strong" self.typ = "strong"
...@@ -554,7 +555,7 @@ class doc_strong: ...@@ -554,7 +555,7 @@ class doc_strong:
class doc_underline: class doc_underline:
def __init__(self,str=''): def __init__(self,str=''):
self.expr = re.compile('\s*_[ \na-zA-Z0-9.:/;,]+_').search self.expr = re.compile('\s*_[ \na-zA-Z0-9.:/;,\'\"\?]+_').search
self.str = [str] self.str = [str]
self.typ = "underline" self.typ = "underline"
...@@ -590,7 +591,7 @@ class doc_underline: ...@@ -590,7 +591,7 @@ class doc_underline:
class doc_href1: class doc_href1:
def __init__(self,str=''): def __init__(self,str=''):
self.expr = re.compile('\"[ a-zA-Z0-9.:/;,\n]+\":[a-zA-Z0-9.:/;,\n]+(?=(\s+|\.|\!|\?))').search self.expr = re.compile('\"[ a-zA-Z0-9.:/;,\n\~]+\":[a-zA-Z0-9.:/;,\n\~]+(?=(\s+|\.|\!|\?))').search
self.str = [str] self.str = [str]
self.typ = "href1" self.typ = "href1"
...@@ -793,8 +794,8 @@ class DOC: ...@@ -793,8 +794,8 @@ class DOC:
doc_header(), doc_header(),
doc_examples(), doc_examples(),
doc_example(), doc_example(),
doc_emphasize(),
doc_strong(), doc_strong(),
doc_emphasize(),
doc_underline(), doc_underline(),
doc_href1(), doc_href1(),
doc_href2(), doc_href2(),
......
#!/usr/bin/env/python
##############################################################################
#
# Zope Public License (ZPL) Version 1.0
# -------------------------------------
#
# Copyright (c) Digital Creations. All rights reserved.
#
# This license has been certified as Open Source(tm).
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions in source code must retain the above copyright
# notice, this list of conditions, and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions, and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# 3. Digital Creations requests that attribution be given to Zope
# in any manner possible. Zope includes a "Powered by Zope"
# button that is installed by default. While it is not a license
# violation to remove this button, it is requested that the
# attribution remain. A significant investment has been put
# into Zope, and this effort will continue if the Zope community
# continues to grow. This is one way to assure that growth.
#
# 4. All advertising materials and documentation mentioning
# features derived from or use of this software must display
# the following acknowledgement:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# In the event that the product being advertised includes an
# intact Zope distribution (with copyright and license included)
# then this clause is waived.
#
# 5. Names associated with Zope or Digital Creations must not be used to
# endorse or promote products derived from this software without
# prior written permission from Digital Creations.
#
# 6. Modified redistributions of any form whatsoever must retain
# the following acknowledgment:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# Intact (re-)distributions of any official Zope release do not
# require an external acknowledgement.
#
# 7. Modifications are encouraged but must be packaged separately as
# patches to official Zope releases. Distributions that do not
# clearly separate the patches from the original work must be clearly
# labeled as unofficial distributions. Modifications which do not
# carry the name Zope may be packaged in any form, as long as they
# conform to all of the clauses above.
#
#
# Disclaimer
#
# THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
#
# This software consists of contributions made by Digital Creations and
# many individuals on behalf of Digital Creations. Specific
# attributions are listed in the accompanying credits file.
#
##############################################################################
"""
class display does the basic traversal of a DOC/StructuredText
structure.
This is the process that would be needed for a parser,
such as HTML.
Commented are the steps for getting a basic parser built.
The basic steps
1. Must define in __init__ a self.types dictionary which
has the names of structured text types as its keys and
functions to handle the types as its values.
2. Must define a function for each type to be interpreted.
NOTE : The function names must match those in the self.types
dictionary.
3. A means of traversing a DOC or StructuredText structure must
be available.
NOTE : Unless the format of the structures returned by StructuredText
and DOC have been altered, it should suffice to un-comment the
if else clause in the function loop.
"""
class display:
    """
    Skeleton walker for a DOC/StructuredText structure.

    As shipped, the class only walks the structure it is called with
    and prints every paragraph it meets; it is meant to be copied and
    filled in to build a real output parser. The structure is a list
    of entries where entry[0] is the paragraph and entry[1] is the
    list of that paragraph's sub-paragraphs (same layout, recursively).

    To turn the skeleton into a parser, add methods along these lines:

        # Here you would define your dictionary of types to handle.
        # "type_name" should be the string returned by the
        # instance's .type() call. The .str item is what raw
        # strings are added to.
        def __init__(self):
            self.str = ""
            self.types = {"type_name": self.def_name}

        # paragraph would receive raw strings only and add them
        # to self.str
        def paragraph(self, par):
            self.str = self.str + par

        # Here you would traverse the instance's (in this case
        # object's) string through the .string() call. The string
        # can consist of strings, instances, and lists only
        # (anything else is a mistake). A function like this is
        # needed for every value in the self.types dictionary.
        def def_name(self, object):
            for item in object.string():
                if type(item).__name__ == "string":
                    self.paragraph(item)
                elif type(item).__name__ == "list":
                    self.loop(item)
                elif type(item).__name__ == "instance":
                    self.types[item.type()](item)

    NOTE(review): the "string" and "instance" type names are kept as
    originally written; confirm they match what type(x).__name__
    reports on the interpreter in use.
    """

    def loop(self, object):
        """
        Handle one paragraph; the skeleton simply prints it.

        object -- the paragraph handed over by __call__ or call_loop,
                  i.e. the first element of a structure entry.
        """
        # With a single argument, print(x) writes the same text as the
        # older `print x` statement and also parses on newer interpreters.
        print(object)
        # UNCOMMENT THIS IF ELSE CLAUSE
        # This traverses either a raw string, which is sent to
        # self.paragraph, or a list (often from an instance's string)
        # which is traversed and whose strings, lists, and instances
        # are each handled in turn.
        #
        # if type(object).__name__ == "string":
        #     self.paragraph(object)
        # else:
        #     for x in object:
        #         if type(x).__name__ == "string":
        #             self.paragraph(x)
        #         elif type(x).__name__ == "list":
        #             self.loop(x)
        #         elif type(x).__name__ == "instance":
        #             self.types[x.type()](x)

    def call_loop(self, subs):
        """
        Walk a list of sub-paragraphs, depth first.

        subs -- list of entries; y[0] is the original paragraph and
                y[1] is a list of that paragraph's sub-paragraphs.
        """
        for y in subs:
            self.loop(y[0])
            # Only descend when the entry actually has sub-paragraphs.
            if y[1]:
                self.call_loop(y[1])

    def __call__(self, struct):
        """
        Walk a whole structure, handing every paragraph to self.loop.

        struct -- list of entries; x[0] is the original paragraph and
                  x[1] is a list of that paragraph's sub-paragraphs.

        The skeleton returns nothing. A real parser would return the
        accumulated text; see the commented lines at the end.
        """
        for x in struct:
            self.loop(x[0])
            if x[1]:
                self.call_loop(x[1])
        # Once paragraph() accumulates into self.str, un-comment these
        # lines to hand back the text and reset the accumulator:
        #
        # result = self.str
        # self.str = ""
        # return result
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment