Commit ca9e2534 authored by john's avatar john

ST.py, Html.py, Zwiki.py have been added as StructuredTextNG

parent 1fa6fb0c
#!/usr/bin/env python
from string import replace,split
import re
class HTML:
    """
    Render a parsed StructuredText structure as HTML.

    Calling an instance with a structure -- a list of
    [paragraph, sub-paragraphs] pairs -- returns the generated markup.
    A paragraph is a string, a list of strings and nodes, or nested
    lists of those.  A node is any object offering type() and string();
    the value of type() selects the rendering method through self.types.
    """

    def __init__(self):
        self.string = ""    # markup generated so far
        self.level = 1      # nesting depth of the paragraph being rendered
        self.olevels = []   # depths that currently have an open <ol>
        self.ulevels = []   # depths that currently have an open <ul>
        # node type name -> method that renders that kind of node
        self.types = {"header": self.header,
                      "named_link": self.named_link,
                      "order_list": self.order_list,
                      "unorder_list": self.unorder_list,
                      "strong": self.strong,
                      "emphasize": self.emphasize,
                      "href1": self.href1,
                      "href2": self.href2,
                      "description": self.description,
                      "inner_link": self.inner_link,
                      "example": self.example,
                      "underline": self.underline}

    # -- classification helpers -------------------------------------------

    def _is_string(self, item):
        """Return true when item is a plain string."""
        # Only very old interpreters name the type "string"; later ones
        # use "str"/"unicode".  Testing for "string" alone made every
        # piece of text disappear from the output on those interpreters.
        return type(item).__name__ in ("string", "str", "unicode")

    def _is_list(self, item):
        """Return true when item is a list."""
        return isinstance(item, type([]))

    def _is_node(self, item):
        """Return true when item offers the node protocol (type/string)."""
        if self._is_string(item) or self._is_list(item):
            return 0
        return hasattr(item, "type") and hasattr(item, "string")

    def _render(self, item, strip=None):
        """
        Render one item: lists go to loop(), strings to paragraph(),
        nodes to instance(); anything else is ignored.  When strip is
        given, every occurrence of it is removed from a plain string
        before it is written.
        """
        if self._is_list(item):
            self.loop(item)
        elif self._is_string(item):
            if strip is not None:
                item = item.replace(strip, "")
            self.paragraph(item)
        elif self._is_node(item):
            self.instance(item)

    def _text(self, object):
        """Return the pieces of object.string() joined into one string."""
        return "".join(object.string())

    # -- list bookkeeping -------------------------------------------------

    def _has_node(self, item, type_name):
        """Return 1 when item is a list holding a node of type_name."""
        if self._is_list(item):
            for x in item:
                if self._is_node(x) and x.type() == type_name:
                    return 1
        return 0

    def _close_lists(self, paragraph, levels, type_name, closing_tag):
        """
        Write closing_tag for every depth in levels that paragraph ends
        and return the depths that stay open.  A list ends when the
        current depth is shallower than the list, or equal to it while
        the paragraph holds no node of type_name.
        """
        continues = self._has_node(paragraph, type_name)
        still_open = []
        for depth in levels:
            if self.level < depth or (self.level == depth and not continues):
                self.string = self.string + closing_tag
            else:
                still_open.append(depth)
        return still_open

    def _close_open_lists(self):
        """Close every list that is still open, deepest first."""
        pending = []
        for depth in self.olevels:
            pending.append((depth, "</ol>\n"))
        for depth in self.ulevels:
            pending.append((depth, "</ul>\n"))
        pending.sort()
        pending.reverse()
        for depth, tag in pending:
            self.string = self.string + tag
        self.olevels = []
        self.ulevels = []

    def list_check(self, object):
        """
        Close the ordered and unordered lists that the paragraph object
        does not continue.  Ordered lists are closed before unordered
        ones.
        """
        self.olevels = self._close_lists(object, self.olevels,
                                         "order_list", "</ol>\n")
        self.ulevels = self._close_lists(object, self.ulevels,
                                         "unorder_list", "</ul>\n")

    # -- node renderers ---------------------------------------------------

    def header(self, object):
        """
        object is a header instance.  Everything within this
        instance's string is also part of the header.  The heading
        tag number is the current nesting depth.
        """
        self.string = self.string + "<h%s>" % self.level
        for item in object.string():
            self._render(item)
        self.string = self.string + "</h%s>" % self.level

    def order_list(self, object):
        """
        object is an ordered list instance.  Opens an <ol> when none is
        open at the current depth, then writes the item as <li> with
        its leading number removed.
        """
        if self.level not in self.olevels:
            self.olevels.append(self.level)
            self.string = self.string + "<ol>\n"
        self.string = self.string + "<li>"
        leading = re.compile('[ 0-9.]+').match
        number = re.compile('[0-9.]+').search
        for item in object.string():
            if self._is_list(item):
                # the list is updated in place, as before
                for i in range(len(item)):
                    if self._is_string(item[i]):
                        found = leading(item[i])
                        if found:
                            item[i] = item[i][found.end():]
                self.loop(item)
            elif self._is_string(item):
                found = number(item)
                # an item without any number is written unchanged
                # instead of failing on None.span()
                if found:
                    item = item[found.end():]
                self.paragraph(item)
            elif self._is_node(item):
                self.instance(item)
        self.string = self.string + "</li>\n"

    def unorder_list(self, object):
        """
        object is an unordered list instance.  Opens a <ul> when none
        is open at the current depth, then writes the item as <li>.
        """
        if self.level not in self.ulevels:
            self.ulevels.append(self.level)
            self.string = self.string + "<ul>\n"
        self.string = self.string + "<li>"
        for item in object.string():
            if self._is_list(item):
                # NOTE(review): this removes every "-" in the text, not
                # only the bullet, and leaves "*" / "o" bullets alone.
                for i in range(len(item)):
                    if self._is_string(item[i]):
                        item[i] = item[i].replace("-", "")
                self.loop(item)
            else:
                self._render(item, "-")
        self.string = self.string + "</li>\n"

    def emphasize(self, object):
        """
        object is an emphasize instance.  Everything within is also
        emphasized; "*" markers are removed from plain strings.
        """
        self.string = self.string + "<em>"
        for item in object.string():
            self._render(item, '*')
        self.string = self.string + "</em>"

    def strong(self, object):
        """
        object is a strong instance.  Everything within is also
        strong; "**" markers are removed from plain strings.
        """
        self.string = self.string + "<strong>"
        for item in object.string():
            self._render(item, '**')
        self.string = self.string + "</strong>"

    def underline(self, object):
        """
        object is an underline instance.  Everything within is also
        underlined; "_" markers are removed from plain strings.
        """
        self.string = self.string + "<u>"
        for item in object.string():
            self._render(item, '_')
        self.string = self.string + "</u>"

    def href1(self, object):
        """
        The object's string should be a string, nothing more.
        Renders text of the form "label":url as a link.
        """
        strings = self._text(object).split('":')
        strings[0] = strings[0].replace('"', '')
        # NOTE(review): the href value is written unquoted.
        result = "<a href=%s>%s</a>" % (strings[1], strings[0])
        result = result.replace("\n", "\n<br>")
        self.string = self.string + result

    def href2(self, object):
        """
        The object's string should be a string, nothing more.
        Renders text of the form "label", url as a link.
        """
        strings = self._text(object).split(",")
        strings[0] = strings[0].replace('"', '')
        result = "<a href=%s>%s</a>" % (strings[1], strings[0])
        self.string = self.string + result

    def description(self, object):
        """
        Write the description text inside <descr> tags for now.
        """
        result = self._text(object).replace("\n", "\n<br>")
        self.string = self.string + "<descr> " + result + " </descr>"

    def example(self, object):
        """
        An example object's string should be just a string.  It is
        written with line breaks marked and single quotes removed.
        """
        result = self._text(object).replace("\n", "\n<br>")
        result = result.replace("'", "")
        self.string = self.string + result

    def named_link(self, object):
        """
        The object's string should be a string, nothing more.
        Renders ".. [name]" as an anchor called name.
        """
        result = self._text(object).replace(".. ", "")
        name = result.replace("[", "").replace("]", "")
        result = result.replace("\n", "\n<br>")
        self.string = self.string + "<a name=%s>%s</a>" % (name, result)

    def inner_link(self, object):
        """
        The object's string should be a string, nothing more.
        Renders "[name]" as a link to the anchor called name.
        """
        result = self._text(object)
        name = result.replace("[", "").replace("]", "")
        result = result.replace("\n", "\n<br>")
        self.string = self.string + "<a href=#%s>%s</a>" % (name, result)

    # -- traversal --------------------------------------------------------

    def paragraph(self, object):
        """Write the string object, marking every line end with <br>."""
        self.string = self.string + object.replace("\n", "<br>\n")

    def instance(self, object):
        """
        Render the node object with the method registered for its
        type; report unknown types on standard output.
        """
        handler = self.types.get(object.type())
        if handler is not None:
            handler(object)
        else:
            print("error, type not supported  %s" % (type(object),))

    def loop(self, object):
        """
        Render a paragraph: a plain string is written directly, any
        other sequence is rendered item by item.
        """
        if self._is_string(object):
            self.paragraph(object)
            return
        for x in object:
            self._render(x)

    def call_loop(self, subs):
        """
        Render each [paragraph, sub-paragraphs] pair of subs, going one
        depth deeper for the sub-paragraphs.
        """
        for y in subs:
            self.list_check(y[0])
            self.loop(y[0])
            if y[1]:
                self.level = self.level + 1
                self.call_loop(y[1])
                self.level = self.level - 1

    def __call__(self, struct):
        """
        Render struct and return the markup, prefixed with the opening
        <html> and <body> tags.  Lists still open at the end are closed
        and the list state is cleared, so the instance can be reused.
        The accumulated output is reset afterwards.
        """
        self.call_loop(struct)
        self._close_open_lists()
        result = ("<html>\n<body bgcolor='white' text='black'>\n"
                  + self.string)
        self.string = ""
        return result
#!/usr/bin/env python
import re
from string import split
from string import join
from string import replace
def untabify(str):
    """
    Convert indention tabs to spaces.

    Only the indentation at the very start of str is touched: an
    optional leading newline, then spaces, then tabs.  Each of those
    tabs becomes eight spaces; tabs anywhere else are left alone.
    The leading newline used to be mandatory, so a paragraph that
    started directly with a tab was never converted.
    """
    front = re.compile('(\n)?( *)\t*')
    # every part of the pattern is optional, so the match always succeeds
    end = front.match(str).end()
    return str[:end].replace('\t', ' ' * 8) + str[end:]
def indention(str):
    """
    Return the number of leading spaces of str, after untabify() has
    converted its indentation tabs.  Returns 0 when there are none and
    -1 when str is just a newline.
    """
    if str == '\n':
        return -1
    str = untabify(str)     # convert the tabs to spaces
    # Counting the stripped characters replaces a '( *)' match whose
    # "no match" branch could never run.
    return len(str) - len(str.lstrip(' '))
def runner(struct,top,level,numbers):
    """
    Return the list that a paragraph of the given level must be
    appended to.

    struct  -- the whole structure, a list of [paragraph, subs] entries
    top     -- index in struct of the current top-level entry
    level   -- nesting level of the paragraph to add (0 = top level)
    numbers -- maps a level minus one to the 1-based position, among
               its siblings, of the entry to descend into

    Level 0 returns struct itself and level 1 the sub-list of
    struct[top].  For deeper levels the walk goes down one level at a
    time, starting with numbers[0].  The first step for level 3 and
    deeper used to read numbers[level-2] instead of numbers[0], which
    followed the wrong branch or raised IndexError.
    """
    if level == 0:
        # struct[top] is not needed here, so an empty struct is fine
        return struct
    run = struct[top][1]
    depth = 2
    while depth <= level:
        position = numbers[depth-2]
        run = run[position-1][1]
        depth = depth + 1
    return run
def find_level(indent, levels):
    """
    When the indention is less, find which level it is a sublevel of.

    levels maps an indentation to its level.  An indentation that is
    recorded returns its own level.  Otherwise the level of the
    nearest deeper indentation is returned; the first deeper key the
    dictionary happened to yield used to be taken, so the answer
    depended on key order.  Returns None when no recorded indentation
    is deeper than indent.
    """
    if indent in levels:
        return levels[indent]
    deeper = [key for key in levels.keys() if key > indent]
    if deeper:
        return levels[min(deeper)]
    return None
def split_paragraphs(paragraphs):
    """
    Each paragraph is denoted by the end of a line and a blank line
    before the beginning of a new paragraph.

    paragraphs is a sequence of strings (typically lines).  They are
    joined, split wherever a line holding only spaces separates two
    blocks, and every resulting block is returned with '\\n\\n'
    appended.
    """
    text = ''.join(paragraphs)
    blank_line = re.compile('\n[ ]*\n')
    return [block + '\n\n' for block in blank_line.split(text)]
def StructuredText(paragraphs):
    """
    StructuredText accepts paragraphs, which is a list of lines to be
    parsed.  StructuredText creates a structure which mimics the
    structure of the paragraphs.

    Structure => a list of [raw_paragraph, [sub-paragraphs]] entries,
    where the sub-paragraphs have the same shape.

    A paragraph without indentation starts a new top-level entry.  A
    paragraph indented deeper than the previous one becomes its
    sub-paragraph, equal indentation makes a sibling, and a smaller
    indentation returns to the level find_level() reports for it.
    """
    struct = []             # the structure to be returned
    levels = {0: 0}         # indentation -> level
    # parents[n] is the list that receives paragraphs of level n.  It
    # replaces per-level counters that were never reset, which sent
    # deeper paragraphs to the wrong entry or raised IndexError.
    parents = {0: struct}
    current_level = 0
    current_indent = 0

    for paragraph in split_paragraphs(paragraphs):
        if paragraph == '\n':
            continue
        indent = indention(paragraph)
        if indent < 0:
            continue

        if indent == 0:
            # a new top header, start over, everything underneath is
            # a sub-paragraph
            level = 0
            levels = {0: 0}
            parents = {0: struct}
        elif indent > current_indent:
            level = current_level + 1
        elif indent < current_indent:
            level = find_level(indent, levels)
            if level is None:
                level = current_level
        else:
            level = current_level

        # Never go deeper than one level below the previous paragraph.
        # This also turns an indented paragraph that comes before any
        # top-level one into a top-level entry instead of failing.
        while level > 0 and level not in parents:
            level = level - 1

        if indent > current_indent:
            levels[indent] = level
        current_indent = indent
        current_level = level

        entry = [paragraph, []]
        parents[level].append(entry)
        # deeper lists belonged to earlier entries; forget them
        for depth in list(parents.keys()):
            if depth > level:
                del parents[depth]
        parents[level + 1] = entry[1]

    return struct
class doc_text:
    """
    Plain text: what a paragraph is taken to be until one of the
    structured_text types is found in it.
    """

    def __init__(self,str=''):
        # the wrapped paragraph
        self.str = str

    def __call__(self,str,subs):
        """Plain text never matches anything; always returns None."""
        return None

    def __getitem__(self,index):
        """Return the item of the wrapped paragraph at index."""
        wrapped = self.str
        return wrapped[index]

    def string(self):
        """Return the wrapped paragraph."""
        return self.str

    def type(self,str=''):
        """Return the type name, 'text'."""
        return 'text'
class doc_header:
    """
    This class is for header instances.
    The structure that doc_header matches is a single line
    paragraph whose sub-paragraphs are of a lower level.
    """

    def __init__(self,str=''):
        # NOTE(review): inside the brackets ",-_" is a character range
        # (comma through underscore), so characters such as ";", "=",
        # "@" and "[" are accepted too -- confirm whether that is meant.
        self.expr = re.compile('[ a-zA-Z0-9.:/,-_*<>?]+').match
        self.str = [str]        # list things within this instance
        self.typ = "header"     # what type is this expression
        self.start = 0          # start position of expr
        self.end = 0            # end position of expr

    def type(self):
        """Return the type name, "header"."""
        return self.typ

    def string(self):
        """Return the list of things within this instance."""
        return self.str

    def __getitem__(self,index):
        return self.str[index]

    def __call__(self,raw_string,subs):
        """
        The raw_string is checked to see if it matches the rules
        for this structured text expression.  If the raw_string does,
        it is parsed for the sub-string which matches and a doc_header
        instance is returned whose string is the matching substring.
        If raw_string does not match, nothing is returned.

        A header needs exactly one non-empty line (or none) and at
        least one sub-paragraph.
        """
        lines = [line for line in raw_string.split('\n') if line]
        if len(lines) > 1:
            return None
        if not subs:
            return None
        found = self.expr(raw_string)   # evaluated once, not three times
        if not found:
            return None
        start,end = found.span()
        result = doc_header(raw_string[start:end])
        result.start,result.end = start,end
        return result

    def span(self):
        """Return (start, end) of the match this instance was made from."""
        return self.start,self.end
class doc_unorder_list:
    """
    This class matches for unordered list elements: a paragraph that
    starts with "-", "*" or "o" followed by whitespace.
    """

    def __init__(self,str=''):
        # raw literal: the pattern is unchanged, but no longer relies
        # on "\s" and "\*" being passed through as unknown escapes
        self.expr = re.compile(
            r'\s*(-\s+|\*\s+|o\s+)[ a-zA-Z0-9.:/,*_<>]+').match
        self.str = [str]
        self.typ = "unorder_list"
        self.start = 0
        self.end = 0

    def type(self):
        """Return the type name, "unorder_list"."""
        return self.typ

    def string(self):
        """Return the list of things within this instance."""
        return self.str

    def __getitem__(self,index):
        return self.str[index]

    def __call__(self,raw_string,subs):
        """
        The raw_string is checked to see if it matches the rules
        for this structured text expression.  If the raw_string does,
        a doc_unorder_list instance is returned whose string is the
        whole raw_string and whose span covers all of it.
        If raw_string does not match, nothing is returned.
        """
        if not self.expr(raw_string):
            return None
        result = doc_unorder_list(raw_string)
        result.start, result.end = (0,len(raw_string))
        return result

    def span(self):
        """Return (start, end) of the text this instance covers."""
        return self.start,self.end
class doc_order_list:
    """
    This class matches for ordered list elements: a paragraph that
    starts with a number.
    """

    def __init__(self,str=''):
        # NOTE(review): the lookahead "(?=s*|.)" always succeeds ("s*"
        # matches nothing at all), so any paragraph that starts with
        # digits is accepted -- confirm the intended rule.
        self.expr = re.compile(r'\s*[0-9]+(?=s*|.)').match
        self.str = [str]
        self.typ = "order_list"
        # span() used to fail on an instance that was never matched
        self.start = 0
        self.end = 0

    def type(self):
        """Return the type name, "order_list"."""
        return self.typ

    def string(self):
        """Return the list of things within this instance."""
        return self.str

    def __getitem__(self,index):
        return self.str[index]

    def __call__(self,raw_string,subs):
        """
        The raw_string is checked to see if it matches the rules
        for this structured text expression.  If the raw_string does,
        a doc_order_list instance is returned whose string is the
        whole raw_string and whose span covers all of it.
        If raw_string does not match, nothing is returned.
        """
        if not self.expr(raw_string):
            return None
        result = doc_order_list(raw_string)
        result.start, result.end = (0,len(raw_string))
        return result

    def span(self):
        """Return (start, end) of the text this instance covers."""
        return self.start,self.end
class doc_example:
    """
    This class matches for inline examples: text enclosed in single
    quotes.
    """

    def __init__(self,str=''):
        # Same pattern as before, written as a raw literal.  The
        # trailing lookahead "(?=s*)" always succeeds.
        self.expr = re.compile(
            r"\s*'[ \na-zA-Z0-9.:/\-\_,*<>]+'(?=s*)").search
        self.str = [str]
        self.typ = "example"
        # span() used to fail on an instance that was never matched
        self.start = 0
        self.end = 0

    def type(self):
        """Return the type name, "example"."""
        return self.typ

    def string(self):
        """Return the list of things within this instance."""
        return self.str

    def __getitem__(self,index):
        return self.str[index]

    def __call__(self,raw_string,subs):
        """
        The raw_string is checked to see if it matches the rules
        for this structured text expression.  If the raw_string does,
        it is parsed for the sub-string which matches and a doc_example
        instance is returned whose string is the matching substring.
        If raw_string does not match, nothing is returned.
        """
        found = self.expr(raw_string)   # evaluated once, not three times
        if not found:
            return None
        start,end = found.span()
        result = doc_example(raw_string[start:end])
        result.start,result.end = start,end
        return result

    def span(self):
        """Return (start, end) of the match this instance was made from."""
        return self.start,self.end
class doc_examples:
    """
    This class marks example blocks: when a paragraph announces an
    example, all of its sub-paragraphs become example paragraphs.
    """

    def __init__(self,str=''):
        self.expr = re.compile(r'(example|examples|::)\s+').search
        self.str = [str]
        self.typ = "example"
        # start and end were never set anywhere, so span() always failed
        self.start = 0
        self.end = 0

    def type(self):
        """Return the type name, "example"."""
        return self.typ

    def string(self):
        """Return the list of things within this instance."""
        return self.str

    def __getitem__(self,index):
        return self.str[index]

    def __call__(self,raw_string,subs):
        """
        The raw_string is checked to see if it matches the rules
        for this structured text expression.  If raw_string matches,
        all sub-paragraphs of the raw_string are turned into example
        paragraphs; subs is changed in place.
        Nothing is returned in either case.
        """
        if self.expr(raw_string):
            for sub in subs:
                sub[0] = doc_example(sub[0])
        return None

    def span(self):
        """Return (start, end); always (0, 0) for this class."""
        return self.start,self.end
class doc_emphasize:
    """
    This class matches for emphasized text: text enclosed in single
    asterisks.
    """

    def __init__(self,str=''):
        self.expr = re.compile(
            r'\s*\*[ \na-zA-Z0-9.:/;,]+\*(?!\*|-)').search
        self.str = [str]
        self.typ = "emphasize"
        # span() used to fail on an instance that was never matched
        self.start = 0
        self.end = 0

    def type(self):
        """Return the type name, "emphasize"."""
        return self.typ

    def string(self):
        """Return the list of things within this instance."""
        return self.str

    def __getitem__(self,index):
        return self.str[index]

    def __call__(self,raw_string,subs):
        """
        The raw_string is checked to see if it matches the rules
        for this structured text expression.  If the raw_string does,
        it is parsed for the sub-string which matches and a doc_emphasize
        instance is returned whose string is the matching substring.
        If raw_string does not match, nothing is returned.
        """
        found = self.expr(raw_string)   # evaluated once, not three times
        if not found:
            return None
        start,end = found.span()
        result = doc_emphasize(raw_string[start:end])
        result.start,result.end = start,end
        return result

    def span(self):
        """Return (start, end) of the match this instance was made from."""
        return self.start,self.end
class doc_strong:
    """
    This class matches for strong text: text enclosed in double
    asterisks.
    """

    def __init__(self,str=''):
        self.expr = re.compile(
            r'\s*\*\*[ \na-zA-Z0-9.:/;\-,!]+\*\*').search
        self.str = [str]
        self.typ = "strong"
        # span() used to fail on an instance that was never matched
        self.start = 0
        self.end = 0

    def type(self):
        """Return the type name, "strong"."""
        return '%s' % self.typ

    def string(self):
        """Return the list of things within this instance."""
        return self.str

    def __getitem__(self,index):
        return self.str[index]

    def __call__(self,raw_string,subs):
        """
        The raw_string is checked to see if it matches the rules
        for this structured text expression.  If the raw_string does,
        it is parsed for the sub-string which matches and a doc_strong
        instance is returned whose string is the matching substring.
        If raw_string does not match, nothing is returned.
        """
        found = self.expr(raw_string)   # evaluated once, not three times
        if not found:
            return None
        start,end = found.span()
        result = doc_strong(raw_string[start:end])
        result.start,result.end = start,end
        return result

    def span(self):
        """Return (start, end) of the match this instance was made from."""
        return self.start,self.end
class doc_underline:
    """
    This class matches for underlined text: text enclosed in
    underscores.
    """

    def __init__(self,str=''):
        self.expr = re.compile(r'\s*_[ \na-zA-Z0-9.:/;,]+_').search
        self.str = [str]
        self.typ = "underline"
        # span() used to fail on an instance that was never matched
        self.start = 0
        self.end = 0

    def type(self):
        """Return the type name, "underline"."""
        return self.typ

    def string(self):
        """Return the list of things within this instance."""
        return self.str

    def __getitem__(self,index):
        return self.str[index]

    def __call__(self,raw_string,subs):
        """
        The raw_string is checked to see if it matches the rules
        for this structured text expression.  If the raw_string does,
        it is parsed for the sub-string which matches and a doc_underline
        instance is returned whose string is the matching substring.
        If raw_string does not match, nothing is returned.
        """
        found = self.expr(raw_string)   # evaluated once, not three times
        if not found:
            return None
        start,end = found.span()
        result = doc_underline(raw_string[start:end])
        result.start,result.end = start,end
        return result

    def span(self):
        """Return (start, end) of the match this instance was made from."""
        return self.start,self.end
class doc_href1:
    """
    This class matches for links written as "label":url, followed by
    whitespace or one of ". ! ?".
    """

    def __init__(self,str=''):
        # same pattern as before, written as a raw literal
        self.expr = re.compile(
            r'"[ a-zA-Z0-9.:/;,]+":[a-zA-Z0-9.:/;,]+(?=(\s+|\.|\!|\?))'
            ).search
        self.str = [str]
        self.typ = "href1"
        # span() used to fail on an instance that was never matched
        self.start = 0
        self.end = 0

    def type(self):
        """Return the type name, "href1"."""
        return '%s' % self.typ

    def string(self):
        """Return the list of things within this instance."""
        return self.str

    def __getitem__(self,index):
        return self.str[index]

    def __call__(self,raw_string,subs):
        """
        The raw_string is checked to see if it matches the rules
        for this structured text expression.  If the raw_string does,
        it is parsed for the sub-string which matches and a doc_href1
        instance is returned whose string is the matching substring.
        If raw_string does not match, nothing is returned.
        """
        found = self.expr(raw_string)   # evaluated once, not three times
        if not found:
            return None
        start,end = found.span()
        result = doc_href1(raw_string[start:end])
        result.start,result.end = start,end
        return result

    def span(self):
        """Return (start, end) of the match this instance was made from."""
        return self.start,self.end
class doc_href2:
    """
    This class matches for links written as "label", url, followed by
    whitespace or one of ". ! ?".
    """

    def __init__(self,str=''):
        # same pattern as before, written as a raw literal
        self.expr = re.compile(
            r'"[ a-zA-Z0-9./:]+",\s+[ a-zA-Z0-9@.:/;]+(?=(\s+|\.|\!|\?))'
            ).search
        self.str = [str]
        self.typ = "href2"
        # span() used to fail on an instance that was never matched
        self.start = 0
        self.end = 0

    def type(self):
        """Return the type name, "href2"."""
        return self.typ

    def string(self):
        """Return the list of things within this instance."""
        return self.str

    def __getitem__(self,index):
        return self.str[index]

    def __call__(self,raw_string,subs):
        """
        The raw_string is checked to see if it matches the rules
        for this structured text expression.  If the raw_string does,
        it is parsed for the sub-string which matches and a doc_href2
        instance is returned whose string is the matching substring.
        If raw_string does not match, nothing is returned.
        """
        found = self.expr(raw_string)   # evaluated once, not three times
        if not found:
            return None
        start,end = found.span()
        result = doc_href2(raw_string[start:end])
        result.start,result.end = start,end
        return result

    def span(self):
        """Return (start, end) of the match this instance was made from."""
        return self.start,self.end
class doc_inner_link:
    """
    This class matches for references to a named link, written as
    [name].
    """

    def __init__(self,str=''):
        # NOTE(review): the leading "(?!\.\.\s*)" is tested where the
        # "[" must follow, so it can never reject anything -- confirm
        # whether ".. [name]" was meant to be excluded here.
        self.expr = re.compile(
            r'(?!\.\.\s*)\[[a-zA-Z0-9-_()",.:/]+\]').search
        self.str = [str]
        self.typ = "inner_link"
        # span() used to fail on an instance that was never matched
        self.start = 0
        self.end = 0

    def type(self):
        """Return the type name, "inner_link"."""
        return '%s' % self.typ

    def string(self):
        """Return the list of things within this instance."""
        return self.str

    def __getitem__(self,index):
        return self.str[index]

    def __call__(self,raw_string,subs):
        """
        The raw_string is checked to see if it matches the rules
        for this structured text expression.  If the raw_string does,
        it is parsed for the sub-string which matches and a doc_inner_link
        instance is returned whose string is the matching substring.
        If raw_string does not match, nothing is returned.
        """
        found = self.expr(raw_string)   # evaluated once, not three times
        if not found:
            return None
        start,end = found.span()
        result = doc_inner_link(raw_string[start:end])
        result.start,result.end = start,end
        return result

    def span(self):
        """Return (start, end) of the match this instance was made from."""
        return self.start,self.end
class doc_named_link:
    """
    This class matches for named links (link targets), written as
    ".. [name]".
    """

    def __init__(self,str=''):
        self.expr = re.compile(
            r'\.\.\s+\[[a-zA-Z0-9-_()",.:/]+\]').search
        self.str = [str]
        self.typ = "named_link"
        # span() used to fail on an instance that was never matched
        self.start = 0
        self.end = 0

    def type(self):
        """Return the type name, "named_link"."""
        return self.typ

    def string(self):
        """Return the list of things within this instance."""
        return self.str

    def __getitem__(self,index):
        return self.str[index]

    def __call__(self,raw_string,subs):
        """
        The raw_string is checked to see if it matches the rules
        for this structured text expression.  If the raw_string does,
        it is parsed for the sub-string which matches and a doc_named_link
        instance is returned whose string is the matching substring.
        If raw_string does not match, nothing is returned.
        """
        found = self.expr(raw_string)   # evaluated once, not three times
        if not found:
            return None
        start,end = found.span()
        result = doc_named_link(raw_string[start:end])
        result.start,result.end = start,end
        return result

    def span(self):
        """Return (start, end) of the match this instance was made from."""
        return self.start,self.end
class doc_description:
    """
    This class matches for descriptions: a term that ends in "--",
    in a paragraph whose second line is not empty.
    """

    def __init__(self,str=''):
        self.expr = re.compile(
            r'\s*[ a-zA-Z0-9.:/,;*<>]+--(?![ a-zA-Z0-9.:/,;]+)').search
        self.str = [str]
        self.typ = "description"
        # span() used to fail on an instance that was never matched
        self.start = 0
        self.end = 0

    def type(self):
        """Return the type name, "description"."""
        return self.typ

    def string(self):
        """Return the list of things within this instance."""
        return self.str

    def __getitem__(self,index):
        return self.str[index]

    def __call__(self,raw_string,subs):
        """
        The raw_string is checked to see if it matches the rules
        for this structured text expression.  If the raw_string does,
        it is parsed for the sub-string which matches and a doc_description
        instance is returned whose string is the matching substring.
        If raw_string does not match, nothing is returned.

        Nothing is returned either when raw_string has fewer than two
        lines or when its second line is empty.
        """
        lines = raw_string.split('\n')
        if len(lines) < 2:
            return None
        if lines[1] == '':
            return None
        found = self.expr(raw_string)   # evaluated once, not three times
        if not found:
            return None
        start,end = found.span()
        result = doc_description(raw_string[start:end])
        result.start,result.end = start,end
        return result

    def span(self):
        """Return (start, end) of the match this instance was made from."""
        return self.start,self.end
class DOC:
    """
    Class instance calls [ex.=> x()] require a structured text
    structure.  Doc will then parse each paragraph in the structure
    and will find the special structures within each paragraph.
    Each special structure will be stored as an instance.  Special
    structures within another special structure are stored within
    the 'top' structure.
    EX : '-underline this-' => would be turned into an underline
    instance.  '-underline **this**' would be stored as an underline
    instance with a strong instance stored in its string.

    An expression is a callable taking (raw_string, subs) that returns
    either nothing or a matched instance offering type(), string() and
    span().
    """

    def __init__(self):
        # the expressions, in the order they are tried on a paragraph
        self.types = [doc_unorder_list(),
                      doc_order_list(),
                      doc_header(),
                      doc_examples(),
                      doc_example(),
                      doc_emphasize(),
                      doc_strong(),
                      doc_underline(),
                      doc_href1(),
                      doc_href2(),
                      doc_named_link(),
                      doc_inner_link(),
                      doc_description(),
                      doc_text()]
        self.string = []    # list of paragraphs, after being parsed
        self.org = []       # the original raw_string
        self.now = []       # the strings current appearance
        self.struct = []    # the structuredtext object with tagged instances

    # -- classification helpers -------------------------------------------

    def _is_string(self, item):
        """Return true when item is a plain string."""
        # Only very old interpreters name the type "string"; later ones
        # use "str"/"unicode".  Testing for "string" alone sent every
        # paragraph to the "is not supported" branch on those.
        return type(item).__name__ in ("string", "str", "unicode")

    def _is_list(self, item):
        """Return true when item is a list."""
        return isinstance(item, type([]))

    def _is_node(self, item):
        """Return true when item offers type() and string()."""
        if self._is_string(item) or self._is_list(item):
            return 0
        return hasattr(item, "type") and hasattr(item, "string")

    # -- rules ------------------------------------------------------------

    def dont_check(self,object,expr,subs):
        """
        Return 1 when expr may be searched for inside the instance
        object and 0 when it may not: nothing is searched inside an
        example, inner links are not searched inside named links, and
        headers are not searched inside list items.
        """
        if object.type() == "example":
            return 0
        if object.type() == "named_link" and expr.type() == "inner_link":
            return 0
        if object.type() == "order_list" and expr.type() == "header":
            return 0
        if object.type() == "unorder_list" and expr.type() == "header":
            return 0
        return 1

    def original(self,str):
        """
        Finds the original structure of a paragraph.
        str is now that paragraph after parsing.

        Every plain string found is added to self.org and self.now;
        every instance is added to self.now and its own string is
        searched as well.
        """
        for x in str:
            if self._is_node(x):
                self.now = self.now + [x]
                self.original(x.string())
            elif self._is_string(x):
                self.org = self.org + [x]
                self.now = self.now + [x]
            else:
                self.original(x)

    def final_check(self,str,expr,subs):
        """
        str is a list
        expr is the special type being searched for
        subs are the sub-paragraphs of str

        final_check is meant to put the string back together and look
        for an instance that envelopes another instance.
        EX : this paragraph ** is a ** header
        if str = ['this paragraph', <strong instance>, 'header']
        the result should be [<header instance>].

        TODO: that search is not implemented.  The bookkeeping that
        used to run here produced values nobody read and could raise
        IndexError while pruning self.now, so only its visible effect
        is kept: self.org and self.now are left empty.
        """
        self.org = []
        self.now = []

    def parse(self,raw_string,expr,subs=None):
        """
        Parse accepts a raw_string, an expr to test the raw_string,
        and the raw_string's subparagraphs.
        Parse will continue to search through raw_string until
        all instances of expr in raw_string are found.
        If no instances of expr are found, raw_string is returned.
        Otherwise a list of substrings and instances is returned.
        """
        if subs is None:
            subs = []
        pieces = []     # the list to be returned if raw_string is split
        found = expr(raw_string,subs)   # one call per round, not two
        while found:
            start,end = found.span()
            if end <= 0:
                # an empty match at the front would never make progress
                break
            pieces.append(raw_string[0:start])
            pieces.append(found)
            raw_string = raw_string[end:]
            found = expr(raw_string,subs)
        if pieces:
            # the string was broken into a list
            return pieces + [raw_string]
        # string did not change
        return raw_string

    def instance(self,object,expr,subs):
        """
        Receives an instance and an expression.
        Instance will go through the instance's string and will test
        the expression on each string item.  Each string item in the
        instance's string will then be updated if necessary.
        """
        if not self.dont_check(object,expr,subs):
            return
        for i in range(len(object.string())):
            item = object.str[i]
            if self._is_string(item):
                object.str[i] = self.parse(item,expr,subs)
            elif self._is_node(item):
                if item.type() != "example":
                    self.instance(item,expr,subs)
                else:
                    # NOTE(review): this replaces a nested example by
                    # its bare string list -- confirm that dropping the
                    # example wrapper is intended.
                    object.str[i] = object[i].str

    def list(self,object,expr,subs):
        """
        Need to go through each item in a list.  Strings are parsed
        and replaced in place, instances and nested lists are searched.
        """
        for i in range(len(object)):
            item = object[i]
            if self._is_node(item):
                if self.dont_check(item,expr,subs):
                    self.instance(item,expr,subs)
            elif self._is_string(item):
                object[i] = self.parse(item,expr,subs)
            elif self._is_list(item):
                self.list(item,expr,subs)

    def divide(self,object,expr,subs):
        """
        Search object.str for expr, whatever shape it has by now: a
        list, an instance, or a string (which is replaced by the
        result of parse).  Other kinds are reported on standard output.
        """
        item = object.str
        if self._is_list(item):
            self.list(item,expr,subs)
        elif self._is_node(item):
            self.instance(item,expr,subs)
        elif self._is_string(item):
            object.str = self.parse(item,expr,subs)
        else:
            print("%s  is not supported" % type(item).__name__)

    def search(self,str,subs):
        """
        Search the paragraph str for each special structure and return
        the parsed paragraph.
        """
        str = doc_text(str)     # a paragraph is text until proven guilty
        for expr in self.types:
            self.divide(str,expr,subs)
        result = str.str
        # need to make sure that there are no instances that were
        # missed in the over-all structure
        if self._is_list(result):
            for expr in self.types:
                self.final_check(result,expr,subs)
        return result

    def call_test(self,par,subs):
        """Parse one paragraph, record the result in self.string, return it."""
        tmp = self.search(par,subs)
        self.string.append(tmp)
        return tmp

    def call_loop(self,pars):
        """
        Parse every [paragraph, sub-paragraphs] entry of pars in place.
        A paragraph is parsed before its sub-paragraphs, because
        parsing it may change them.
        """
        for y in pars:
            y[0] = self.call_test(y[0],y[1])
            if y[1]:
                self.call_loop(y[1])

    def __call__(self,struct):
        """
        Parse the whole structure in place and return it.  The parsed
        paragraphs are also collected in self.string.
        """
        self.string = []
        self.call_loop(struct)
        self.struct = struct
        return self.struct
#!/usr/bin/python
from Html import HTML
from string import split
from ST import DOC
import re
"""
This is the new structured text type.
"""
class Zwiki_Title:
    """
    This class matches for Zwiki titles: a word that starts with two
    or more capitals, or a capitalised word that has another capital
    further on (a WikiName).
    """

    def __init__(self,str=''):
        self.expr1 = re.compile('([A-Z]+[A-Z]+[a-zA-Z]*)').search
        self.expr2 = re.compile('([A-Z]+[a-z]+[A-Z]+[a-zA-Z]*)').search
        self.str = [str]
        self.typ = "Zwiki_Title"
        # span() used to fail on an instance that was never matched
        self.start = 0
        self.end = 0

    def type(self):
        """Return the type name, "Zwiki_Title"."""
        return '%s' % self.typ

    def string(self):
        """Return the list of things within this instance."""
        return self.str

    def __getitem__(self,index):
        return self.str[index]

    def __call__(self,raw_string,subs):
        """
        The raw_string is checked to see if it matches the rules
        for this structured text expression.  If the raw_string does,
        it is parsed for the sub-string which matches and a Zwiki_Title
        instance is returned whose string is the matching substring.
        If raw_string does not match, nothing is returned.

        The all-capitals rule is tried before the WikiName rule.
        """
        # each rule is evaluated once; the second only if the first fails
        found = self.expr1(raw_string) or self.expr2(raw_string)
        if not found:
            return None
        start,end = found.span()
        result = Zwiki_Title(raw_string[start:end])
        result.start,result.end = start,end
        return result

    def span(self):
        """Return (start, end) of the match this instance was made from."""
        return self.start,self.end
class Zwiki_doc(DOC):
    """
    A DOC that searches for Zwiki titles after all of DOC's own
    structured text types.
    """

    def __init__(self):
        DOC.__init__(self)
        # Add the new type to self.types
        title_expr = Zwiki_Title()
        self.types.append(title_expr)
class Zwiki_parser(HTML):
    """
    An HTML renderer that also knows how to write Zwiki titles.
    """

    def __init__(self):
        HTML.__init__(self)
        # register the renderer for the extra node type
        self.types["Zwiki_Title"] = self.zwiki_title

    def zwiki_title(self,object):
        """
        Write the title held by object as a link whose target and
        label are both the title text.
        """
        title = "".join(object.string())
        link = "<a href=%s>%s</a>" % (title,title)
        #link = "<dtml-wikiname %s>" % title
        self.string = self.string + link
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment