Commit 5607fabd authored by Stefan Behnel

reformat Plex code files

parent 727e57d9
...@@ -7,7 +7,6 @@ ...@@ -7,7 +7,6 @@
#======================================================================= #=======================================================================
class Action(object): class Action(object):
def perform(self, token_stream, text): def perform(self, token_stream, text):
pass # abstract pass # abstract
...@@ -78,15 +77,18 @@ class Ignore(Action): ...@@ -78,15 +77,18 @@ class Ignore(Action):
to be ignored. See the docstring of Plex.Lexicon for more to be ignored. See the docstring of Plex.Lexicon for more
information. information.
""" """
def perform(self, token_stream, text): def perform(self, token_stream, text):
return None return None
def __repr__(self): def __repr__(self):
return "IGNORE" return "IGNORE"
IGNORE = Ignore() IGNORE = Ignore()
#IGNORE.__doc__ = Ignore.__doc__ #IGNORE.__doc__ = Ignore.__doc__
class Text(Action): class Text(Action):
""" """
TEXT is a Plex action which causes the text of a token to TEXT is a Plex action which causes the text of a token to
...@@ -100,6 +102,7 @@ class Text(Action): ...@@ -100,6 +102,7 @@ class Text(Action):
def __repr__(self): def __repr__(self):
return "TEXT" return "TEXT"
TEXT = Text() TEXT = Text()
#TEXT.__doc__ = Text.__doc__ #TEXT.__doc__ = Text.__doc__
......
...@@ -13,7 +13,7 @@ from .Machines import LOWEST_PRIORITY ...@@ -13,7 +13,7 @@ from .Machines import LOWEST_PRIORITY
from .Transitions import TransitionMap from .Transitions import TransitionMap
def nfa_to_dfa(old_machine, debug = None): def nfa_to_dfa(old_machine, debug=None):
""" """
Given a nondeterministic Machine, return a new equivalent Given a nondeterministic Machine, return a new equivalent
Machine which is deterministic. Machine which is deterministic.
...@@ -50,6 +50,7 @@ def nfa_to_dfa(old_machine, debug = None): ...@@ -50,6 +50,7 @@ def nfa_to_dfa(old_machine, debug = None):
state_map.dump(debug) state_map.dump(debug)
return new_machine return new_machine
def set_epsilon_closure(state_set): def set_epsilon_closure(state_set):
""" """
Given a set of states, return the union of the epsilon Given a set of states, return the union of the epsilon
...@@ -61,6 +62,7 @@ def set_epsilon_closure(state_set): ...@@ -61,6 +62,7 @@ def set_epsilon_closure(state_set):
result[state2] = 1 result[state2] = 1
return result return result
def epsilon_closure(state): def epsilon_closure(state):
""" """
Return the set of states reachable from the given state Return the set of states reachable from the given state
...@@ -74,6 +76,7 @@ def epsilon_closure(state): ...@@ -74,6 +76,7 @@ def epsilon_closure(state):
add_to_epsilon_closure(result, state) add_to_epsilon_closure(result, state)
return result return result
def add_to_epsilon_closure(state_set, state): def add_to_epsilon_closure(state_set, state):
""" """
Recursively add to |state_set| states reachable from the given state Recursively add to |state_set| states reachable from the given state
...@@ -86,6 +89,7 @@ def add_to_epsilon_closure(state_set, state): ...@@ -86,6 +89,7 @@ def add_to_epsilon_closure(state_set, state):
for state2 in state_set_2: for state2 in state_set_2:
add_to_epsilon_closure(state_set, state2) add_to_epsilon_closure(state_set, state2)
class StateMap(object): class StateMap(object):
""" """
Helper class used by nfa_to_dfa() to map back and forth between Helper class used by nfa_to_dfa() to map back and forth between
...@@ -98,7 +102,7 @@ class StateMap(object): ...@@ -98,7 +102,7 @@ class StateMap(object):
def __init__(self, new_machine): def __init__(self, new_machine):
self.new_machine = new_machine self.new_machine = new_machine
self.old_to_new_dict = {} self.old_to_new_dict = {}
self.new_to_old_dict= {} self.new_to_old_dict = {}
def old_to_new(self, old_state_set): def old_to_new(self, old_state_set):
""" """
...@@ -129,12 +133,12 @@ class StateMap(object): ...@@ -129,12 +133,12 @@ class StateMap(object):
best_priority = priority best_priority = priority
return best_action return best_action
# def old_to_new_set(self, old_state_set): # def old_to_new_set(self, old_state_set):
# """ # """
# Return the new state corresponding to a set of old states as # Return the new state corresponding to a set of old states as
# a singleton set. # a singleton set.
# """ # """
# return {self.old_to_new(old_state_set):1} # return {self.old_to_new(old_state_set):1}
def new_to_old(self, new_state): def new_to_old(self, new_state):
"""Given a new state, return a set of corresponding old states.""" """Given a new state, return a set of corresponding old states."""
...@@ -151,6 +155,7 @@ class StateMap(object): ...@@ -151,6 +155,7 @@ class StateMap(object):
def dump(self, file): def dump(self, file):
from .Transitions import state_set_str from .Transitions import state_set_str
for new_state in self.new_machine.states: for new_state in self.new_machine.states:
old_state_set = self.new_to_old_dict[id(new_state)] old_state_set = self.new_to_old_dict[id(new_state)]
file.write(" State %s <-- %s\n" % ( file.write(" State %s <-- %s\n" % (
......
...@@ -6,32 +6,39 @@ ...@@ -6,32 +6,39 @@
# #
#======================================================================= #=======================================================================
class PlexError(Exception): class PlexError(Exception):
message = "" message = ""
class PlexTypeError(PlexError, TypeError): class PlexTypeError(PlexError, TypeError):
pass pass
class PlexValueError(PlexError, ValueError): class PlexValueError(PlexError, ValueError):
pass pass
class InvalidRegex(PlexError): class InvalidRegex(PlexError):
pass pass
class InvalidToken(PlexError):
class InvalidToken(PlexError):
def __init__(self, token_number, message): def __init__(self, token_number, message):
PlexError.__init__(self, "Token number %d: %s" % (token_number, message)) PlexError.__init__(self, "Token number %d: %s" % (token_number, message))
class InvalidScanner(PlexError): class InvalidScanner(PlexError):
pass pass
class AmbiguousAction(PlexError): class AmbiguousAction(PlexError):
message = "Two tokens with different actions can match the same string" message = "Two tokens with different actions can match the same string"
def __init__(self): def __init__(self):
pass pass
class UnrecognizedInput(PlexError): class UnrecognizedInput(PlexError):
scanner = None scanner = None
position = None position = None
...@@ -43,8 +50,5 @@ class UnrecognizedInput(PlexError): ...@@ -43,8 +50,5 @@ class UnrecognizedInput(PlexError):
self.state_name = state_name self.state_name = state_name
def __str__(self): def __str__(self):
return ("'%s', line %d, char %d: Token not recognised in state %s" return ("'%s', line %d, char %d: Token not recognised in state %s" % (
% (self.position + (repr(self.state_name),))) self.position + (repr(self.state_name),)))
...@@ -38,6 +38,7 @@ class State(object): ...@@ -38,6 +38,7 @@ class State(object):
self.name = name self.name = name
self.tokens = tokens self.tokens = tokens
class Lexicon(object): class Lexicon(object):
""" """
Lexicon(specification) builds a lexical analyser from the given Lexicon(specification) builds a lexical analyser from the given
...@@ -113,11 +114,12 @@ class Lexicon(object): ...@@ -113,11 +114,12 @@ class Lexicon(object):
machine = None # Machine machine = None # Machine
tables = None # StateTableMachine tables = None # StateTableMachine
def __init__(self, specifications, debug = None, debug_flags = 7, timings = None): def __init__(self, specifications, debug=None, debug_flags=7, timings=None):
if type(specifications) != types.ListType: if type(specifications) != types.ListType:
raise Errors.InvalidScanner("Scanner definition is not a list") raise Errors.InvalidScanner("Scanner definition is not a list")
if timings: if timings:
from .Timing import time from .Timing import time
total_time = 0.0 total_time = 0.0
time1 = time() time1 = time()
nfa = Machines.Machine() nfa = Machines.Machine()
...@@ -129,11 +131,11 @@ class Lexicon(object): ...@@ -129,11 +131,11 @@ class Lexicon(object):
for token in spec.tokens: for token in spec.tokens:
self.add_token_to_machine( self.add_token_to_machine(
nfa, user_initial_state, token, token_number) nfa, user_initial_state, token, token_number)
token_number = token_number + 1 token_number += 1
elif type(spec) == types.TupleType: elif type(spec) == types.TupleType:
self.add_token_to_machine( self.add_token_to_machine(
nfa, default_initial_state, spec, token_number) nfa, default_initial_state, spec, token_number)
token_number = token_number + 1 token_number += 1
else: else:
raise Errors.InvalidToken( raise Errors.InvalidToken(
token_number, token_number,
...@@ -145,7 +147,7 @@ class Lexicon(object): ...@@ -145,7 +147,7 @@ class Lexicon(object):
if debug and (debug_flags & 1): if debug and (debug_flags & 1):
debug.write("\n============= NFA ===========\n") debug.write("\n============= NFA ===========\n")
nfa.dump(debug) nfa.dump(debug)
dfa = DFA.nfa_to_dfa(nfa, debug = (debug_flags & 3) == 3 and debug) dfa = DFA.nfa_to_dfa(nfa, debug=(debug_flags & 3) == 3 and debug)
if timings: if timings:
time4 = time() time4 = time()
total_time = total_time + (time4 - time3) total_time = total_time + (time4 - time3)
...@@ -176,8 +178,8 @@ class Lexicon(object): ...@@ -176,8 +178,8 @@ class Lexicon(object):
action = Actions.Call(action_spec) action = Actions.Call(action_spec)
final_state = machine.new_state() final_state = machine.new_state()
re.build_machine(machine, initial_state, final_state, re.build_machine(machine, initial_state, final_state,
match_bol = 1, nocase = 0) match_bol=1, nocase=0)
final_state.set_action(action, priority = -token_number) final_state.set_action(action, priority=-token_number)
except Errors.PlexError, e: except Errors.PlexError, e:
raise e.__class__("Token number %d: %s" % (token_number, e)) raise e.__class__("Token number %d: %s" % (token_number, e))
......
...@@ -59,6 +59,7 @@ class Machine(object): ...@@ -59,6 +59,7 @@ class Machine(object):
for s in self.states: for s in self.states:
s.dump(file) s.dump(file)
class Node(object): class Node(object):
"""A state of an NFA or DFA.""" """A state of an NFA or DFA."""
transitions = None # TransitionMap transitions = None # TransitionMap
...@@ -111,7 +112,7 @@ class Node(object): ...@@ -111,7 +112,7 @@ class Node(object):
# Header # Header
file.write(" State %d:\n" % self.number) file.write(" State %d:\n" % self.number)
# Transitions # Transitions
# self.dump_transitions(file) # self.dump_transitions(file)
self.transitions.dump(file) self.transitions.dump(file)
# Action # Action
action = self.action action = self.action
...@@ -122,21 +123,21 @@ class Node(object): ...@@ -122,21 +123,21 @@ class Node(object):
def __lt__(self, other): def __lt__(self, other):
return self.number < other.number return self.number < other.number
class FastMachine(object): class FastMachine(object):
""" """
FastMachine is a deterministic machine represented in a way that FastMachine is a deterministic machine represented in a way that
allows fast scanning. allows fast scanning.
""" """
initial_states = None # {state_name:state} initial_states = None # {state_name:state}
states = None # [state] states = None # [state] where state = {event:state, 'else':state, 'action':Action}
# where state = {event:state, 'else':state, 'action':Action}
next_number = 1 # for debugging next_number = 1 # for debugging
new_state_template = { new_state_template = {
'':None, 'bol':None, 'eol':None, 'eof':None, 'else':None '': None, 'bol': None, 'eol': None, 'eof': None, 'else': None
} }
def __init__(self, old_machine = None): def __init__(self, old_machine=None):
self.initial_states = initial_states = {} self.initial_states = initial_states = {}
self.states = [] self.states = []
if old_machine: if old_machine:
...@@ -159,7 +160,7 @@ class FastMachine(object): ...@@ -159,7 +160,7 @@ class FastMachine(object):
for state in self.states: for state in self.states:
state.clear() state.clear()
def new_state(self, action = None): def new_state(self, action=None):
number = self.next_number number = self.next_number
self.next_number = number + 1 self.next_number = number + 1
result = self.new_state_template.copy() result = self.new_state_template.copy()
...@@ -179,7 +180,7 @@ class FastMachine(object): ...@@ -179,7 +180,7 @@ class FastMachine(object):
elif code1 != maxint: elif code1 != maxint:
while code0 < code1: while code0 < code1:
state[unichr(code0)] = new_state state[unichr(code0)] = new_state
code0 = code0 + 1 code0 += 1
else: else:
state[event] = new_state state[event] = new_state
...@@ -241,10 +242,10 @@ class FastMachine(object): ...@@ -241,10 +242,10 @@ class FastMachine(object):
while i < n: while i < n:
c1 = ord(char_list[i]) c1 = ord(char_list[i])
c2 = c1 c2 = c1
i = i + 1 i += 1
while i < n and ord(char_list[i]) == c2 + 1: while i < n and ord(char_list[i]) == c2 + 1:
i = i + 1 i += 1
c2 = c2 + 1 c2 += 1
result.append((chr(c1), chr(c2))) result.append((chr(c1), chr(c2)))
return tuple(result) return tuple(result)
......
...@@ -42,14 +42,15 @@ def chars_to_ranges(s): ...@@ -42,14 +42,15 @@ def chars_to_ranges(s):
while i < n: while i < n:
code1 = ord(char_list[i]) code1 = ord(char_list[i])
code2 = code1 + 1 code2 = code1 + 1
i = i + 1 i += 1
while i < n and code2 >= ord(char_list[i]): while i < n and code2 >= ord(char_list[i]):
code2 = code2 + 1 code2 += 1
i = i + 1 i += 1
result.append(code1) result.append(code1)
result.append(code2) result.append(code2)
return result return result
def uppercase_range(code1, code2): def uppercase_range(code1, code2):
""" """
If the range of characters from code1 to code2-1 includes any If the range of characters from code1 to code2-1 includes any
...@@ -63,6 +64,7 @@ def uppercase_range(code1, code2): ...@@ -63,6 +64,7 @@ def uppercase_range(code1, code2):
else: else:
return None return None
def lowercase_range(code1, code2): def lowercase_range(code1, code2):
""" """
If the range of characters from code1 to code2-1 includes any If the range of characters from code1 to code2-1 includes any
...@@ -76,6 +78,7 @@ def lowercase_range(code1, code2): ...@@ -76,6 +78,7 @@ def lowercase_range(code1, code2):
else: else:
return None return None
def CodeRanges(code_list): def CodeRanges(code_list):
""" """
Given a list of codes as returned by chars_to_ranges, return Given a list of codes as returned by chars_to_ranges, return
...@@ -86,6 +89,7 @@ def CodeRanges(code_list): ...@@ -86,6 +89,7 @@ def CodeRanges(code_list):
re_list.append(CodeRange(code_list[i], code_list[i + 1])) re_list.append(CodeRange(code_list[i], code_list[i + 1]))
return Alt(*re_list) return Alt(*re_list)
def CodeRange(code1, code2): def CodeRange(code1, code2):
""" """
CodeRange(code1, code2) is an RE which matches any character CodeRange(code1, code2) is an RE which matches any character
...@@ -98,6 +102,7 @@ def CodeRange(code1, code2): ...@@ -98,6 +102,7 @@ def CodeRange(code1, code2):
else: else:
return RawCodeRange(code1, code2) return RawCodeRange(code1, code2)
# #
# Abstract classes # Abstract classes
# #
...@@ -211,6 +216,7 @@ class RE(object): ...@@ -211,6 +216,7 @@ class RE(object):
## def calc_str(self): ## def calc_str(self):
## return "Char(%s)" % repr(self.char) ## return "Char(%s)" % repr(self.char)
def Char(c): def Char(c):
""" """
Char(c) is an RE which matches the character |c|. Char(c) is an RE which matches the character |c|.
...@@ -222,6 +228,7 @@ def Char(c): ...@@ -222,6 +228,7 @@ def Char(c):
result.str = "Char(%s)" % repr(c) result.str = "Char(%s)" % repr(c)
return result return result
class RawCodeRange(RE): class RawCodeRange(RE):
""" """
RawCodeRange(code1, code2) is a low-level RE which matches any character RawCodeRange(code1, code2) is a low-level RE which matches any character
...@@ -252,6 +259,7 @@ class RawCodeRange(RE): ...@@ -252,6 +259,7 @@ class RawCodeRange(RE):
def calc_str(self): def calc_str(self):
return "CodeRange(%d,%d)" % (self.code1, self.code2) return "CodeRange(%d,%d)" % (self.code1, self.code2)
class _RawNewline(RE): class _RawNewline(RE):
""" """
RawNewline is a low-level RE which matches a newline character. RawNewline is a low-level RE which matches a newline character.
...@@ -266,6 +274,7 @@ class _RawNewline(RE): ...@@ -266,6 +274,7 @@ class _RawNewline(RE):
s = self.build_opt(m, initial_state, EOL) s = self.build_opt(m, initial_state, EOL)
s.add_transition((nl_code, nl_code + 1), final_state) s.add_transition((nl_code, nl_code + 1), final_state)
RawNewline = _RawNewline() RawNewline = _RawNewline()
...@@ -304,7 +313,7 @@ class Seq(RE): ...@@ -304,7 +313,7 @@ class Seq(RE):
i = len(re_list) i = len(re_list)
match_nl = 0 match_nl = 0
while i: while i:
i = i - 1 i -= 1
re = re_list[i] re = re_list[i]
if re.match_nl: if re.match_nl:
match_nl = 1 match_nl = 1
...@@ -354,7 +363,7 @@ class Alt(RE): ...@@ -354,7 +363,7 @@ class Alt(RE):
non_nullable_res.append(re) non_nullable_res.append(re)
if re.match_nl: if re.match_nl:
match_nl = 1 match_nl = 1
i = i + 1 i += 1
self.nullable_res = nullable_res self.nullable_res = nullable_res
self.non_nullable_res = non_nullable_res self.non_nullable_res = non_nullable_res
self.nullable = nullable self.nullable = nullable
...@@ -434,6 +443,7 @@ Empty.__doc__ = \ ...@@ -434,6 +443,7 @@ Empty.__doc__ = \
""" """
Empty.str = "Empty" Empty.str = "Empty"
def Str1(s): def Str1(s):
""" """
Str1(s) is an RE which matches the literal string |s|. Str1(s) is an RE which matches the literal string |s|.
...@@ -442,6 +452,7 @@ def Str1(s): ...@@ -442,6 +452,7 @@ def Str1(s):
result.str = "Str(%s)" % repr(s) result.str = "Str(%s)" % repr(s)
return result return result
def Str(*strs): def Str(*strs):
""" """
Str(s) is an RE which matches the literal string |s|. Str(s) is an RE which matches the literal string |s|.
...@@ -454,6 +465,7 @@ def Str(*strs): ...@@ -454,6 +465,7 @@ def Str(*strs):
result.str = "Str(%s)" % ','.join(map(repr, strs)) result.str = "Str(%s)" % ','.join(map(repr, strs))
return result return result
def Any(s): def Any(s):
""" """
Any(s) is an RE which matches any character in the string |s|. Any(s) is an RE which matches any character in the string |s|.
...@@ -463,6 +475,7 @@ def Any(s): ...@@ -463,6 +475,7 @@ def Any(s):
result.str = "Any(%s)" % repr(s) result.str = "Any(%s)" % repr(s)
return result return result
def AnyBut(s): def AnyBut(s):
""" """
AnyBut(s) is an RE which matches any character (including AnyBut(s) is an RE which matches any character (including
...@@ -475,6 +488,7 @@ def AnyBut(s): ...@@ -475,6 +488,7 @@ def AnyBut(s):
result.str = "AnyBut(%s)" % repr(s) result.str = "AnyBut(%s)" % repr(s)
return result return result
AnyChar = AnyBut("") AnyChar = AnyBut("")
AnyChar.__doc__ = \ AnyChar.__doc__ = \
""" """
...@@ -482,7 +496,8 @@ AnyChar.__doc__ = \ ...@@ -482,7 +496,8 @@ AnyChar.__doc__ = \
""" """
AnyChar.str = "AnyChar" AnyChar.str = "AnyChar"
def Range(s1, s2 = None):
def Range(s1, s2=None):
""" """
Range(c1, c2) is an RE which matches any single character in the range Range(c1, c2) is an RE which matches any single character in the range
|c1| to |c2| inclusive. |c1| to |c2| inclusive.
...@@ -495,11 +510,12 @@ def Range(s1, s2 = None): ...@@ -495,11 +510,12 @@ def Range(s1, s2 = None):
else: else:
ranges = [] ranges = []
for i in range(0, len(s1), 2): for i in range(0, len(s1), 2):
ranges.append(CodeRange(ord(s1[i]), ord(s1[i+1]) + 1)) ranges.append(CodeRange(ord(s1[i]), ord(s1[i + 1]) + 1))
result = Alt(*ranges) result = Alt(*ranges)
result.str = "Range(%s)" % repr(s1) result.str = "Range(%s)" % repr(s1)
return result return result
def Opt(re): def Opt(re):
""" """
Opt(re) is an RE which matches either |re| or the empty string. Opt(re) is an RE which matches either |re| or the empty string.
...@@ -508,6 +524,7 @@ def Opt(re): ...@@ -508,6 +524,7 @@ def Opt(re):
result.str = "Opt(%s)" % re result.str = "Opt(%s)" % re
return result return result
def Rep(re): def Rep(re):
""" """
Rep(re) is an RE which matches zero or more repetitions of |re|. Rep(re) is an RE which matches zero or more repetitions of |re|.
...@@ -516,12 +533,14 @@ def Rep(re): ...@@ -516,12 +533,14 @@ def Rep(re):
result.str = "Rep(%s)" % re result.str = "Rep(%s)" % re
return result return result
def NoCase(re): def NoCase(re):
""" """
NoCase(re) is an RE which matches the same strings as RE, but treating NoCase(re) is an RE which matches the same strings as RE, but treating
upper and lower case letters as equivalent. upper and lower case letters as equivalent.
""" """
return SwitchCase(re, nocase = 1) return SwitchCase(re, nocase=1)
def Case(re): def Case(re):
""" """
...@@ -529,7 +548,7 @@ def Case(re): ...@@ -529,7 +548,7 @@ def Case(re):
upper and lower case letters as distinct, i.e. it cancels the effect upper and lower case letters as distinct, i.e. it cancels the effect
of any enclosing NoCase(). of any enclosing NoCase().
""" """
return SwitchCase(re, nocase = 0) return SwitchCase(re, nocase=0)
# #
# RE Constants # RE Constants
......
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
from __future__ import absolute_import from __future__ import absolute_import
import cython import cython
cython.declare(BOL=object, EOL=object, EOF=object, NOT_FOUND=object) cython.declare(BOL=object, EOL=object, EOF=object, NOT_FOUND=object)
from . import Errors from . import Errors
...@@ -50,25 +51,25 @@ class Scanner(object): ...@@ -50,25 +51,25 @@ class Scanner(object):
""" """
# lexicon = None # Lexicon # lexicon = None # Lexicon
# stream = None # file-like object # stream = None # file-like object
# name = '' # name = ''
# buffer = '' # buffer = ''
# buf_start_pos = 0 # position in input of start of buffer # buf_start_pos = 0 # position in input of start of buffer
# next_pos = 0 # position in input of next char to read # next_pos = 0 # position in input of next char to read
# cur_pos = 0 # position in input of current char # cur_pos = 0 # position in input of current char
# cur_line = 1 # line number of current char # cur_line = 1 # line number of current char
# cur_line_start = 0 # position in input of start of current line # cur_line_start = 0 # position in input of start of current line
# start_pos = 0 # position in input of start of token # start_pos = 0 # position in input of start of token
# start_line = 0 # line number of start of token # start_line = 0 # line number of start of token
# start_col = 0 # position in line of start of token # start_col = 0 # position in line of start of token
# text = None # text of last token read # text = None # text of last token read
# initial_state = None # Node # initial_state = None # Node
# state_name = '' # Name of initial state # state_name = '' # Name of initial state
# queue = None # list of tokens to be returned # queue = None # list of tokens to be returned
# trace = 0 # trace = 0
def __init__(self, lexicon, stream, name = '', initial_pos = None): def __init__(self, lexicon, stream, name='', initial_pos=None):
""" """
Scanner(lexicon, stream, name = '') Scanner(lexicon, stream, name = '')
...@@ -143,7 +144,8 @@ class Scanner(object): ...@@ -143,7 +144,8 @@ class Scanner(object):
if self.trace: if self.trace:
print("Scanner: read: Performing %s %d:%d" % ( print("Scanner: read: Performing %s %d:%d" % (
action, self.start_pos, self.cur_pos)) action, self.start_pos, self.cur_pos))
text = self.buffer[self.start_pos - self.buf_start_pos : text = self.buffer[
self.start_pos - self.buf_start_pos:
self.cur_pos - self.buf_start_pos] self.cur_pos - self.buf_start_pos]
return (text, action) return (text, action)
else: else:
...@@ -198,19 +200,19 @@ class Scanner(object): ...@@ -198,19 +200,19 @@ class Scanner(object):
buf_index = next_pos - buf_start_pos buf_index = next_pos - buf_start_pos
if buf_index < buf_len: if buf_index < buf_len:
c = buffer[buf_index] c = buffer[buf_index]
next_pos = next_pos + 1 next_pos += 1
else: else:
discard = self.start_pos - buf_start_pos discard = self.start_pos - buf_start_pos
data = self.stream.read(0x1000) data = self.stream.read(0x1000)
buffer = self.buffer[discard:] + data buffer = self.buffer[discard:] + data
self.buffer = buffer self.buffer = buffer
buf_start_pos = buf_start_pos + discard buf_start_pos += discard
self.buf_start_pos = buf_start_pos self.buf_start_pos = buf_start_pos
buf_len = len(buffer) buf_len = len(buffer)
buf_index = buf_index - discard buf_index -= discard
if data: if data:
c = buffer[buf_index] c = buffer[buf_index]
next_pos = next_pos + 1 next_pos += 1
else: else:
c = u'' c = u''
# End inlined: c = self.read_char() # End inlined: c = self.read_char()
...@@ -226,7 +228,7 @@ class Scanner(object): ...@@ -226,7 +228,7 @@ class Scanner(object):
cur_char = u'\n' cur_char = u'\n'
input_state = 3 input_state = 3
elif input_state == 3: elif input_state == 3:
cur_line = cur_line + 1 cur_line += 1
cur_line_start = cur_pos = next_pos cur_line_start = cur_pos = next_pos
cur_char = BOL cur_char = BOL
input_state = 1 input_state = 1
...@@ -263,7 +265,7 @@ class Scanner(object): ...@@ -263,7 +265,7 @@ class Scanner(object):
def next_char(self): def next_char(self):
input_state = self.input_state input_state = self.input_state
if self.trace: if self.trace:
print("Scanner: next: %s [%d] %d" % (" "*20, input_state, self.cur_pos)) print("Scanner: next: %s [%d] %d" % (" " * 20, input_state, self.cur_pos))
if input_state == 1: if input_state == 1:
self.cur_pos = self.next_pos self.cur_pos = self.next_pos
c = self.read_char() c = self.read_char()
...@@ -279,7 +281,7 @@ class Scanner(object): ...@@ -279,7 +281,7 @@ class Scanner(object):
self.cur_char = u'\n' self.cur_char = u'\n'
self.input_state = 3 self.input_state = 3
elif input_state == 3: elif input_state == 3:
self.cur_line = self.cur_line + 1 self.cur_line += 1
self.cur_line_start = self.cur_pos = self.next_pos self.cur_line_start = self.cur_pos = self.next_pos
self.cur_char = BOL self.cur_char = BOL
self.input_state = 1 self.input_state = 1
...@@ -313,7 +315,7 @@ class Scanner(object): ...@@ -313,7 +315,7 @@ class Scanner(object):
self.lexicon.get_initial_state(state_name)) self.lexicon.get_initial_state(state_name))
self.state_name = state_name self.state_name = state_name
def produce(self, value, text = None): def produce(self, value, text=None):
""" """
Called from an action procedure, causes |value| to be returned Called from an action procedure, causes |value| to be returned
as the token value from read(). If |text| is supplied, it is as the token value from read(). If |text| is supplied, it is
......
...@@ -25,7 +25,6 @@ def re(s): ...@@ -25,7 +25,6 @@ def re(s):
class REParser(object): class REParser(object):
def __init__(self, s): def __init__(self, s):
self.s = s self.s = s
self.i = -1 self.i = -1
......
...@@ -40,7 +40,7 @@ class TransitionMap(object): ...@@ -40,7 +40,7 @@ class TransitionMap(object):
map = None # The list of codes and states map = None # The list of codes and states
special = None # Mapping for special events special = None # Mapping for special events
def __init__(self, map = None, special = None): def __init__(self, map=None, special=None):
if not map: if not map:
map = [-maxint, {}, maxint] map = [-maxint, {}, maxint]
if not special: if not special:
...@@ -50,7 +50,7 @@ class TransitionMap(object): ...@@ -50,7 +50,7 @@ class TransitionMap(object):
#self.check() ### #self.check() ###
def add(self, event, new_state, def add(self, event, new_state,
TupleType = tuple): TupleType=tuple):
""" """
Add transition to |new_state| on |event|. Add transition to |new_state| on |event|.
""" """
...@@ -61,12 +61,12 @@ class TransitionMap(object): ...@@ -61,12 +61,12 @@ class TransitionMap(object):
map = self.map map = self.map
while i < j: while i < j:
map[i + 1][new_state] = 1 map[i + 1][new_state] = 1
i = i + 2 i += 2
else: else:
self.get_special(event)[new_state] = 1 self.get_special(event)[new_state] = 1
def add_set(self, event, new_set, def add_set(self, event, new_set,
TupleType = tuple): TupleType=tuple):
""" """
Add transitions to the states in |new_set| on |event|. Add transitions to the states in |new_set| on |event|.
""" """
...@@ -77,19 +77,19 @@ class TransitionMap(object): ...@@ -77,19 +77,19 @@ class TransitionMap(object):
map = self.map map = self.map
while i < j: while i < j:
map[i + 1].update(new_set) map[i + 1].update(new_set)
i = i + 2 i += 2
else: else:
self.get_special(event).update(new_set) self.get_special(event).update(new_set)
def get_epsilon(self, def get_epsilon(self,
none = None): none=None):
""" """
Return the mapping for epsilon, or None. Return the mapping for epsilon, or None.
""" """
return self.special.get('', none) return self.special.get('', none)
def iteritems(self, def iteritems(self,
len = len): len=len):
""" """
Return the mapping as an iterable of ((code1, code2), state_set) and Return the mapping as an iterable of ((code1, code2), state_set) and
(special_event, state_set) pairs. (special_event, state_set) pairs.
...@@ -106,17 +106,18 @@ class TransitionMap(object): ...@@ -106,17 +106,18 @@ class TransitionMap(object):
if set or else_set: if set or else_set:
result.append(((code0, code1), set)) result.append(((code0, code1), set))
code0 = code1 code0 = code1
i = i + 2 i += 2
for event, set in self.special.iteritems(): for event, set in self.special.iteritems():
if set: if set:
result.append((event, set)) result.append((event, set))
return iter(result) return iter(result)
items = iteritems items = iteritems
# ------------------- Private methods -------------------- # ------------------- Private methods --------------------
def split(self, code, def split(self, code,
len = len, maxint = maxint): len=len, maxint=maxint):
""" """
Search the list for the position of the split point for |code|, Search the list for the position of the split point for |code|,
inserting a new split point if necessary. Returns index |i| such inserting a new split point if necessary. Returns index |i| such
...@@ -173,10 +174,10 @@ class TransitionMap(object): ...@@ -173,10 +174,10 @@ class TransitionMap(object):
else: else:
code_str = str(code) code_str = str(code)
map_strs.append(code_str) map_strs.append(code_str)
i = i + 1 i += 1
if i < n: if i < n:
map_strs.append(state_set_str(map[i])) map_strs.append(state_set_str(map[i]))
i = i + 1 i += 1
special_strs = {} special_strs = {}
for event, set in self.special.iteritems(): for event, set in self.special.iteritems():
special_strs[event] = state_set_str(set) special_strs[event] = state_set_str(set)
...@@ -199,7 +200,7 @@ class TransitionMap(object): ...@@ -199,7 +200,7 @@ class TransitionMap(object):
n = len(map) - 1 n = len(map) - 1
while i < n: while i < n:
self.dump_range(map[i], map[i + 2], map[i + 1], file) self.dump_range(map[i], map[i + 2], map[i + 1], file)
i = i + 2 i += 2
for event, set in self.special.iteritems(): for event, set in self.special.iteritems():
if set: if set:
if not event: if not event:
...@@ -234,6 +235,7 @@ class TransitionMap(object): ...@@ -234,6 +235,7 @@ class TransitionMap(object):
def dump_set(self, set): def dump_set(self, set):
return state_set_str(set) return state_set_str(set)
# #
# State set manipulation functions # State set manipulation functions
# #
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment