Commit f6fdecac authored by Stefan Behnel

some more cythonisation in Plex scanner classes (15% faster for lxml)

parent 97ed7663
......@@ -6,28 +6,29 @@ cdef class Scanner:
cdef public stream
cdef public name
cdef public buffer
cdef public long buf_start_pos
cdef public long next_pos
cdef public long cur_pos
cdef public long cur_line
cdef public long cur_line_start
cdef public long start_pos
cdef public long start_line
cdef public long start_col
cdef public Py_ssize_t buf_start_pos
cdef public Py_ssize_t next_pos
cdef public Py_ssize_t cur_pos
cdef public Py_ssize_t cur_line
cdef public Py_ssize_t cur_line_start
cdef public Py_ssize_t start_pos
cdef public Py_ssize_t start_line
cdef public Py_ssize_t start_col
cdef public text
cdef public initial_state # int?
cdef public state_name
cdef public list queue
cdef public bint trace
cdef public cur_char
cdef public input_state
cdef public int input_state
cdef public level
@cython.locals(input_state=long)
cpdef next_char(self)
cpdef read(self)
cpdef position(self)
cpdef tuple scan_a_token(self)
cpdef tuple position(self)
@cython.locals(cur_pos=cython.long, cur_line=cython.long,
cur_line_start=cython.long, input_state=cython.long,
......
......@@ -75,6 +75,8 @@ class Scanner:
|name| is optional, and may be the name of the file being
scanned or any other identifying string.
"""
self.trace = 0
self.buffer = ''
self.buf_start_pos = 0
self.next_pos = 0
......@@ -135,7 +137,7 @@ class Scanner:
# else:
# action = self.run_machine_inlined()
action = self.run_machine_inlined()
if action:
if action is not None:
if self.trace:
print("Scanner: read: Performing %s %d:%d" % (
action, self.start_pos, self.cur_pos))
......@@ -144,21 +146,11 @@ class Scanner:
return (text, action)
else:
if self.cur_pos == self.start_pos:
if self.cur_char == EOL:
if self.cur_char is EOL:
self.next_char()
if not self.cur_char or self.cur_char == EOF:
if self.cur_char is None or self.cur_char is EOF:
return ('', None)
raise Errors.UnrecognizedInput(self, self.state_name)
def run_machine(self):
"""
Run the machine until no more transitions are possible.
"""
self.state = self.initial_state
self.backup_state = None
while self.transition():
pass
return self.back_up()
def run_machine_inlined(self):
"""
......@@ -183,7 +175,7 @@ class Scanner:
# Begin inlined self.save_for_backup()
#action = state.action #@slow
action = state['action'] #@fast
if action:
if action is not None:
backup_state = (
action, cur_pos, cur_line, cur_line_start, cur_char, input_state, next_pos)
# End inlined self.save_for_backup()
......@@ -245,7 +237,7 @@ class Scanner:
if trace: #TRACE#
print("blocked") #TRACE#
# Begin inlined: action = self.back_up()
if backup_state:
if backup_state is not None:
(action, cur_pos, cur_line, cur_line_start,
cur_char, input_state, next_pos) = backup_state
else:
......@@ -259,46 +251,9 @@ class Scanner:
self.input_state = input_state
self.next_pos = next_pos
if trace: #TRACE#
if action: #TRACE#
print("Doing " + action) #TRACE#
if action is not None: #TRACE#
print("Doing %s" % action) #TRACE#
return action
# def transition(self):
# self.save_for_backup()
# c = self.cur_char
# new_state = self.state.new_state(c)
# if new_state:
# if self.trace:
# print "Scanner: read: State %d: %s --> State %d" % (
# self.state.number, repr(c), new_state.number)
# self.state = new_state
# self.next_char()
# return 1
# else:
# if self.trace:
# print "Scanner: read: State %d: %s --> blocked" % (
# self.state.number, repr(c))
# return 0
# def save_for_backup(self):
# action = self.state.get_action()
# if action:
# if self.trace:
# print "Scanner: read: Saving backup point at", self.cur_pos
# self.backup_state = (
# action, self.cur_pos, self.cur_line, self.cur_line_start,
# self.cur_char, self.input_state, self.next_pos)
# def back_up(self):
# backup_state = self.backup_state
# if backup_state:
# (action, self.cur_pos, self.cur_line, self.cur_line_start,
# self.cur_char, self.input_state, self.next_pos) = backup_state
# if self.trace:
# print "Scanner: read: Backing up to", self.cur_pos
# return action
# else:
# return None
def next_char(self):
input_state = self.input_state
......@@ -330,26 +285,7 @@ class Scanner:
self.cur_char = ''
if self.trace:
print("--> [%d] %d %s" % (input_state, self.cur_pos, repr(self.cur_char)))
# def read_char(self):
# """
# Get the next input character, filling the buffer if necessary.
# Returns '' at end of file.
# """
# next_pos = self.next_pos
# buf_index = next_pos - self.buf_start_pos
# if buf_index == len(self.buffer):
# discard = self.start_pos - self.buf_start_pos
# data = self.stream.read(0x1000)
# self.buffer = self.buffer[discard:] + data
# self.buf_start_pos = self.buf_start_pos + discard
# buf_index = buf_index - discard
# if not data:
# return ''
# c = self.buffer[buf_index]
# self.next_pos = next_pos + 1
# return c
def position(self):
"""
Return a tuple (name, line, col) representing the location of
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment