Commit 599b26e7 authored by Stefan Behnel

Scanned stream is unicode, so use unicode strings for comparisons to avoid redundant recoding in Py2.
parent cd7ea555
...@@ -7,11 +7,12 @@ ...@@ -7,11 +7,12 @@
# #
#======================================================================= #=======================================================================
import cython
cython.declare(BOL=object, EOL=object, EOF=object)
import Errors import Errors
from Regexps import BOL, EOL, EOF from Regexps import BOL, EOL, EOF
import cython
class Scanner(object): class Scanner(object):
""" """
A Scanner is used to read tokens from a stream of characters A Scanner is used to read tokens from a stream of characters
...@@ -77,7 +78,7 @@ class Scanner(object): ...@@ -77,7 +78,7 @@ class Scanner(object):
""" """
self.trace = 0 self.trace = 0
self.buffer = '' self.buffer = u''
self.buf_start_pos = 0 self.buf_start_pos = 0
self.next_pos = 0 self.next_pos = 0
self.cur_pos = 0 self.cur_pos = 0
...@@ -145,7 +146,7 @@ class Scanner(object): ...@@ -145,7 +146,7 @@ class Scanner(object):
if self.cur_char is EOL: if self.cur_char is EOL:
self.next_char() self.next_char()
if self.cur_char is None or self.cur_char is EOF: if self.cur_char is None or self.cur_char is EOF:
return ('', None) return (u'', None)
raise Errors.UnrecognizedInput(self, self.state_name) raise Errors.UnrecognizedInput(self, self.state_name)
def run_machine_inlined(self): def run_machine_inlined(self):
...@@ -205,9 +206,9 @@ class Scanner(object): ...@@ -205,9 +206,9 @@ class Scanner(object):
c = buffer[buf_index] c = buffer[buf_index]
next_pos = next_pos + 1 next_pos = next_pos + 1
else: else:
c = '' c = u''
# End inlined: c = self.read_char() # End inlined: c = self.read_char()
if c == '\n': if c == u'\n':
cur_char = EOL cur_char = EOL
input_state = 2 input_state = 2
elif not c: elif not c:
...@@ -216,7 +217,7 @@ class Scanner(object): ...@@ -216,7 +217,7 @@ class Scanner(object):
else: else:
cur_char = c cur_char = c
elif input_state == 2: elif input_state == 2:
cur_char = '\n' cur_char = u'\n'
input_state = 3 input_state = 3
elif input_state == 3: elif input_state == 3:
cur_line = cur_line + 1 cur_line = cur_line + 1
...@@ -227,7 +228,7 @@ class Scanner(object): ...@@ -227,7 +228,7 @@ class Scanner(object):
cur_char = EOF cur_char = EOF
input_state = 5 input_state = 5
else: # input_state = 5 else: # input_state = 5
cur_char = '' cur_char = u''
# End inlined self.next_char() # End inlined self.next_char()
else: # not new_state else: # not new_state
if trace: #TRACE# if trace: #TRACE#
...@@ -258,7 +259,7 @@ class Scanner(object): ...@@ -258,7 +259,7 @@ class Scanner(object):
if input_state == 1: if input_state == 1:
self.cur_pos = self.next_pos self.cur_pos = self.next_pos
c = self.read_char() c = self.read_char()
if c == '\n': if c == u'\n':
self.cur_char = EOL self.cur_char = EOL
self.input_state = 2 self.input_state = 2
elif not c: elif not c:
...@@ -267,7 +268,7 @@ class Scanner(object): ...@@ -267,7 +268,7 @@ class Scanner(object):
else: else:
self.cur_char = c self.cur_char = c
elif input_state == 2: elif input_state == 2:
self.cur_char = '\n' self.cur_char = u'\n'
self.input_state = 3 self.input_state = 3
elif input_state == 3: elif input_state == 3:
self.cur_line = self.cur_line + 1 self.cur_line = self.cur_line + 1
...@@ -278,7 +279,7 @@ class Scanner(object): ...@@ -278,7 +279,7 @@ class Scanner(object):
self.cur_char = EOF self.cur_char = EOF
self.input_state = 5 self.input_state = 5
else: # input_state = 5 else: # input_state = 5
self.cur_char = '' self.cur_char = u''
if self.trace: if self.trace:
print("--> [%d] %d %s" % (input_state, self.cur_pos, repr(self.cur_char))) print("--> [%d] %d %s" % (input_state, self.cur_pos, repr(self.cur_char)))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.