Commit 41e7e456 authored by Stefan Behnel

Evaluate multiplication of string literals at compile time if the result is...

Evaluate multiplication of string literals at compile time if the result is short (<= 256 characters).
parent 6ea5b2ec
...@@ -8,14 +8,16 @@ import itertools ...@@ -8,14 +8,16 @@ import itertools
from . import TypeSlots from . import TypeSlots
from .ExprNodes import not_a_constant from .ExprNodes import not_a_constant
import cython import cython
cython.declare(UtilityCode=object, EncodedString=object, bytes_literal=object, cython.declare(UtilityCode=object, EncodedString=object, bytes_literal=object, encoded_string=object,
Nodes=object, ExprNodes=object, PyrexTypes=object, Builtin=object, Nodes=object, ExprNodes=object, PyrexTypes=object, Builtin=object,
UtilNodes=object, _py_int_types=object) UtilNodes=object, _py_int_types=object)
if sys.version_info[0] >= 3: if sys.version_info[0] >= 3:
_py_int_types = int _py_int_types = int
_py_string_types = (bytes, str)
else: else:
_py_int_types = (int, long) _py_int_types = (int, long)
_py_string_types = (bytes, unicode)
from . import Nodes from . import Nodes
from . import ExprNodes from . import ExprNodes
...@@ -26,7 +28,7 @@ from . import UtilNodes ...@@ -26,7 +28,7 @@ from . import UtilNodes
from . import Options from . import Options
from .Code import UtilityCode, TempitaUtilityCode from .Code import UtilityCode, TempitaUtilityCode
from .StringEncoding import EncodedString, bytes_literal from .StringEncoding import EncodedString, bytes_literal, encoded_string
from .Errors import error from .Errors import error
from .ParseTreeTransforms import SkipDeclarations from .ParseTreeTransforms import SkipDeclarations
...@@ -4156,8 +4158,42 @@ class ConstantFolding(Visitor.VisitorTransform, SkipDeclarations): ...@@ -4156,8 +4158,42 @@ class ConstantFolding(Visitor.VisitorTransform, SkipDeclarations):
if isinstance(node.operand1, ExprNodes.IntNode) and \ if isinstance(node.operand1, ExprNodes.IntNode) and \
node.operand2.is_sequence_constructor: node.operand2.is_sequence_constructor:
return self._calculate_constant_seq(node, node.operand2, node.operand1) return self._calculate_constant_seq(node, node.operand2, node.operand1)
if node.operand1.is_string_literal:
return self._multiply_string(node, node.operand1, node.operand2)
elif node.operand2.is_string_literal:
return self._multiply_string(node, node.operand2, node.operand1)
return self.visit_BinopNode(node) return self.visit_BinopNode(node)
def _multiply_string(self, node, string_node, multiplier_node):
    """Fold a ``string literal * integer constant`` product into the literal.

    ``node`` is returned untouched unless the multiplier's constant result
    is an integer and ``node`` itself carries a constant string result of
    at most 256 characters.  Otherwise ``string_node`` is updated in place
    with the repeated value(s) and returned as the replacement for ``node``.
    """
    repeat = multiplier_node.constant_result
    if not isinstance(repeat, _py_int_types):
        return node
    if not node.has_constant_result():
        return node
    if not isinstance(node.constant_result, _py_string_types):
        return node
    if len(node.constant_result) > 256:
        # The 256 character cut-off is arbitrary: longer results are not
        # created statically but left to be computed at runtime.
        return node

    if isinstance(string_node, ExprNodes.BytesNode):
        make_value = bytes_literal
    else:
        make_value = encoded_string
        if isinstance(string_node, ExprNodes.StringNode):
            # Keep the optional unicode variant of the literal in sync.
            text = string_node.unicode_value
            if text is not None:
                string_node.unicode_value = encoded_string(
                    text * repeat, text.encoding)
        elif isinstance(string_node, ExprNodes.UnicodeNode):
            # Keep the optional bytes variant of the literal in sync.
            raw = string_node.bytes_value
            if raw is not None:
                string_node.bytes_value = bytes_literal(
                    raw * repeat, raw.encoding)
        else:
            assert False, "unknown string node type: %s" % type(string_node)

    # NOTE(review): string_node.constant_result is not refreshed here, only
    # .value and its unicode/bytes variant -- confirm no later pass reads it.
    original = string_node.value
    string_node.value = make_value(original * repeat, original.encoding)
    return string_node
def _calculate_constant_seq(self, node, sequence_node, factor): def _calculate_constant_seq(self, node, sequence_node, factor):
if factor.constant_result != 1 and sequence_node.args: if factor.constant_result != 1 and sequence_node.args:
if isinstance(factor.constant_result, _py_int_types) and factor.constant_result <= 0: if isinstance(factor.constant_result, _py_int_types) and factor.constant_result <= 0:
......
...@@ -191,6 +191,14 @@ def bytes_literal(s, encoding): ...@@ -191,6 +191,14 @@ def bytes_literal(s, encoding):
return s return s
def encoded_string(s, encoding):
    """Wrap ``s`` in an ``EncodedString``.

    The wrapper's ``encoding`` attribute is only assigned when ``encoding``
    is not None.
    """
    assert isinstance(s, (_unicode, bytes))
    wrapped = EncodedString(s)
    if encoding is None:
        return wrapped
    wrapped.encoding = encoding
    return wrapped
char_from_escape_sequence = { char_from_escape_sequence = {
r'\a' : u'\a', r'\a' : u'\a',
r'\b' : u'\b', r'\b' : u'\b',
......
__doc__ = u""" __doc__ = u"""
>>> print(spam) >>> print(spam)
eggseggseggseggs eggseggseggseggs
>>> print(grail) >>> print(uspam)
tomatotomatotomatotomatotomatotomatotomato eggseggseggseggs
>>> print(bspam.decode('ascii'))
eggseggseggseggs
>>> print(grail)
tomatotomatotomatotomatotomatotomatotomato
>>> len(grail_long)
4200
>>> print(ugrail)
tomatotomatotomatotomatotomatotomatotomato
>>> len(ugrail_long)
4200
>>> print(bgrail.decode('ascii'))
tomatotomatotomatotomatotomatotomatotomato
>>> len(bgrail_long)
4200
""" """
spam = u"eggs" * 4 bspam = b"eggs" * 4
grail = 7 * u"tomato" bgrail = 7 * b"tomato"
bgrail_long = 700 * b"tomato"
spam = "eggs" * 4
grail = 7 * "tomato"
grail_long = 700 * "tomato"
uspam = u"eggs" * 4
ugrail = 7 * u"tomato"
ugrail_long = 700 * u"tomato"
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment