Visitor.py 19.8 KB
Newer Older
1 2
# cython: infer_types=True

3 4 5
#
#   Tree visitor and transform framework
#
6
import inspect
7 8
import TypeSlots
import Builtin
9 10
import Nodes
import ExprNodes
11
import Errors
12
import DebugFlags
13

14 15
import cython

16
class TreeVisitor(object):
    """
    Base class for writing visitors for a Cython tree, contains utilities for
    recursing such trees using visitors. Each node is
    expected to have a child_attrs iterable containing the names of attributes
    containing child nodes or lists of child nodes. Lists are not considered
    part of the tree structure (i.e. contained nodes are considered direct
    children of the parent node).

    visitchildren visits each of the children of a given node (see the
    _visitchildren documentation). When recursing the tree using
    visitchildren, an attribute access_path is maintained which gives
    information about the current location in the tree as a stack of tuples:
    (parent_node, attrname, index), representing the node, attribute and
    optional list index that was taken in each step in the path to the
    current node.

    Handlers are found by name: for an object of class C, the first existing
    method called visit_<ClassName> along C's MRO is used, and the result is
    cached per concrete type in dispatch_table.

    Example:

    >>> class SampleNode(object):
    ...     child_attrs = ["head", "body"]
    ...     def __init__(self, value, head=None, body=None):
    ...         self.value = value
    ...         self.head = head
    ...         self.body = body
    ...     def __repr__(self): return "SampleNode(%s)" % self.value
    ...
    >>> tree = SampleNode(0, SampleNode(1), [SampleNode(2), SampleNode(3)])
    >>> class MyVisitor(TreeVisitor):
    ...     def visit_SampleNode(self, node):
    ...         print("in %s %s" % (node.value, self.access_path))
    ...         self.visitchildren(node)
    ...         print("out %s" % node.value)
    ...
    >>> MyVisitor().visit(tree)
    in 0 []
    in 1 [(SampleNode(0), 'head', None)]
    out 1
    in 2 [(SampleNode(0), 'body', 0)]
    out 2
    in 3 [(SampleNode(0), 'body', 1)]
    out 3
    out 0
    """
    def __init__(self):
        super(TreeVisitor, self).__init__()
        # Cache: concrete node type -> bound visit_* handler.
        self.dispatch_table = {}
        # Stack of (parent, attrname, index) steps leading to the current node.
        self.access_path = []

    def dump_node(self, node, indent=0):
        """
        Return a short textual summary of a single node for crash reports.

        The summary has the form ``ClassName(value, value, ...)``. It starts
        with ``file:line:col`` when the node has a truthy ``pos``, followed
        by ``name = repr(value)`` for public attributes holding strings or
        numbers and ``name = [...]/N`` for lists of length N. Child
        attributes, private/dunder names and a few bookkeeping attributes
        are skipped, as are values that are None or equal to 0.

        ``indent`` is accepted for interface compatibility but is not used.
        """
        ignored = list(node.child_attrs) + [u'child_attrs', u'pos',
                                            u'gil_message', u'cpp_message',
                                            u'subexprs']
        values = []
        pos = getattr(node, 'pos', None)
        if pos:
            source = pos[0]
            if source:
                import os.path
                source = os.path.basename(source.get_description())
            values.append(u'%s:%s:%s' % (source, pos[1], pos[2]))
        attribute_names = dir(node)
        attribute_names.sort()
        for attr in attribute_names:
            if attr in ignored:
                continue
            if attr.startswith(u'_') or attr.endswith(u'_'):
                continue
            try:
                value = getattr(node, attr)
            except AttributeError:
                # dir() may list names that cannot actually be read.
                continue
            if value is None or value == 0:
                continue
            elif isinstance(value, list):
                value = u'[...]/%d' % len(value)
            elif not isinstance(value, (str, unicode, long, int, float)):
                continue
            else:
                value = repr(value)
            values.append(u'%s = %s' % (attr, value))
        return u'%s(%s)' % (node.__class__.__name__,
                           u',\n    '.join(values))

    def _find_node_path(self, stacktrace):
        """
        Collect the tree nodes whose methods appear in a traceback.

        Walks the ``tb_next`` chain of ``stacktrace``. For every frame whose
        local ``self`` is a ``Nodes.Node`` it records a tuple
        ``(node, method_name, (file_basename, line_number))``.

        Returns ``(last_traceback, nodes)`` where ``last_traceback`` is the
        traceback entry of the last such frame (or the original
        ``stacktrace`` if none was found).
        """
        import os.path
        last_traceback = stacktrace
        nodes = []
        while hasattr(stacktrace, 'tb_frame'):
            frame = stacktrace.tb_frame
            node = frame.f_locals.get(u'self')
            if isinstance(node, Nodes.Node):
                code = frame.f_code
                method_name = code.co_name
                pos = (os.path.basename(code.co_filename),
                       frame.f_lineno)
                nodes.append((node, method_name, pos))
                last_traceback = stacktrace
            stacktrace = stacktrace.tb_next
        return (last_traceback, nodes)

    def _raise_compiler_error(self, child, e):
        """
        Raise ``Errors.CompilerCrash`` describing where exception ``e``
        happened.

        The message lists every step of ``access_path`` followed by the node
        methods found in the active traceback. It must be called while the
        exception is being handled, because it reads ``sys.exc_info()``.
        ``child`` supplies the reported position when the traceback contains
        no node method.
        """
        import sys
        trace = ['']
        for parent, attribute, index in self.access_path:
            node = getattr(parent, attribute)
            if index is None:
                index = ''
            else:
                node = node[index]
                index = u'[%d]' % index
            trace.append(u'%s.%s%s = %s' % (
                parent.__class__.__name__, attribute, index,
                self.dump_node(node)))
        stacktrace, called_nodes = self._find_node_path(sys.exc_info()[2])
        last_node = child
        for node, method_name, pos in called_nodes:
            last_node = node
            trace.append(u"File '%s', line %d, in %s: %s" % (
                pos[0], pos[1], method_name, self.dump_node(node)))
        raise Errors.CompilerCrash(
            getattr(last_node, 'pos', None), self.__class__.__name__,
            u'\n'.join(trace), e, stacktrace)

    @cython.final
    def find_handler(self, obj):
        """
        Return the bound ``visit_<ClassName>`` method responsible for ``obj``.

        The classes in the MRO of ``type(obj)`` are tried in order. If none
        of them has a handler, diagnostic information is printed to standard
        output and ``RuntimeError`` is raised.
        """
        # to resolve, try entire hierarchy
        cls = type(obj)
        pattern = "visit_%s"
        mro = inspect.getmro(cls)
        handler_method = None
        for mro_cls in mro:
            handler_method = getattr(self, pattern % mro_cls.__name__, None)
            if handler_method is not None:
                return handler_method
        # Single-argument print() calls behave the same as the print
        # statement on Python 2 and are valid on Python 3.
        print("%s %s" % (type(self), cls))
        if self.access_path:
            print(self.access_path)
            print(self.access_path[-1][0].pos)
            print(self.access_path[-1][0].__dict__)
        raise RuntimeError("Visitor %r does not accept object: %s" % (self, obj))

    def visit(self, obj):
        """Visit ``obj`` and return whatever its handler returns."""
        return self._visit(obj)

    @cython.final
    def _visit(self, obj):
        """
        Dispatch ``obj`` to its handler, caching the lookup per type.

        ``Errors.CompileError`` and ``Errors.AbortError`` pass through
        untouched. Any other exception is converted into a compiler crash
        report unless ``DebugFlags.debug_no_exception_intercept`` is set, in
        which case it is re-raised as is.
        """
        try:
            try:
                handler_method = self.dispatch_table[type(obj)]
            except KeyError:
                handler_method = self.find_handler(obj)
                self.dispatch_table[type(obj)] = handler_method
            return handler_method(obj)
        except Errors.CompileError:
            raise
        except Errors.AbortError:
            raise
        except Exception as e:
            if DebugFlags.debug_no_exception_intercept:
                raise
            self._raise_compiler_error(obj, e)

    @cython.final
    def _visitchild(self, child, parent, attrname, idx):
        """
        Visit ``child`` with ``(parent, attrname, idx)`` pushed on
        ``access_path`` and return the handler's result.
        """
        self.access_path.append((parent, attrname, idx))
        try:
            return self._visit(child)
        finally:
            # Pop even when the handler raises, so that a caller which
            # catches the exception and carries on does not see a stale
            # path. The crash report is built inside _visit, before this
            # runs, and therefore still sees the full path.
            self.access_path.pop()

    def visitchildren(self, parent, attrs=None):
        """
        Visit the children of ``parent``; see ``_visitchildren`` for the
        meaning of ``attrs`` and of the return value.
        """
        return self._visitchildren(parent, attrs)

    @cython.final
    def _visitchildren(self, parent, attrs):
        """
        Visits the children of the given parent. If parent is None, returns
        immediately (returning None).

        If attrs is not None, only the child attributes named in it are
        visited. Child attributes whose value is None are skipped.

        The return value is a dictionary giving the results for each
        child (mapping the attribute name to either the return value
        or a list of return values (in the case of multiple children
        in an attribute)).
        """
        if parent is None: return None
        result = {}
        for attr in parent.child_attrs:
            if attrs is not None and attr not in attrs: continue
            child = getattr(parent, attr)
            if child is not None:
                if type(child) is list:
                    childretval = [self._visitchild(x, parent, attr, idx) for idx, x in enumerate(child)]
                else:
                    childretval = self._visitchild(child, parent, attr, None)
                    assert not isinstance(childretval, list), 'Cannot insert list here: %s in %r' % (attr, parent)
                result[attr] = childretval
        return result


class VisitorTransform(TreeVisitor):
    """
    A tree transform is a base class for visitors that want to do stream
    processing of the structure (rather than attributes etc.) of a tree.

    It implements __call__ to simply visit the argument node.

    It requires the visitor methods to return the nodes which should take
    the place of the visited node in the result tree (which can be the same
    or one or more replacement). Specifically, if the return value from
    a visitor method is:

    - None; the visited node will be removed (set to None if an attribute and
    removed if in a list). An empty list likewise removes a node that is a
    member of a list.
    - A single node; the visited node will be replaced by the returned node.
    - A list of nodes; the visited nodes will be replaced by all the nodes in the
    list. This will only work if the node was already a member of a list; if it
    was not, an exception will be raised. (Typically you want to ensure that you
    are within a StatListNode or similar before doing this.)
    """
    def visitchildren(self, parent, attrs=None):
        """
        Visit the children of ``parent`` and store the handlers' results back
        into the corresponding attributes of ``parent``.

        Returns the raw result dictionary of ``_visitchildren``, or None when
        ``parent`` is None.
        """
        result = self._visitchildren(parent, attrs)
        if result is None:
            # _visitchildren returns None for a None parent; there is
            # nothing to write back in that case.
            return None
        for attr, newnode in result.iteritems():
            if not type(newnode) is list:
                setattr(parent, attr, newnode)
            else:
                # Flatten the list one level and remove any None
                newlist = []
                for x in newnode:
                    if x is not None:
                        if type(x) is list:
                            newlist += x
                        else:
                            newlist.append(x)
                setattr(parent, attr, newlist)
        return result

    def recurse_to_children(self, node):
        """Transform the children of ``node`` in place and return ``node``."""
        self.visitchildren(node)
        return node

    def __call__(self, root):
        """Run the transform on ``root`` and return its replacement."""
        return self._visit(root)
257

258 259
class CythonTransform(VisitorTransform):
    """
    Certain common conventions and utilities for Cython transforms.

     - Sets up the context of the pipeline in self.context
     - Tracks directives in effect in self.current_directives
    """
    def __init__(self, context):
        super(CythonTransform, self).__init__()
        self.context = context

    def __call__(self, node):
        """
        Run the transform on ``node``. When ``node`` is a ModuleNode, its
        directives become the initial ``current_directives``.
        """
        # Imported locally, as in the original code.
        import ModuleNode
        if isinstance(node, ModuleNode.ModuleNode):
            self.current_directives = node.directives
        return super(CythonTransform, self).__call__(node)

    def visit_CompilerDirectivesNode(self, node):
        """
        Visit the children of ``node`` with ``node.directives`` in effect,
        then restore the previous directives.
        """
        old = self.current_directives
        self.current_directives = node.directives
        try:
            self.visitchildren(node)
        finally:
            # Restore even if a child handler raises, so the directives of
            # the enclosing block stay accurate for whoever handles the error.
            self.current_directives = old
        return node

    def visit_Node(self, node):
        """Default handler: transform the children and keep the node."""
        self.visitchildren(node)
        return node

Dag Sverre Seljebotn's avatar
Dag Sverre Seljebotn committed
286 287
class ScopeTrackingTransform(CythonTransform):
    # Keeps track of type of scopes
    #scope_type: can be either of 'module', 'function', 'cclass', 'pyclass', 'struct'
    #scope_node: the node that owns the current scope

    def visit_ModuleNode(self, node):
        """Enter the module scope and visit the module's children."""
        self.scope_type = 'module'
        self.scope_node = node
        self.visitchildren(node)
        return node

    def visit_scope(self, node, scope_type):
        """
        Visit the children of ``node`` with ``scope_type``/``scope_node``
        set for that node, then restore the enclosing scope information.
        """
        prev = self.scope_type, self.scope_node
        self.scope_type = scope_type
        self.scope_node = node
        try:
            self.visitchildren(node)
        finally:
            # Restore even if a child handler raises, so the enclosing
            # scope information is not left pointing at this node.
            self.scope_type, self.scope_node = prev
        return node

    def visit_CClassDefNode(self, node):
        return self.visit_scope(node, 'cclass')

    def visit_PyClassDefNode(self, node):
        return self.visit_scope(node, 'pyclass')

    def visit_FuncDefNode(self, node):
        return self.visit_scope(node, 'function')

    def visit_CStructOrUnionDefNode(self, node):
        return self.visit_scope(node, 'struct')

317 318 319

class EnvTransform(CythonTransform):
    """
    This transformation keeps a stack of the environments.

    Each entry of env_stack is a (node, scope) pair; the last entry
    describes the innermost scope currently being visited.
    """
    def __call__(self, root):
        """Start with the scope of ``root`` and run the transform."""
        self.env_stack = [(root, root.scope)]
        return super(EnvTransform, self).__call__(root)

    def current_env(self):
        """Return the innermost scope on the stack."""
        return self.env_stack[-1][1]

    def current_scope_node(self):
        """Return the node that owns the innermost scope on the stack."""
        return self.env_stack[-1][0]

    def visit_FuncDefNode(self, node):
        """Visit a function's children inside its ``local_scope``."""
        self.env_stack.append((node, node.local_scope))
        try:
            self.visitchildren(node)
        finally:
            # Keep the stack balanced even if a child handler raises.
            self.env_stack.pop()
        return node

    def visit_GeneratorBodyDefNode(self, node):
        """Visit the children without pushing a new environment."""
        self.visitchildren(node)
        return node

    def visit_ClassDefNode(self, node):
        """Visit a class definition's children inside ``node.scope``."""
        self.env_stack.append((node, node.scope))
        try:
            self.visitchildren(node)
        finally:
            # Keep the stack balanced even if a child handler raises.
            self.env_stack.pop()
        return node

    def visit_CStructOrUnionDefNode(self, node):
        """Visit a struct/union definition's children inside ``node.scope``."""
        self.env_stack.append((node, node.scope))
        try:
            self.visitchildren(node)
        finally:
            # Keep the stack balanced even if a child handler raises.
            self.env_stack.pop()
        return node

    def visit_ScopedExprNode(self, node):
        """
        Visit the children inside ``node.expr_scope`` when the expression
        has one; otherwise visit them in the current environment.
        """
        if node.expr_scope:
            self.env_stack.append((node, node.expr_scope))
            try:
                self.visitchildren(node)
            finally:
                # Keep the stack balanced even if a child handler raises.
                self.env_stack.pop()
        else:
            self.visitchildren(node)
        return node

363

364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439
class MethodDispatcherTransform(EnvTransform):
    """
    Base class for transformations that want to intercept on specific
    builtin functions or methods of builtin types.  Must run after
    declaration analysis when entries were assigned.

    Naming pattern for handler methods is as follows:

    * builtin functions: _handle_(general|simple|any)_function_NAME

    * builtin methods: _handle_(general|simple|any)_method_TYPENAME_METHODNAME

    'simple' handlers are used for calls without keyword arguments,
    'general' handlers for calls with keyword arguments, and 'any' handlers
    are the fallback for both. Methods without a type-specific handler that
    are listed in TypeSlots.method_name_to_slot (or are named '__new__')
    are additionally looked up as _handle_(general|simple|any)_slotNAME.
    """
    # only visit call nodes
    def visit_GeneralCallNode(self, node):
        """
        Dispatch a call that has a separate keyword-argument node.

        The node is returned unchanged when the function is not a Python
        object, when the positional arguments are not a literal tuple, or
        when the keyword arguments are not a literal dict.
        """
        self.visitchildren(node)
        function = node.function
        if not function.type.is_pyobject:
            return node
        arg_tuple = node.positional_args
        if not isinstance(arg_tuple, ExprNodes.TupleNode):
            return node
        keyword_args = node.keyword_args
        if keyword_args and not isinstance(keyword_args, ExprNodes.DictNode):
            # can't handle **kwargs
            return node
        args = arg_tuple.args
        return self._dispatch_to_handler(
            node, function, args, keyword_args)

    def visit_SimpleCallNode(self, node):
        """
        Dispatch a call without keyword arguments.

        For Python-object functions the arguments are taken from
        ``node.arg_tuple`` (the node is returned unchanged if that is not a
        literal tuple); otherwise ``node.args`` is used.
        """
        self.visitchildren(node)
        function = node.function
        if function.type.is_pyobject:
            arg_tuple = node.arg_tuple
            if not isinstance(arg_tuple, ExprNodes.TupleNode):
                return node
            args = arg_tuple.args
        else:
            args = node.args
        return self._dispatch_to_handler(
            node, function, args)

    ### dispatch to specific handlers

    def _find_handler(self, match_name, has_kwargs):
        """
        Return the handler method for ``match_name`` or None.

        Looks for ``_handle_general_<match_name>`` when ``has_kwargs`` is
        truthy and ``_handle_simple_<match_name>`` otherwise, falling back
        to ``_handle_any_<match_name>``.
        """
        if has_kwargs:
            call_type = 'general'
        else:
            call_type = 'simple'
        handler = getattr(self, '_handle_%s_%s' % (call_type, match_name), None)
        if handler is None:
            handler = getattr(self, '_handle_any_%s' % match_name, None)
        return handler

    def _dispatch_to_handler(self, node, function, arg_list, kwargs=None):
        """
        Call the handler matching the called builtin function or method.

        Function handlers are called as ``handler(node, args[, kwargs])``.
        Method handlers are called as
        ``handler(node, args[, kwargs], is_unbound_method)``, with the object
        the method is called on prepended to ``args`` for bound calls.
        Returns the handler's result, or ``node`` unchanged when no handler
        applies.
        """
        if function.is_name:
            # we only consider functions that are either builtin
            # Python functions or builtins that were already replaced
            # into a C function call (defined in the builtin scope)
            if not function.entry:
                return node
            is_builtin = (
                function.entry.is_builtin or
                function.entry is self.current_env().builtin_scope().lookup_here(function.name))
            if not is_builtin:
                return node
            function_handler = self._find_handler(
                "function_%s" % function.name, kwargs)
            if function_handler is None:
                return node
            if kwargs:
                return function_handler(node, arg_list, kwargs)
            else:
                return function_handler(node, arg_list)
        elif function.is_attribute and function.type.is_pyobject:
            attr_name = function.attribute
            self_arg = function.obj
            obj_type = self_arg.type
            is_unbound_method = False
            if obj_type.is_builtin_type:
                if (obj_type is Builtin.type_type and self_arg.is_name and
                        arg_list and arg_list[0].type.is_pyobject):
                    # calling an unbound method like 'list.append(L,x)'
                    # (ignoring 'type.mro()' here ...)
                    type_name = function.obj.name
                    self_arg = None
                    is_unbound_method = True
                else:
                    type_name = obj_type.name
            else:
                type_name = "object" # safety measure
            method_handler = self._find_handler(
                "method_%s_%s" % (type_name, attr_name), kwargs)
            if method_handler is None:
                if (attr_name in TypeSlots.method_name_to_slot
                        or attr_name == '__new__'):
                    method_handler = self._find_handler(
                        "slot%s" % attr_name, kwargs)
                if method_handler is None:
                    return node
            if self_arg is not None:
                arg_list = [self_arg] + list(arg_list)
            if kwargs:
                return method_handler(node, arg_list, kwargs, is_unbound_method)
            else:
                return method_handler(node, arg_list, is_unbound_method)
        else:
            return node


470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485
class RecursiveNodeReplacer(VisitorTransform):
    """
    Transform that swaps every occurrence of one particular node object
    inside a subtree for another node.

    Occurrences are matched by identity (``is``), not by equality.
    """
    def __init__(self, orig_node, new_node):
        super(RecursiveNodeReplacer, self).__init__()
        self.orig_node = orig_node
        self.new_node = new_node

    def visit_Node(self, node):
        # Children are processed first, so the subtree below a replaced
        # node has been visited as well.
        self.visitchildren(node)
        if node is not self.orig_node:
            return node
        return self.new_node

486 487 488
def recursively_replace_node(tree, old_node, new_node):
    """
    Replace every occurrence of old_node below tree by new_node, in place.

    Returns None; the replacement for the root itself is discarded.
    """
    RecursiveNodeReplacer(old_node, new_node)(tree)
489 490


491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506
# Utils
def replace_node(ptr, value):
    """Store value at the tree location described by ptr.

    ptr has the form used on the access path stack:
    (parent, attrname, listidx|None). With a list index, the element at
    that index of the list held in the attribute is overwritten; without
    one, the attribute itself is rebound.
    """
    owner, attribute, position = ptr
    if position is not None:
        getattr(owner, attribute)[position] = value
        return
    setattr(owner, attribute, value)

class PrintTree(TreeVisitor):
    """Prints a representation of the tree to standard output.
    Subclass and override repr_of to provide more information
    about nodes. """
    def __init__(self):
        TreeVisitor.__init__(self)
        # Current indentation prefix; grows by two spaces per tree level.
        self._indent = ""

    def indent(self):
        """Increase the indentation by one level (two spaces)."""
        self._indent += "  "

    def unindent(self):
        """Decrease the indentation by one level (two spaces)."""
        self._indent = self._indent[:-2]

    def __call__(self, tree, phase=None):
        """
        Print a header naming ``phase`` followed by the dump of ``tree``.

        Returns ``tree`` unchanged.
        """
        print("Parse tree dump at phase '%s'" % phase)
        self._visit(tree)
        return tree

    # Don't do anything about process_list, the defaults gives
    # nice-looking name[idx] nodes which will visually appear
    # under the parent-node, not displaying the list itself in
    # the hierarchy.
    def visit_Node(self, node):
        """
        Print one line for ``node`` and then its children, indented.

        The line is labelled with the attribute (and list index, if any)
        under which the node hangs in its parent, or "(root)" at the top.
        """
        if len(self.access_path) == 0:
            name = "(root)"
        else:
            parent, attr, idx = self.access_path[-1]
            if idx is not None:
                name = "%s[%d]" % (attr, idx)
            else:
                name = attr
        print("%s- %s: %s" % (self._indent, name, self.repr_of(node)))
        self.indent()
        self.visitchildren(node)
        self.unindent()
        return node

    def repr_of(self, node):
        """
        Return the text shown for ``node``: its class name plus, depending
        on the kind of node, its type and/or name, or else its source
        position (when it has a truthy ``pos``). None is shown as "(none)".
        """
        if node is None:
            return "(none)"
        else:
            result = node.__class__.__name__
            if isinstance(node, ExprNodes.NameNode):
                result += "(type=%s, name=\"%s\")" % (repr(node.type), node.name)
            elif isinstance(node, Nodes.DefNode):
                result += "(name=\"%s\")" % node.name
            elif isinstance(node, ExprNodes.ExprNode):
                t = node.type
                result += "(type=%s)" % repr(t)
            elif node.pos:
                pos = node.pos
                path = pos[0].get_description()
                # Keep only the file name, whichever path separator is used.
                if '/' in path:
                    path = path.split('/')[-1]
                if '\\' in path:
                    path = path.split('\\')[-1]
                result += "(pos=(%s:%s:%s))" % (path, pos[1], pos[2])

            return result

if __name__ == "__main__":
    # When executed as a script, run the doctests embedded in this
    # module's docstrings.
    import doctest
    doctest.testmod()