Merged with libpython.py from Python3.2 + prevented continuation with negative...

Merged with libpython.py from Python3.2 + prevented continuation with negative pointer values gdb was giving to avoid segfaulting the inferior when using 'cy exec'

Merged with libpython.py from Python3.2 + prevented continuation with negative...
Merged with libpython.py from Python3.2 + prevented continuation with negative pointer values gdb was giving to avoid segfaulting the inferior when using 'cy exec'
7c6598c1 · Mark Florisson · 391a3837 · 7c6598c1 · 7c6598c1 · 7c6598c1
Commit 7c6598c1 authored Nov 25, 2010 by Mark Florisson
Showing with 251 additions and 33 deletions

Cython/Debugger/Cygdb.py Cython/Debugger/Cygdb.py +3 -1

Cython/Debugger/libcython.py Cython/Debugger/libcython.py +3 -3

Cython/Debugger/libpython.py Cython/Debugger/libpython.py +245 -29

No files found.
--- a/Cython/Debugger/Cygdb.py
+++ b/Cython/Debugger/Cygdb.py
@@ -42,7 +42,9 @@ def make_command_file(path_to_debug_info, prefix_code='', no_import=False):
    f.write('python from Cython.Debugger import libcython\n')
    
    if no_import:
-        f.write("file %s\n" % sys.executable)
+        # don't do this, this overrides file command in .gdbinit
+        # f.write("file %s\n" % sys.executable)
+        pass
    else:
        path = os.path.join(path_to_debug_info, "cython_debug", "interpreter")
        interpreter = open(path).read()

--- a/Cython/Debugger/libcython.py
+++ b/Cython/Debugger/libcython.py
@@ -1152,9 +1152,9 @@ class CyExec(CythonCommand, libpython.PyExec):
                
                pystringp = executor.alloc_pystring(name)
                code = '''
-                    PyDict_SetItem(
-                        (PyObject *) %d, 
-                        (PyObject *) %d, 
+                    (PyObject *) PyDict_SetItem(
+                        (PyObject *) %d,
+                        (PyObject *) %d,
                        (PyObject *) %s)
                ''' % (local_dict_pointer, pystringp, cyvar.cname)


--- a/Cython/Debugger/libpython.py
+++ b/Cython/Debugger/libpython.py
@@ -48,6 +48,7 @@ from __future__ import with_statement
 import os
 import re
 import sys
+import locale
 import atexit
 import warnings
 import tempfile
@@ -70,6 +71,7 @@ Py_TPFLAGS_LONG_SUBCLASS     = (1L << 24)
 Py_TPFLAGS_LIST_SUBCLASS     = (1L << 25)
 Py_TPFLAGS_TUPLE_SUBCLASS    = (1L << 26)
 Py_TPFLAGS_STRING_SUBCLASS   = (1L << 27)
+Py_TPFLAGS_BYTES_SUBCLASS    = (1L << 27)
 Py_TPFLAGS_UNICODE_SUBCLASS  = (1L << 28)
 Py_TPFLAGS_DICT_SUBCLASS     = (1L << 29)
 Py_TPFLAGS_BASE_EXC_SUBCLASS = (1L << 30)
@@ -78,6 +80,10 @@ Py_TPFLAGS_TYPE_SUBCLASS     = (1L << 31)

 MAX_OUTPUT_LEN=1024

+hexdigits = "0123456789abcdef"
+
+ENCODING = locale.getpreferredencoding()
+
 class NullPyObjectPtr(RuntimeError):
    pass

@@ -94,6 +100,30 @@ def safe_range(val):
    # threshold in case the data was corrupted
    return xrange(safety_limit(val))

+def write_unicode(file, text):
+    """
+    Write text, which may be a byte string or a unicode string, to file.
+
+    A unicode string is encoded with ENCODING first, using the
+    'backslashreplace' error handler so that characters which cannot be
+    encoded are escaped instead of raising UnicodeEncodeError.  Anything
+    else is handed to file.write() untouched.
+    """
+    if not isinstance(text, unicode):
+        file.write(text)
+        return
+    file.write(text.encode(ENCODING, 'backslashreplace'))
+
+def os_fsencode(filename):
+    """
+    Encode a unicode filename into a byte string for the filesystem.
+
+    Arguments that are not unicode are returned unchanged.  With the 'mbcs'
+    filesystem encoding the whole name is encoded in one go; for any other
+    encoding the 'surrogateescape' error handler is emulated by hand: code
+    points U+DC80..U+DCFF are turned back into the single bytes 0x80..0xFF
+    and every other character is encoded individually.
+    """
+    if not isinstance(filename, unicode):
+        return filename
+
+    fs_encoding = sys.getfilesystemencoding()
+    if fs_encoding == 'mbcs':
+        # mbcs doesn't support surrogateescape
+        return filename.encode(fs_encoding)
+
+    def encode_char(char):
+        codepoint = ord(char)
+        if 0xDC80 <= codepoint <= 0xDCFF:
+            # surrogateescape error handler: escaped byte -> raw byte
+            return chr(codepoint - 0xDC00)
+        return char.encode(fs_encoding)
+
+    return ''.join([encode_char(char) for char in filename])

 class StringTruncated(RuntimeError):
    pass
@@ -160,16 +190,9 @@ class PyObjectPtr(object):
            return pyo_ptr.dereference()[name]

        if name == 'ob_size':
-            try:
-            # Python 2:
-                return self._gdbval.dereference()[name]
-            except RuntimeError:
-                # Python 3:
-                try:
-                    return self._gdbval.dereference()['ob_base'][name]
-                except RuntimeError:
-                    return 0
-                
+            pyo_ptr = self._gdbval.cast(PyVarObjectPtr.get_gdb_type())
+            return pyo_ptr.dereference()[name]
+
        # General case: look it up inside the object:
        return self._gdbval.dereference()[name]

@@ -370,6 +393,8 @@ class PyObjectPtr(object):
    def as_address(self):
        return long(self._gdbval)

+class PyVarObjectPtr(PyObjectPtr):
+    """
+    Class wrapping a gdb.Value that is viewed as a PyVarObject*.
+
+    PyObjectPtr casts to this class's gdb type when it looks up the
+    'ob_size' field of an object.
+    """
+    _typename = 'PyVarObject'

 class ProxyAlreadyVisited(object):
    '''
@@ -785,6 +810,22 @@ class PyLongObjectPtr(PyObjectPtr):
            result = -result
        return result

+    def write_repr(self, out, visited):
+        """
+        Write the value as a Python 3 int literal, i.e. without the "L"
+        suffix.
+        """
+        out.write("%s" % self.proxyval(visited))
+
+
+class PyBoolObjectPtr(PyLongObjectPtr):
+    """
+    Wrapper for a gdb.Value that is a PyBoolObject*, i.e. one of the two
+    <bool> instances (Py_True/Py_False) within the process being debugged.
+    """
+    def proxyval(self, visited):
+        # Decode the value through the PyLongObjectPtr implementation and
+        # collapse the result to a real bool.
+        return bool(PyLongObjectPtr.proxyval(self, visited))

 class PyNoneStructPtr(PyObjectPtr):
    """
@@ -898,7 +939,8 @@ class PyFrameObjectPtr(PyObjectPtr):
        newline character'''
        if self.is_optimized_out():
            return '(frame information optimized out)'
-        with open(self.filename(), 'r') as f:
+        filename = self.filename()
+        with open(os_fsencode(filename), 'r') as f:
            all_lines = f.readlines()
            # Convert from 1-based current_line_num to 0-based list offset:
            return all_lines[self.current_line_num()-1]
@@ -909,9 +951,9 @@ class PyFrameObjectPtr(PyObjectPtr):
            return
        out.write('Frame 0x%x, for file %s, line %i, in %s ('
                  % (self.as_address(),
-                     self.co_filename,
+                     self.co_filename.proxyval(visited),
                     self.current_line_num(),
-                     self.co_name))
+                     self.co_name.proxyval(visited)))
        first = True
        for pyop_name, pyop_value in self.iter_locals():
            if not first:
@@ -948,7 +990,8 @@ class PySetObjectPtr(PyObjectPtr):
            return set(members)

    def write_repr(self, out, visited):
-        out.write(self.safe_tp_name())
+        # Emulate Python 3's set_repr
+        tp_name = self.safe_tp_name()

        # Guard against infinite loops:
        if self.as_address() in visited:
@@ -956,7 +999,18 @@ class PySetObjectPtr(PyObjectPtr):
            return
        visited.add(self.as_address())

-        out.write('([')
+        # Python 3's set_repr special-cases the empty set:
+        if not self.field('used'):
+            out.write(tp_name)
+            out.write('()')
+            return
+
+        # Python 3 uses {} for set literals:
+        if tp_name != 'set':
+            out.write(tp_name)
+            out.write('(')
+
+        out.write('{')
        first = True
        table = self.field('table')
        for i in safe_range(self.field('mask')+1):
@@ -970,23 +1024,55 @@ class PySetObjectPtr(PyObjectPtr):
                        out.write(', ')
                    first = False
                    pyop_key.write_repr(out, visited)
-        out.write('])')
+        out.write('}')

+        if tp_name != 'set':
+            out.write(')')

-class PyStringObjectPtr(PyObjectPtr):
-    _typename = 'PyStringObject'
+
+class PyBytesObjectPtr(PyObjectPtr):
+    _typename = 'PyBytesObject'

    def __str__(self):
        field_ob_size = self.field('ob_size')
        field_ob_sval = self.field('ob_sval')
-        char_ptr = field_ob_sval.address.cast(
-            gdb.lookup_type('char').pointer())
-        
+        char_ptr = field_ob_sval.address.cast(_type_unsigned_char_ptr)
        return ''.join([chr(char_ptr[i]) for i in safe_range(field_ob_size)])

    def proxyval(self, visited):
        return str(self)

+    def write_repr(self, out, visited):
+        """
+        Write this object out as a Python 3 bytes literal, i.e. with a "b"
+        prefix.
+
+        Transliteration of Python 3's Objects/bytesobject.c:PyBytes_Repr to
+        Python 2 code, working on the value returned by proxyval().
+        """
+        # Get a PyStringObject* within the Python 2 gdb process:
+        data = self.proxyval(visited)
+
+        # Single quotes by default; double quotes only when the data has a
+        # single quote and no double quote.
+        if "'" in data and '"' not in data:
+            quote = '"'
+        else:
+            quote = "'"
+
+        whitespace_escapes = {'\t': '\\t', '\n': '\\n', '\r': '\\r'}
+
+        out.write('b')
+        out.write(quote)
+        for byte in data:
+            if byte == quote or byte == '\\':
+                out.write('\\')
+                out.write(byte)
+            elif byte in whitespace_escapes:
+                out.write(whitespace_escapes[byte])
+            elif byte < ' ' or ord(byte) >= 0x7f:
+                # Anything else outside printable ASCII becomes a two-digit
+                # hexadecimal escape.
+                value = ord(byte)
+                out.write('\\x')
+                out.write(hexdigits[(value & 0xf0) >> 4])
+                out.write(hexdigits[value & 0xf])
+            else:
+                out.write(byte)
+        out.write(quote)
+
 class PyTupleObjectPtr(PyObjectPtr):
    _typename = 'PyTupleObject'

@@ -1027,6 +1113,13 @@ class PyTypeObjectPtr(PyObjectPtr):
    _typename = 'PyTypeObject'


+def _unichr_is_printable(char):
+    """
+    Return True if char can be shown as-is in a repr, False if it has to
+    be escaped.
+
+    Logic adapted from Python 3's Tools/unicode/makeunicodedata.py: the
+    ASCII space is printable, and any other character is printable unless
+    its Unicode general category belongs to the "C" (other) or "Z"
+    (separator) major class.
+    """
+    if char == u" ":
+        return True
+    import unicodedata
+    # unicodedata.category() returns two-letter codes such as "Cc" or "Zs",
+    # so only the first letter (the major class) is compared; testing the
+    # whole code against "C"/"Z" would never match.
+    return unicodedata.category(char)[0] not in ("C", "Z")
+
 if sys.maxunicode >= 0x10000:
    _unichr = unichr
 else:
@@ -1085,6 +1178,116 @@ class PyUnicodeObjectPtr(PyObjectPtr):
        result = u''.join([_unichr(ucs) for ucs in Py_UNICODEs])
        return result

+    def write_repr(self, out, visited):
+        """
+        Write this object out as a Python 3 str literal, i.e. without a "u"
+        prefix.
+
+        Transliteration of Python 3's Objects/unicodeobject.c:unicode_repr
+        to Python 2.  Quotes, backslashes, tab, newline and carriage return
+        get backslash escapes; non-printable characters, and characters
+        that cannot be encoded in ENCODING, are written as hexadecimal
+        escapes; everything else is copied through unchanged.
+        """
+        # Get a PyUnicodeObject* within the Python 2 gdb process:
+        proxy = self.proxyval(visited)
+
+        if "'" in proxy and '"' not in proxy:
+            quote = '"'
+        else:
+            quote = "'"
+        out.write(quote)
+
+        i = 0
+        while i < len(proxy):
+            ch = proxy[i]
+            i += 1
+
+            # Escape quotes and backslashes
+            if ch == quote or ch == '\\':
+                out.write('\\')
+                out.write(ch)
+
+            # Map special whitespace to '\t', '\n', '\r'
+            elif ch == '\t':
+                out.write('\\t')
+            elif ch == '\n':
+                out.write('\\n')
+            elif ch == '\r':
+                out.write('\\r')
+
+            # Map non-printable US ASCII to '\xhh'.  ch is a one-character
+            # string, so DEL has to be detected through ord(): comparing ch
+            # itself with the integer 0x7F is never true.
+            elif ch < ' ' or ord(ch) == 0x7F:
+                out.write('\\x%02x' % ord(ch))
+
+            # Copy ASCII characters as-is
+            elif ord(ch) < 0x7F:
+                out.write(ch)
+
+            # Non-ASCII characters
+            else:
+                ucs = ch
+                ch2 = None
+                if sys.maxunicode < 0x10000:
+                    # If sizeof(Py_UNICODE) is 2 here (in gdb), join
+                    # surrogate pairs before calling _unichr_is_printable.
+                    if (i < len(proxy)
+                            and 0xD800 <= ord(ch) < 0xDC00
+                            and 0xDC00 <= ord(proxy[i]) <= 0xDFFF):
+                        ch2 = proxy[i]
+                        ucs = ch + ch2
+                        i += 1
+
+                # Unfortunately, Python 2's unicode type doesn't seem
+                # to expose the "isprintable" method
+                printable = _unichr_is_printable(ucs)
+                if printable:
+                    try:
+                        ucs.encode(ENCODING)
+                    except UnicodeEncodeError:
+                        printable = False
+
+                # Map Unicode whitespace and control characters
+                # (categories Z* and C* except ASCII space)
+                if not printable:
+                    if ch2 is not None:
+                        # Match Python 3's representation of non-printable
+                        # wide characters.
+                        code = (ord(ch) & 0x03FF) << 10
+                        code |= ord(ch2) & 0x03FF
+                        code += 0x00010000
+                    else:
+                        code = ord(ucs)
+
+                    # Map 8-bit characters to '\\xhh'
+                    if code <= 0xff:
+                        out.write('\\x%02x' % code)
+                    # Map 21-bit characters to '\U00xxxxxx'
+                    elif code >= 0x10000:
+                        out.write('\\U%08x' % code)
+                    # Map 16-bit characters to '\uxxxx'
+                    else:
+                        out.write('\\u%04x' % code)
+                else:
+                    # Copy characters as-is
+                    out.write(ch)
+                    if ch2 is not None:
+                        out.write(ch2)
+
+        out.write(quote)
+
+
+

 def int_from_int(gdbval):
    return int(str(gdbval))
@@ -1254,7 +1457,8 @@ class Frame(object):
        if self.is_evalframeex():
            pyop = self.get_pyop()
            if pyop:
-                sys.stdout.write('#%i %s\n' % (self.get_index(), pyop.get_truncated_repr(MAX_OUTPUT_LEN)))
+                line = pyop.get_truncated_repr(MAX_OUTPUT_LEN)
+                write_unicode(sys.stdout, '#%i %s\n' % (self.get_index(), line))
                sys.stdout.write(pyop.current_line())
            else:
                sys.stdout.write('#%i (unable to read python frame information)\n' % self.get_index())
@@ -1315,7 +1519,7 @@ class PyList(gdb.Command):
        if start<1:
            start = 1

-        with open(filename, 'r') as f:
+        with open(os_fsencode(filename), 'r') as f:
            all_lines = f.readlines()
            # start and end are 1-based, all_lines is 0-based;
            # so [start-1:end] as a python slice gives us [start, end] as a
@@ -1487,8 +1691,7 @@ class PyNameEquals(gdb.Function):
            pyframe = frame.get_pyop()
            if pyframe is None:
                return None
-            
-            return str(getattr(pyframe, attr))
+            val = str(getattr(pyframe, attr)); print val, val.proxyval(set()); return val
        
        return None
    
@@ -1922,7 +2125,7 @@ Py_single_input = 256
 Py_file_input = 257
 Py_eval_input = 258

-def pointervalue(gdbval):
+def _pointervalue(gdbval):
    """
    Return the value of the pointer as a Python int.
    
@@ -1936,11 +2139,23 @@ def pointervalue(gdbval):
        # still convert the pointer to an int
        return long(gdbval)

+def pointervalue(gdbval):
+    """
+    Return the pointer value of gdbval as computed by _pointervalue().
+
+    A negative result raises gdb.GdbError instead of being returned.  A
+    RuntimeError raised while performing that check is ignored and the
+    pointer is returned anyway.
+    """
+    pointer = _pointervalue(gdbval)
+    try:
+        if pointer < 0:
+            # NOTE(review): this only aborts if gdb.GdbError is not a
+            # RuntimeError subclass; otherwise the handler below would
+            # swallow it -- confirm against the gdb Python API.
+            raise gdb.GdbError("Negative pointer value, presumably a bug "
+                               "in gdb, aborting.")
+    except RuntimeError:
+        # work around yet another bug in gdb where you get random behaviour
+        # and tracebacks
+        pass
+        
+    return pointer

 class PythonCodeExecutor(object):
        
    def malloc(self, size):
-        chunk = (gdb.parse_and_eval("(void *) malloc(%d)" % size))
+        chunk = (gdb.parse_and_eval("(void *) malloc((size_t) %d)" % size))
        
        pointer = pointervalue(chunk)
        if pointer == 0:
@@ -1958,8 +2173,9 @@ class PythonCodeExecutor(object):
        stringp = self.alloc_string(string)
        try:
            result = gdb.parse_and_eval(
-                'PyString_FromStringAndSize((char *) %d, (size_t) %d)' % 
-                                                 (stringp, len(string)))
+                '(PyObject *) PyString_FromStringAndSize('
+                    '(char *) %d,'
+                    '(size_t) %d)' % (stringp, len(string)))
        finally:
            self.free(stringp)