python.go 2.92 KB
Newer Older
Kirill Smelkov's avatar
.  
Kirill Smelkov committed
1
package xfmt
2 3

import (
4
	"bytes"
5 6 7 8 9 10
	"strconv"
	"unicode/utf8"

	"lab.nexedi.com/kirr/go123/mem"
)

Kirill Smelkov's avatar
.  
Kirill Smelkov committed
11

Kirill Smelkov's avatar
.  
Kirill Smelkov committed
12 13 14 15 16 17 18 19 20 21 22 23
// TODO remove - not needed ?
// // pyQuote quotes string the way python repr(str) would do
// func pyQuote(s string) string {
// 	out := pyQuoteBytes(mem.Bytes(s))
// 	return mem.String(out)
// }
//
// func pyQuoteBytes(b []byte) []byte {
// 	buf := make([]byte, 0, (len(b) + 2) /* to reduce allocations when quoting */ * 2)
// 	return pyAppendQuoteBytes(buf, b)
// }

24 25 26 27 28
// bytesContainsByte reports whether byte c occurs anywhere in s.
//
// It is the single-byte counterpart of bytes.ContainsRune, built directly on
// bytes.IndexByte (the original author notes it is a bit faster).
func bytesContainsByte(s []byte, c byte) bool {
	pos := bytes.IndexByte(s, c)
	return pos != -1
}

Kirill Smelkov's avatar
.  
Kirill Smelkov committed
29 30 31
// AppendQuotePy appends to buf Python quoting of s
func AppendQuotePy(buf []byte, s string) []byte {
	return AppendQuotePyBytes(buf, mem.Bytes(s))
Kirill Smelkov's avatar
.  
Kirill Smelkov committed
32
}
33

Kirill Smelkov's avatar
.  
Kirill Smelkov committed
34 35
// AppendQuotePyBytes appends to buf Python quoting of b
func AppendQuotePyBytes(buf, b []byte) []byte {
36 37 38
	// smartquotes: choose ' or " as quoting character
	// https://github.com/python/cpython/blob/v2.7.13-116-g1aa1803b3d/Objects/stringobject.c#L947
	quote := byte('\'')
39
	if bytesContainsByte(b, '\'') && !bytesContainsByte(b, '"') {
Kirill Smelkov's avatar
Kirill Smelkov committed
40
		quote = '"'
41 42 43 44
	}

	buf = append(buf, quote)

45
	for len(b) > 0 {
Kirill Smelkov's avatar
Kirill Smelkov committed
46 47 48 49
		c := b[0]
		switch {
		// fast path - ASCII only - trying to avoid UTF-8 decoding
		case c < utf8.RuneSelf:
50 51
			switch {
				case c == '\\' || c == quote:
Kirill Smelkov's avatar
Kirill Smelkov committed
52 53
					buf = append(buf, '\\', c)

54 55 56 57 58 59 60
				case ' ' <= c && c <= '\x7e':
					// printable ASCII
					buf = append(buf, c)


				// below: non-printable ASCII

Kirill Smelkov's avatar
Kirill Smelkov committed
61 62
				// NOTE python converts to \<letter> only \t \n \r  (not e.g. \v)
				// https://github.com/python/cpython/blob/v2.7.13-116-g1aa1803b3d/Objects/stringobject.c#L963
63
				case c == '\t':
Kirill Smelkov's avatar
Kirill Smelkov committed
64
					buf = append(buf, `\t`...)
65
				case c == '\n':
Kirill Smelkov's avatar
Kirill Smelkov committed
66
					buf = append(buf, `\n`...)
67
				case c == '\r':
Kirill Smelkov's avatar
Kirill Smelkov committed
68 69 70
					buf = append(buf, `\r`...)

				default:
71 72 73 74
					// NOTE c < ' ' or c == '\x7f' (the only non-printable ASCII character > space) here
					// we already converted to \<letter> what python represents as such above
					// everything else goes in numeric byte escapes
					buf = append(buf, '\\', 'x', hexdigits[c>>4], hexdigits[c&0xf])
Kirill Smelkov's avatar
Kirill Smelkov committed
75
			}
76

Kirill Smelkov's avatar
Kirill Smelkov committed
77
			b = b[1:]
78

Kirill Smelkov's avatar
Kirill Smelkov committed
79 80 81
		// slow path - full UTF-8 decoding
		default:
			r, size := utf8.DecodeRune(b)
Kirill Smelkov's avatar
.  
Kirill Smelkov committed
82

83 84 85
			switch {
			case r == utf8.RuneError:
				// decode error - just emit raw byte as escaped
Kirill Smelkov's avatar
Kirill Smelkov committed
86
				buf = append(buf, '\\', 'x', hexdigits[c>>4], hexdigits[c&0xf])
87

88 89 90
			case strconv.IsPrint(r):
				// printable utf-8 characters go as is
				buf = append(buf, b[:size]...)
Kirill Smelkov's avatar
Kirill Smelkov committed
91

92 93 94 95
			default:
				// everything else goes in numeric byte escapes
				for i := 0; i < size; i++ {
					buf = append(buf, '\\', 'x', hexdigits[b[i]>>4], hexdigits[b[i]&0xf])
Kirill Smelkov's avatar
.  
Kirill Smelkov committed
96
				}
97
			}
98

Kirill Smelkov's avatar
Kirill Smelkov committed
99 100
			b = b[size:]
		}
101 102 103 104 105
	}

	buf = append(buf, quote)
	return buf
}
Kirill Smelkov's avatar
.  
Kirill Smelkov committed
106 107 108 109


// Qpy appends string quoted as Python would do
func (b *Buffer) Qpy(s string) *Buffer {
Kirill Smelkov's avatar
.  
Kirill Smelkov committed
110
	*b = AppendQuotePy(*b, s)
Kirill Smelkov's avatar
.  
Kirill Smelkov committed
111 112 113
	return b
}

Kirill Smelkov's avatar
.  
Kirill Smelkov committed
114 115 116
// Qbpy appends []byte quoted as Python would do
func (b *Buffer) Qbpy(x []byte) *Buffer {
	*b = AppendQuotePyBytes(*b, x)
Kirill Smelkov's avatar
.  
Kirill Smelkov committed
117 118
	return b
}