Commit eaf88f07 authored by Kirill Smelkov's avatar Kirill Smelkov

tests: Show pickles in a way that can be copy-pasted into Python

When an encoding test fails, the "want" and "have" pickles are printed. It
is handy to copy-paste those pickles into a Python console and check them
further there.

Pickle printing currently uses %q. However, Go fmt's %q can use \u and
\U if a byte sequence forms a valid UTF-8 character. That poses a problem:
in a Python str (py2) or bytes (py3) literal, \uXXXX sequences are not
processed as unicode escapes and enter the string as is. This results in
different pickle data being pasted into Python, and in further confusion.

Entering data into Python as unicode literals (where \u works) and then
adding .encode('utf-8') also does not generally work: pickle data is
arbitrary in general, so it may not be valid UTF-8, for example:

	"\x80\u043c\u0438\u0440"	(= "\x80мир"   = "\x80\xd0\xbc\xd0\xb8\xd1\x80")

and unicode-encoding it in Python also gives different data:

	In [1]: u"\x80\u043c\u0438\u0440".encode('utf-8')
	Out[1]: '\xc2\x80\xd0\xbc\xd0\xb8\xd1\x80'

(note leading extra \xc2)

For this reason let's implement the quoting ourselves, in a form that
Python can understand. This dumping functionality was very handy while
debugging the recent encoder fixes.
parent a82d5d30
...@@ -10,6 +10,7 @@ import ( ...@@ -10,6 +10,7 @@ import (
"reflect" "reflect"
"strconv" "strconv"
"testing" "testing"
"unicode/utf8"
) )
func bigInt(s string) *big.Int { func bigInt(s string) *big.Int {
...@@ -482,7 +483,7 @@ func testEncode(t *testing.T, proto int, object, objectDecodedBack interface{}, ...@@ -482,7 +483,7 @@ func testEncode(t *testing.T, proto int, object, objectDecodedBack interface{},
} }
data := buf.String() data := buf.String()
if dataOk != "" && data != dataOk { if dataOk != "" && data != dataOk {
t.Errorf("encode:\nhave: %q\nwant: %q", data, dataOk) t.Errorf("encode:\nhave: %s\nwant: %s", pyquote(data), pyquote(dataOk))
} }
// encode | limited writer -> write error // encode | limited writer -> write error
...@@ -868,6 +869,58 @@ func TestUnquoteCharEOF(t *testing.T) { ...@@ -868,6 +869,58 @@ func TestUnquoteCharEOF(t *testing.T) {
} }
} }
// pyquote, similarly to strconv.Quote, quotes s with " but emits only ASCII
// and never uses "\u" or "\U" escapes inside.
//
// \u and friends have to be avoided: in a Python str (py2) or bytes (py3)
// literal they are not processed as unicode escapes and enter the string as
// is. Non-ASCII runes are not emitted verbatim either: a Python 3 bytes
// literal rejects non-ASCII characters, and a verbatim character pasted into
// Python 2 is encoded according to the terminal, not necessarily as the bytes
// that are in s. Every byte outside printable ASCII is therefore written as a
// numeric \xNN escape.
//
// The result can be copy/pasted into Python - as is on py2, or with a b
// prefix on py3 - to verify a pickle with pickletools.dis and pickle.loads.
func pyquote(s string) string {
	const hexdigits = "0123456789abcdef"

	out := make([]byte, 0, len(s)+2)
	out = append(out, '"')

	for len(s) > 0 {
		r, width := utf8.DecodeRuneInString(s)

		switch {
		case r == '\\' || r == '"':
			out = append(out, '\\', byte(r))

		case r < ' ':
			// control characters: reuse Go's escapes, which Python also accepts
			rq := strconv.QuoteRune(r) // e.g. `'\n'`
			out = append(out, rq[1:len(rq)-1]...)

		case r < utf8.RuneSelf && strconv.IsPrint(r):
			// printable ASCII goes as is
			out = append(out, byte(r))

		default:
			// invalid UTF-8, DEL and all non-ASCII runes go in numeric
			// byte escapes, one per source byte
			for i := 0; i < width; i++ {
				out = append(out, '\\', 'x', hexdigits[s[i]>>4], hexdigits[s[i]&0xf])
			}
		}

		s = s[width:]
	}

	out = append(out, '"')
	return string(out)
}
// like io.LimitedReader but for writes // like io.LimitedReader but for writes
// XXX it would be good to have it in stdlib // XXX it would be good to have it in stdlib
type LimitedWriter struct { type LimitedWriter struct {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment