Commit db0cbaf8 authored by Kirill Smelkov's avatar Kirill Smelkov Committed by Kamil Kisiel

decoder: Fix integer overflow in BINUNICODE handling

BINUNICODE opcode comes with 4-byte little-endian unsigned int length after it.
However we were decoding it as signed int32 instead of unsigned uint32.

As the result if last byte of this length is >= 0x80 the length
was decoded as negative:

        y := int32(0x81)
        println(y << (3*8))

	Output: -2130706432

which was leading to empty unicode string returned instead of an error.

-> Fix it by decoding BINUNICODE lenght as uint32.

Without the fix added test fails as

	--- FAIL: TestDecodeError (0.00s)
	    ogorek_test.go:812: "X\xff\xff\xff\xff.": no decode error  ; got "", <nil>

And for that "X\xff\xff\xff\xff." Python also rejects to load this
pickle:

	In [1]: p = b"X\xff\xff\xff\xff."

	In [2]: pickle.loads(p)
	---------------------------------------------------------------------------
	UnpicklingError                           Traceback (most recent call last)
	Cell In [2], line 1
	----> 1 pickle.loads(p)

	UnpicklingError: pickle data was truncated

	In [3]: dis(p)
	---------------------------------------------------------------------------
	ValueError                                Traceback (most recent call last)
	Cell In [3], line 1
	----> 1 dis(p)

	File /usr/lib/python3.11/pickletools.py:2448, in dis(pickle, out, memo, indentlevel, annotate)
	   2446 errormsg = None
	   2447 annocol = annotate  # column hint for annotations
	-> 2448 for opcode, arg, pos in genops(pickle):
	   2449     if pos is not None:
	   2450         print("%5d:" % pos, end=' ', file=out)

	File /usr/lib/python3.11/pickletools.py:2291, in _genops(data, yield_end_pos)
	   2289     arg = None
	   2290 else:
	-> 2291     arg = opcode.arg.reader(data)
	   2292 if yield_end_pos:
	   2293     yield opcode, arg, pos, getpos()

	File /usr/lib/python3.11/pickletools.py:693, in read_unicodestring4(f)
	    691 if len(data) == n:
	    692     return str(data, 'utf-8', 'surrogatepass')
	--> 693 raise ValueError("expected %d bytes in a unicodestring4, but only %d "
	    694                  "remain" % (n, len(data)))

	ValueError: expected 4294967295 bytes in a unicodestring4, but only 1 remain
parent 465792bb
...@@ -903,16 +903,15 @@ func (d *Decoder) loadUnicode() error { ...@@ -903,16 +903,15 @@ func (d *Decoder) loadUnicode() error {
} }
func (d *Decoder) loadBinUnicode() error { func (d *Decoder) loadBinUnicode() error {
var length int32 var b [4]byte
for i := 0; i < 4; i++ { _, err := io.ReadFull(d.r, b[:])
t, err := d.r.ReadByte()
if err != nil { if err != nil {
return err return err
} }
length = length | (int32(t) << uint(8*i)) length := binary.LittleEndian.Uint32(b[:])
}
rawB := []byte{} rawB := []byte{}
for z := 0; int32(z) < length; z++ { for z := length; z > 0; z-- {
n, err := d.r.ReadByte() n, err := d.r.ReadByte()
if err != nil { if err != nil {
return err return err
......
...@@ -776,9 +776,11 @@ func TestDecodeError(t *testing.T) { ...@@ -776,9 +776,11 @@ func TestDecodeError(t *testing.T) {
// invalid protocol version // invalid protocol version
"\x80\xffI1\n.", "\x80\xffI1\n.",
// BINSTRING with big len and no data // BINSTRING and BINUNICODE with big len and no data
// (might cause out-of-memory DOS if buffer is preallocated blindly) // (might cause out-of-memory DOS if buffer is preallocated blindly)
"T\xff\xff\xff\xff.", "T\xff\xff\xff\xff.",
"X\xff\xff\xff\xff.",
// it is invalid to expose mark object // it is invalid to expose mark object
"(.", // MARK "(.", // MARK
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment