Commit ae9e330c authored by Kirill Smelkov

X fstail move pyquote out + test

parent 9afdf134
......@@ -28,72 +28,10 @@ import (
"io"
"log"
"os"
"strconv"
"strings"
"unicode/utf8"
"../../../../storage/fs1"
"lab.nexedi.com/kirr/go123/mem"
)
// pyQuote quotes string the way python repr(str) would do
func pyQuote(s string) string {
out := pyQuoteBytes(mem.Bytes(s))
return mem.String(out)
}
// pyQuoteBytes quotes b the way python2 repr(str) would do and returns the
// result in a newly allocated byte slice.
//
// The quoting character is chosen like python does it: ' by default, and "
// when the data contains ' but no ". Backslash and the chosen quote are
// backslash-escaped; tab, newline and carriage return use their \t \n \r
// letter form. Bytes that do not form valid UTF-8, as well as non-printable
// characters, are emitted as \xNN for every byte they occupy.
//
// Unlike python, printable non-ASCII characters are kept as is, so that the
// output stays readable.
func pyQuoteBytes(b []byte) []byte {
	// view the data as string to be able to use strings.* and utf8.*InString
	// (NOTE(review): mem.String is presumably a no-copy conversion - confirm)
	s := mem.String(b)
	buf := make([]byte, 0, len(s)+2) // +2 for the surrounding quotes

	// smartquotes: choose ' or " as quoting character
	// https://github.com/python/cpython/blob/v2.7.13-116-g1aa1803b3d/Objects/stringobject.c#L947
	quote := byte('\'')
	if strings.ContainsRune(s, '\'') && !strings.ContainsRune(s, '"') {
		quote = '"'
	}

	buf = append(buf, quote)

	for i := 0; i < len(s); {
		// decode by hand instead of `range s`: the width is needed to tell
		// an invalid byte (RuneError, width 1) from a validly encoded
		// U+FFFD (RuneError, width 3). Treating both alike would emit only
		// the first byte of U+FFFD and silently drop the other two.
		r, size := utf8.DecodeRuneInString(s[i:])

		switch {
		case r == '\\' || r == rune(quote):
			buf = append(buf, '\\', byte(r))

		// NOTE python converts to \<letter> only \t \n \r (not e.g. \v)
		// https://github.com/python/cpython/blob/v2.7.13-116-g1aa1803b3d/Objects/stringobject.c#L963
		case r == '\t':
			buf = append(buf, `\t`...)
		case r == '\n':
			buf = append(buf, `\n`...)
		case r == '\r':
			buf = append(buf, `\r`...)

		// printable character coming from valid UTF-8: emit as is.
		// This also covers the quote character that was not chosen.
		case (r != utf8.RuneError || size > 1) && strconv.IsPrint(r):
			buf = append(buf, s[i:i+size]...)

		// everything else - invalid UTF-8 bytes, control characters, 0x7f
		// and non-printable non-ASCII - goes out as \xNN per byte. python2
		// repr(str) never produces \u or \U escapes, so strconv.QuoteRune
		// must not be used for such characters.
		default:
			for j := i; j < i+size; j++ {
				buf = append(buf, fmt.Sprintf("\\x%02x", s[j])...)
			}
		}

		i += size
	}

	buf = append(buf, quote)
	return buf
}
func fsTail(w io.Writer, path string, ntxn int) (err error) {
// path & fstail on error context
......
// XXX move me out of here
package main
import (
"fmt"
"strconv"
"strings"
"unicode/utf8"
"lab.nexedi.com/kirr/go123/mem"
)
// pyQuote quotes string the way python repr(str) would do
func pyQuote(s string) string {
out := pyQuoteBytes(mem.Bytes(s))
return mem.String(out)
}
// pyQuoteBytes quotes b the way python2 repr(str) would do and returns the
// result in a newly allocated byte slice.
//
// The quoting character is chosen like python does it: ' by default, and "
// when the data contains ' but no ". Backslash and the chosen quote are
// backslash-escaped; tab, newline and carriage return use their \t \n \r
// letter form. Bytes that do not form valid UTF-8, as well as non-printable
// characters, are emitted as \xNN for every byte they occupy.
//
// Unlike python, printable non-ASCII characters are kept as is, so that the
// output stays readable.
func pyQuoteBytes(b []byte) []byte {
	// view the data as string to be able to use strings.* and utf8.*InString
	// (NOTE(review): mem.String is presumably a no-copy conversion - confirm)
	s := mem.String(b)
	buf := make([]byte, 0, len(s)+2) // +2 for the surrounding quotes

	// smartquotes: choose ' or " as quoting character
	// https://github.com/python/cpython/blob/v2.7.13-116-g1aa1803b3d/Objects/stringobject.c#L947
	quote := byte('\'')
	if strings.ContainsRune(s, '\'') && !strings.ContainsRune(s, '"') {
		quote = '"'
	}

	buf = append(buf, quote)

	for i := 0; i < len(s); {
		// decode by hand instead of `range s`: the width is needed to tell
		// an invalid byte (RuneError, width 1) from a validly encoded
		// U+FFFD (RuneError, width 3). Treating both alike would emit only
		// the first byte of U+FFFD and silently drop the other two.
		r, size := utf8.DecodeRuneInString(s[i:])

		switch {
		case r == '\\' || r == rune(quote):
			buf = append(buf, '\\', byte(r))

		// NOTE python converts to \<letter> only \t \n \r (not e.g. \v)
		// https://github.com/python/cpython/blob/v2.7.13-116-g1aa1803b3d/Objects/stringobject.c#L963
		case r == '\t':
			buf = append(buf, `\t`...)
		case r == '\n':
			buf = append(buf, `\n`...)
		case r == '\r':
			buf = append(buf, `\r`...)

		// printable character coming from valid UTF-8: emit as is.
		// This also covers the quote character that was not chosen.
		case (r != utf8.RuneError || size > 1) && strconv.IsPrint(r):
			buf = append(buf, s[i:i+size]...)

		// everything else - invalid UTF-8 bytes, control characters, 0x7f
		// and non-printable non-ASCII - goes out as \xNN per byte. python2
		// repr(str) never produces \u or \U escapes, so strconv.QuoteRune
		// must not be used for such characters.
		default:
			for j := i; j < i+size; j++ {
				buf = append(buf, fmt.Sprintf("\\x%02x", s[j])...)
			}
		}

		i += size
	}

	buf = append(buf, quote)
	return buf
}
// XXX move me to common place
package main
import (
"testing"
)
// byterange builds the slice of consecutive byte values start, start+1, ...
// up to but not including stop. The result is empty when start >= stop.
func byterange(start, stop byte) []byte {
	out := make([]byte, 0, stop-start)
	for c := start; c < stop; c++ {
		out = append(out, c)
	}
	return out
}
// TestPyQuote runs pyQuote over a table of inputs and compares every result
// with the expected python-style quoting.
func TestPyQuote(t *testing.T) {
	// XXX -> global
	cases := []struct{ in, want string }{
		// empty string
		{``, `''`},

		// control characters 0x00 - 0x1f
		{
			string(byterange(0, 32)),
			`'\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f'`,
		},

		// choice between " and ' as the quoting character
		{`hello world`, `'hello world'`},
		{`hello ' world`, `"hello ' world"`},
		{`hello ' " world`, `'hello \' " world'`},

		// backslash
		{`hello \ world`, `'hello \\ world'`},

		// valid utf-8
		// XXX python escapes non-ascii, but since FileStorage cannot commit
		// such strings we take the freedom and output them as readable.
		// What python itself would give:
		//{`привет мир`, `'\xd0\xbf\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82 \xd0\xbc\xd0\xb8\xd1\x80'`},
		{`привет мир`, `'привет мир'`},

		// invalid utf-8
		{"\xd0a", `'\xd0a'`},
	}

	for _, tc := range cases {
		if got := pyQuote(tc.in); got != tc.want {
			t.Errorf("pyQuote(%q) -> %s ; want %s", tc.in, got, tc.want)
		}
	}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment