Commit f26bd77e authored by gwenn's avatar gwenn

Fix CSV virtual table implementation.

parent 325b608f
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
package sqlite package sqlite
import ( import (
"bufio"
"errors" "errors"
"fmt" "fmt"
"io" "io"
...@@ -16,7 +17,7 @@ import ( ...@@ -16,7 +17,7 @@ import (
"github.com/gwenn/yacr" "github.com/gwenn/yacr"
) )
type csvModule struct { // ok type csvModule struct {
} }
// args[0] => module name // args[0] => module name
...@@ -24,11 +25,6 @@ type csvModule struct { // ok ...@@ -24,11 +25,6 @@ type csvModule struct { // ok
// args[2] => table name // args[2] => table name
func (m csvModule) Create(c *Conn, args []string) (VTab, error) { func (m csvModule) Create(c *Conn, args []string) (VTab, error) {
/*
err := c.DeclareVTab("CREATE TABLE x(test TEXT)")
if err != nil {
return nil, err
}*/
if len(args) < 4 { if len(args) < 4 {
return nil, errors.New("No CSV file specified") return nil, errors.New("No CSV file specified")
} }
...@@ -63,24 +59,24 @@ func (m csvModule) Create(c *Conn, args []string) (VTab, error) { ...@@ -63,24 +59,24 @@ func (m csvModule) Create(c *Conn, args []string) (VTab, error) {
if err != nil { if err != nil {
return nil, fmt.Errorf("Error opening CSV file: '%s'", filename) return nil, fmt.Errorf("Error opening CSV file: '%s'", filename)
} }
defer file.Close()
/* Read first zRow to obtain column names/number */ /* Read first zRow to obtain column names/number */
reader := yacr.NewReader(file, separator, quoted, guess) vTab := &csvTab{f: filename, sep: separator, quoted: quoted, cols: make([]string, 0, 10)}
vTab := &csvTab{f: file, r: reader, cols: make([]string, 0, 10)}
vTab.maxLength = int(c.Limit(LimitLength)) vTab.maxLength = int(c.Limit(LimitLength))
vTab.maxColumn = int(c.Limit(LimitColumn)) vTab.maxColumn = int(c.Limit(LimitColumn))
if err = vTab.readRow(); err != nil || len(vTab.cols) == 0 { reader := yacr.NewReader(file, separator, quoted, guess)
file.Close() if useHeaderRow {
reader.Split(vTab.split(reader.ScanField))
}
if err = vTab.readRow(reader); err != nil || len(vTab.cols) == 0 {
if err == nil { if err == nil {
err = errors.New("No columns found") err = errors.New("No columns found")
} }
return nil, err return nil, err
} }
if useHeaderRow { if guess {
if vTab.offsetFirstRow, err = file.Seek(0, os.SEEK_CUR); err != nil { vTab.sep = reader.Sep()
file.Close()
return nil, err
}
} }
/* Create the underlying relational database schema. If /* Create the underlying relational database schema. If
* that is successful, call sqlite3_declare_vtab() to configure * that is successful, call sqlite3_declare_vtab() to configure
...@@ -94,7 +90,6 @@ func (m csvModule) Create(c *Conn, args []string) (VTab, error) { ...@@ -94,7 +90,6 @@ func (m csvModule) Create(c *Conn, args []string) (VTab, error) {
} }
if useHeaderRow { if useHeaderRow {
if len(col) == 0 { if len(col) == 0 {
file.Close()
return nil, errors.New("No column name found") return nil, errors.New("No column name found")
} }
sql = fmt.Sprintf("%s\"%s\"%s", sql, col, tail) sql = fmt.Sprintf("%s\"%s\"%s", sql, col, tail)
...@@ -103,12 +98,11 @@ func (m csvModule) Create(c *Conn, args []string) (VTab, error) { ...@@ -103,12 +98,11 @@ func (m csvModule) Create(c *Conn, args []string) (VTab, error) {
} }
} }
if err = c.DeclareVTab(sql); err != nil { if err = c.DeclareVTab(sql); err != nil {
file.Close()
return nil, err return nil, err
} }
return vTab, nil return vTab, nil
} }
func (m csvModule) Connect(c *Conn, args []string) (VTab, error) { // ok func (m csvModule) Connect(c *Conn, args []string) (VTab, error) {
return m.Create(c, args) return m.Create(c, args)
} }
...@@ -116,8 +110,9 @@ func (m csvModule) Destroy() { // nothing to do ...@@ -116,8 +110,9 @@ func (m csvModule) Destroy() { // nothing to do
} }
type csvTab struct { type csvTab struct {
f *os.File f string
r *yacr.Reader sep byte
quoted bool
eof bool eof bool
offsetFirstRow int64 offsetFirstRow int64
cols []string cols []string
...@@ -126,18 +121,26 @@ type csvTab struct { ...@@ -126,18 +121,26 @@ type csvTab struct {
maxColumn int maxColumn int
} }
func (v *csvTab) readRow() error { func (v *csvTab) split(original bufio.SplitFunc) bufio.SplitFunc {
return func(data []byte, atEOF bool) (advance int, token []byte, err error) {
advance, token, err = original(data, atEOF)
v.offsetFirstRow += int64(advance)
return
}
}
func (v *csvTab) readRow(r *yacr.Reader) error {
v.cols = v.cols[:0] v.cols = v.cols[:0]
for { for {
if !v.r.Scan() { if !r.Scan() {
err := v.r.Err() err := r.Err()
v.eof = err == nil v.eof = err == nil
return err return err
} }
if v.r.EmptyLine() { // skip empty line (or line comment) if r.EmptyLine() { // skip empty line (or line comment)
continue continue
} }
col := v.r.Text() col := r.Text()
if len(col) >= v.maxLength { if len(col) >= v.maxLength {
return fmt.Errorf("CSV row is too long (>= %d)", v.maxLength) return fmt.Errorf("CSV row is too long (>= %d)", v.maxLength)
} }
...@@ -145,7 +148,7 @@ func (v *csvTab) readRow() error { ...@@ -145,7 +148,7 @@ func (v *csvTab) readRow() error {
if len(v.cols) >= v.maxColumn { if len(v.cols) >= v.maxColumn {
return fmt.Errorf("Too many columns (>= %d)", v.maxColumn) return fmt.Errorf("Too many columns (>= %d)", v.maxColumn)
} }
if v.r.EndOfRecord() { if r.EndOfRecord() {
break break
} }
} }
...@@ -153,59 +156,68 @@ func (v *csvTab) readRow() error { ...@@ -153,59 +156,68 @@ func (v *csvTab) readRow() error {
} }
func (v *csvTab) release() error { func (v *csvTab) release() error {
// TODO csvRelease has a counter reference?
if v != nil && v.f != nil {
return v.f.Close()
}
return nil return nil
} }
func (v *csvTab) BestIndex() error { // ok func (v *csvTab) BestIndex() error {
return nil return nil
} }
func (v *csvTab) Disconnect() error { // ok func (v *csvTab) Disconnect() error {
return v.release() return v.release()
} }
func (v *csvTab) Destroy() error { // ok func (v *csvTab) Destroy() error {
return v.release() return v.release()
} }
func (v *csvTab) Open() (VTabCursor, error) { // ok func (v *csvTab) Open() (VTabCursor, error) {
return &csvTabCursor{v, 0}, nil f, err := os.Open(v.f)
if err != nil {
return nil, err
}
return &csvTabCursor{vTab: v, f: f, rowNumber: 0}, nil
} }
type csvTabCursor struct { type csvTabCursor struct {
vTab *csvTab vTab *csvTab
csvpos int64 // ftell position of current zRow f *os.File
r *yacr.Reader
rowNumber int64
} }
func (vc *csvTabCursor) Close() error { // ok func (vc *csvTabCursor) Close() error {
return nil return vc.f.Close()
} }
func (vc *csvTabCursor) Filter() error { // ok func (vc *csvTabCursor) Filter() error {
// csvFilter v := vc.vTab
/* seek back to start of first zRow */ /* seek back to start of first zRow */
vc.vTab.eof = false v.eof = false
if _, err := vc.vTab.f.Seek(vc.vTab.offsetFirstRow, os.SEEK_SET); err != nil { if _, err := vc.f.Seek(v.offsetFirstRow, os.SEEK_SET); err != nil {
return err return err
} }
vc.rowNumber = 0
/* a new reader/scanner must be created because there is no way to reset its internal buffer/state (which has been invalidated by the SEEK_SET)*/
vc.r = yacr.NewReader(vc.f, v.sep, v.quoted, false)
/* read and parse next line */ /* read and parse next line */
return vc.Next() return vc.Next()
} }
func (vc *csvTabCursor) Next() (err error) { // ok func (vc *csvTabCursor) Next() error {
if vc.vTab.eof { v := vc.vTab
if v.eof {
return io.EOF return io.EOF
} }
/* update the cursor */ if vc.r == nil {
if vc.csvpos, err = vc.vTab.f.Seek(0, os.SEEK_CUR); err != nil { vc.r = yacr.NewReader(vc.f, v.sep, v.quoted, false)
return err
} }
/* read the next row of data */ /* read the next row of data */
return vc.vTab.readRow() err := v.readRow(vc.r)
if err == nil {
vc.rowNumber++
}
return err
} }
func (vc *csvTabCursor) Eof() bool { // ok func (vc *csvTabCursor) Eof() bool {
return vc.vTab.eof return vc.vTab.eof
} }
func (vc *csvTabCursor) Column(c *Context, col int) error { // ok func (vc *csvTabCursor) Column(c *Context, col int) error {
cols := vc.vTab.cols cols := vc.vTab.cols
if col < 0 || col >= len(cols) { if col < 0 || col >= len(cols) {
return fmt.Errorf("column index out of bounds: %d", col) return fmt.Errorf("column index out of bounds: %d", col)
...@@ -218,10 +230,10 @@ func (vc *csvTabCursor) Column(c *Context, col int) error { // ok ...@@ -218,10 +230,10 @@ func (vc *csvTabCursor) Column(c *Context, col int) error { // ok
c.ResultText(cols[col]) c.ResultText(cols[col])
return nil return nil
} }
func (vc *csvTabCursor) Rowid() (int64, error) { // ok func (vc *csvTabCursor) Rowid() (int64, error) {
return vc.csvpos, nil return vc.rowNumber, nil
} }
func LoadCsvModule(db *Conn) error { // ok func LoadCsvModule(db *Conn) error {
return db.CreateModule("csv", csvModule{}) return db.CreateModule("csv", csvModule{})
} }
...@@ -19,7 +19,7 @@ func TestCsvModule(t *testing.T) { ...@@ -19,7 +19,7 @@ func TestCsvModule(t *testing.T) {
err = db.Exec("CREATE VIRTUAL TABLE vtab USING csv('test.csv', USE_HEADER_ROW)") err = db.Exec("CREATE VIRTUAL TABLE vtab USING csv('test.csv', USE_HEADER_ROW)")
checkNoError(t, err, "couldn't create CSV virtual table: %s") checkNoError(t, err, "couldn't create CSV virtual table: %s")
s, err := db.Prepare("SELECT rowid, * FROM vtab") s, err := db.Prepare("SELECT rowid, * FROM vtab ORDER BY rowid LIMIT 3 OFFSET 2")
checkNoError(t, err, "couldn't select from CSV virtual table: %s") checkNoError(t, err, "couldn't select from CSV virtual table: %s")
defer checkFinalize(s, t) defer checkFinalize(s, t)
......
...@@ -16,7 +16,7 @@ func TestLimit(t *testing.T) { ...@@ -16,7 +16,7 @@ func TestLimit(t *testing.T) {
defer checkClose(db, t) defer checkClose(db, t)
limitVariableNumber := db.Limit(LimitVariableNumber) limitVariableNumber := db.Limit(LimitVariableNumber)
assert.T(t, limitVariableNumber < 1000, "unexpected value for LimitVariableNumber") assert.Tf(t, limitVariableNumber < 1e6, "unexpected value for LimitVariableNumber: %d", limitVariableNumber)
oldLimitVariableNumber := db.SetLimit(LimitVariableNumber, 99) oldLimitVariableNumber := db.SetLimit(LimitVariableNumber, 99)
assert.Equalf(t, limitVariableNumber, oldLimitVariableNumber, "unexpected value for LimitVariableNumber: %d <> %d", limitVariableNumber, oldLimitVariableNumber) assert.Equalf(t, limitVariableNumber, oldLimitVariableNumber, "unexpected value for LimitVariableNumber: %d <> %d", limitVariableNumber, oldLimitVariableNumber)
limitVariableNumber = db.Limit(LimitVariableNumber) limitVariableNumber = db.Limit(LimitVariableNumber)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment