Commit 137764ea authored by gwenn

First draft of a CSV module for loading a CSV file as a virtual table.

parent e238c754
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package sqlite
import (
"errors"
"fmt"
"io"
"os"
"github.com/gwenn/yacr"
)
// csvModule is the stateless module value that LoadCsvModule registers
// under the name "csv".
type csvModule struct{}
// Create opens the CSV file named in the virtual table arguments, reads its
// first record to discover the columns and declares the matching schema on c.
//
// Arguments:
//
//	args[0] => module name
//	args[1] => db name
//	args[2] => table name
//	args[3] => CSV file name, optionally wrapped in single quotes
//	args[4] => optional field separator, optionally preceded by a single quote
//	args[5] => optional "USE_HEADER_ROW" flag: name the columns after the
//	           fields of the first record instead of col1, col2, ...
//
// On success the returned VTab owns the open file. On any failure the file is
// closed and a nil VTab is returned.
func (m csvModule) Create(c *Conn, args []string) (VTab, error) {
	if len(args) < 4 {
		return nil, errors.New("No CSV file specified")
	}
	filename := csvUnquote(args[3])
	if len(filename) == 0 {
		return nil, errors.New("No CSV file specified")
	}

	// A custom separator disables the `guess` flag handed to the reader.
	var separator byte = ','
	guess := true
	if len(args) > 4 {
		arg := args[4]
		if len(arg) == 0 {
			return nil, errors.New("empty CSV separator")
		}
		separator = arg[0]
		if separator == '\'' {
			// Quoted form: the separator is the byte after the opening quote.
			if len(arg) < 2 {
				return nil, errors.New("empty CSV separator")
			}
			separator = arg[1]
		}
		guess = false
	}

	useHeaderRow := len(args) > 5 && args[5] == "USE_HEADER_ROW"

	file, err := os.Open(filename)
	if err != nil {
		return nil, fmt.Errorf("Error opening CSV file: '%s': %v", filename, err)
	}
	// Every failure path below must release the file; only a fully
	// constructed table keeps it open.
	created := false
	defer func() {
		if !created {
			file.Close()
		}
	}()

	// Read the first record to obtain the column names/number.
	// NOTE(review): the meaning of the third argument (true) is defined by
	// yacr.NewReader — presumably it enables quoted-field handling; confirm.
	reader := yacr.NewReader(file, separator, true, guess)
	vTab := &csvTab{f: file, r: reader, cols: make([]string, 0, 10)}
	if err = vTab.readRow(); err != nil {
		return nil, err
	}
	if len(vTab.cols) == 0 {
		return nil, errors.New("No columns found")
	}

	if useHeaderRow {
		// Remember where the data starts so that scans skip the header.
		// NOTE(review): this is the file descriptor's offset; if yacr.Reader
		// reads ahead, it may already be past the end of the header record —
		// confirm against the yacr implementation.
		if vTab.offsetFirstRow, err = file.Seek(0, os.SEEK_CUR); err != nil {
			return nil, err
		}
	}

	sql, err := csvSchema(vTab.cols, useHeaderRow)
	if err != nil {
		return nil, err
	}
	if err = c.DeclareVTab(sql); err != nil {
		return nil, err
	}
	created = true
	return vTab, nil
}

// csvUnquote strips one pair of enclosing single quotes from s, if present.
func csvUnquote(s string) string {
	if n := len(s); n >= 2 && s[0] == '\'' && s[n-1] == '\'' {
		return s[1 : n-1]
	}
	return s
}

// csvQuoteIdent wraps name in double quotes, doubling any embedded double
// quote so the result is a single valid SQL identifier.
func csvQuoteIdent(name string) string {
	quoted := make([]byte, 0, len(name)+2)
	quoted = append(quoted, '"')
	for i := 0; i < len(name); i++ {
		if name[i] == '"' {
			quoted = append(quoted, '"')
		}
		quoted = append(quoted, name[i])
	}
	quoted = append(quoted, '"')
	return string(quoted)
}

// csvSchema builds the CREATE TABLE statement declared for the virtual table:
// quoted header names when useHeaderRow is set, col1..colN otherwise.
func csvSchema(cols []string, useHeaderRow bool) (string, error) {
	sql := "CREATE TABLE x("
	for i, col := range cols {
		if i > 0 {
			sql += ", "
		}
		if !useHeaderRow {
			sql += fmt.Sprintf("col%d", i+1)
			continue
		}
		if len(col) == 0 {
			return "", errors.New("No column name found")
		}
		sql += csvQuoteIdent(col)
	}
	return sql + ");", nil
}
// Connect behaves exactly like Create: it delegates to it with the same
// connection and arguments.
func (m csvModule) Connect(c *Conn, args []string) (VTab, error) {
	return m.Create(c, args)
}
// Destroy is a no-op: the module itself holds no resources.
func (m csvModule) Destroy() {}
// csvTab is the virtual table backed by one open CSV file.
type csvTab struct {
	f              *os.File     // underlying CSV file, closed by release
	r              *yacr.Reader // field reader built on top of f
	eof            bool         // set by readRow when the reader ends without error
	offsetFirstRow int64        // file offset Filter seeks to before scanning
	cols           []string     // fields of the most recently read record
}
// readRow replaces v.cols with the fields of the next record.
//
// Lines reported as empty by the reader are skipped. When the reader stops,
// its error (if any) is returned; a stop without error marks the table as
// being at end of file and returns nil.
func (v *csvTab) readRow() error {
	v.cols = v.cols[:0] // reuse the backing array between records
	for v.r.Scan() {
		if v.r.EmptyLine() {
			continue
		}
		v.cols = append(v.cols, v.r.Text())
		if v.r.EndOfRecord() {
			return nil
		}
	}
	scanErr := v.r.Err()
	v.eof = scanErr == nil
	return scanErr
}
// release closes the underlying file, if there is one, and returns the
// result of that Close. A nil table or a table without a file is a no-op.
func (v *csvTab) release() error {
	// TODO csvRelease has a counter reference?
	if v == nil || v.f == nil {
		return nil
	}
	return v.f.Close()
}
// BestIndex does nothing and always succeeds.
func (v *csvTab) BestIndex() error {
	return nil
}
// Disconnect releases the table's file handle.
func (v *csvTab) Disconnect() error {
	return v.release()
}
// Destroy releases the table's file handle, exactly like Disconnect.
func (v *csvTab) Destroy() error {
	return v.release()
}
// Open returns a new cursor over v with its recorded position at zero.
func (v *csvTab) Open() (VTabCursor, error) {
	cursor := &csvTabCursor{vTab: v, csvpos: 0}
	return cursor, nil
}
// csvTabCursor iterates over the records of a csvTab.
type csvTabCursor struct {
	// vTab is the table being scanned.
	vTab *csvTab
	// csvpos is the file offset recorded by Next just before it reads a
	// record; Rowid reports it as the row identifier.
	csvpos int64
}
// Close does nothing and always succeeds; the file belongs to the table.
func (vc *csvTabCursor) Close() error {
	return nil
}
// Filter restarts the scan: it clears the table's end-of-file flag, seeks the
// file back to the offset of the first row and loads that row through Next.
//
// NOTE(review): only the file is repositioned here; whether yacr.Reader keeps
// data buffered from before the seek is not visible in this file — confirm.
func (vc *csvTabCursor) Filter() error {
	tab := vc.vTab
	tab.eof = false
	_, seekErr := tab.f.Seek(tab.offsetFirstRow, os.SEEK_SET)
	if seekErr != nil {
		return seekErr
	}
	return vc.Next()
}
// Next advances the cursor to the following record.
//
// It returns io.EOF when the table is already at end of file. Otherwise it
// stores the file's current offset in csvpos and reads the next record into
// the table, returning any seek or read error.
func (vc *csvTabCursor) Next() (err error) {
	tab := vc.vTab
	if tab.eof {
		return io.EOF
	}
	// Record the position before reading; Rowid reports this value.
	vc.csvpos, err = tab.f.Seek(0, os.SEEK_CUR)
	if err == nil {
		err = tab.readRow()
	}
	return err
}
// Eof reports whether the underlying table has reached end of file.
func (vc *csvTabCursor) Eof() bool {
	atEnd := vc.vTab.eof
	return atEnd
}
// Column writes field number col of the current record to c as text.
//
// It returns an error when col is negative or not smaller than the number of
// fields in the current record. This includes the case where no record is
// loaded, since the field list is then empty.
func (vc *csvTabCursor) Column(c *Context, col int) error {
	cols := vc.vTab.cols
	if col < 0 || col >= len(cols) {
		return fmt.Errorf("column index out of bounds: %d", col)
	}
	// No nil check is needed past this point: a nil slice has length zero and
	// is already rejected by the bounds check above.
	// TODO dynamic typing c.ResultInt64()
	c.ResultText(cols[col])
	return nil
}
// Rowid returns the file offset recorded for the current record as its
// row identifier; it never fails.
func (vc *csvTabCursor) Rowid() (int64, error) {
	id := vc.csvpos
	return id, nil
}
// LoadCsvModule registers the CSV virtual table module on db under the
// name "csv" and returns the error reported by the registration, if any.
func LoadCsvModule(db *Conn) error {
	module := csvModule{}
	return db.CreateModule("csv", module)
}
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package sqlite_test
import (
"testing"
. "github.com/gwenn/gosqlite"
)
// TestCsvModule registers the CSV module, creates a virtual table over
// test.csv, scans every row (rowid plus three columns) and drops the table.
func TestCsvModule(t *testing.T) {
	db := open(t)
	defer checkClose(db, t)

	err := LoadCsvModule(db)
	checkNoError(t, err, "couldn't create CSV module: %s")

	err = db.Exec("CREATE VIRTUAL TABLE vtab USING csv('test.csv')")
	checkNoError(t, err, "couldn't create CSV virtual table: %s")

	stmt, err := db.Prepare("SELECT rowid, * FROM vtab")
	checkNoError(t, err, "couldn't select from CSV virtual table: %s")
	defer checkFinalize(stmt, t)

	// Scan targets are reused for every row; only scan errors are checked.
	var (
		rowid      int
		c1, c2, c3 string
	)
	scanRow := func(row *Stmt) error {
		return row.Scan(&rowid, &c1, &c2, &c3)
	}
	err = stmt.Select(scanRow)
	checkNoError(t, err, "couldn't select from CSV virtual table: %s")

	err = db.Exec("DROP TABLE vtab")
	checkNoError(t, err, "couldn't drop CSV virtual table: %s")
}
"colA","colB","colC"
1,2,3
a,b,c
a,"b",c
"a","b","c .. z"
"a","b","c,d"
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment