Commit 961e365a authored by gwenn

Improve CSV virtual table module.

One may specify column names/types as module arguments.
parent d6f93349
......@@ -6,7 +6,7 @@ There are two layers:
* one matching the SQLite API (with Backup, Blob, user-defined Function/Module, ...).
* and another implementing the "database/sql/driver" interface.
[![GoDoc](https://godoc.org/github.com/gwenn/gosqlite?status.png)](https://godoc.org/github.com/gwenn/gosqlite)
[![GoDoc](https://godoc.org/github.com/gwenn/gosqlite?status.svg)](https://godoc.org/github.com/gwenn/gosqlite)
[![Build Status][1]][2]
......
......@@ -18,13 +18,20 @@ import (
// csvModule is the stateless (field-less) module type whose Create method
// builds a virtual table from the module arguments.
type csvModule struct {
}
// args[0] => module name
// args[1] => db name
// args[2] => table name
// TODO http://www.ch-werner.de/sqliteodbc/html/csvtable_8c.html make it possible to specify the column names/types
// TODO https://github.com/karbarcca/SQLite.jl & infer
// args[0] => module name
// args[1] => db name
// args[2] => table name
// args[3] => filename (maybe quoted: '...')
// args[i], i > 3:
// - contains HEADER (case-insensitive) => use the first line of the file as column names, or skip the first line if NAMES are specified
// - contains NO_QUOTE (case-insensitive) => no double-quoted field is expected in the file
// - single char (;) or quoted char (';') => value separator used in the file
// - contains NAMES (case-insensitive) => use args[i+1], ... as column names (until _TYPES_)
// - contains TYPES (case-insensitive) => use args[i+1], ... as column types
// Beware: empty args are skipped (..., , ...); use an empty SQL string '' instead (..., '', ...).
func (m csvModule) Create(c *Conn, args []string) (VTab, error) {
if len(args) < 4 {
return nil, errors.New("no CSV file specified")
......@@ -37,22 +44,38 @@ func (m csvModule) Create(c *Conn, args []string) (VTab, error) {
/* if a custom delimiter specified, pull it out */
var separator byte = ','
/* should the header zRow be used */
useHeaderRow := false
header := false
quoted := true
guess := true
var cols, types []string
for i := 4; i < len(args); i++ {
arg := args[i]
switch {
case strings.Contains(strings.ToUpper(arg), "HEADER"):
useHeaderRow = true
case strings.Contains(strings.ToUpper(arg), "NO_QUOTE"):
quoted = false
case types != nil:
if arg[0] == '\'' {
arg = arg[1 : len(arg)-1]
}
types = append(types, arg)
case cols != nil:
if strings.ToUpper(arg) == "_TYPES_" {
types = make([]string, 0, len(cols))
} else {
cols = append(cols, arg)
}
case len(arg) == 1:
separator = arg[0]
guess = false
case len(arg) == 3 && arg[0] == '\'':
separator = arg[1]
guess = false
case strings.Contains(strings.ToUpper(arg), "HEADER"):
header = true
case strings.Contains(strings.ToUpper(arg), "NO_QUOTE"):
quoted = false
case strings.Contains(strings.ToUpper(arg), "NAMES"):
cols = make([]string, 0, 10)
case strings.Contains(strings.ToUpper(arg), "TYPES"):
types = make([]string, 0, 10)
}
}
/* open the source csv file */
......@@ -67,15 +90,25 @@ func (m csvModule) Create(c *Conn, args []string) (VTab, error) {
vTab.maxColumn = int(c.Limit(LimitColumn))
reader := yacr.NewReader(file, separator, quoted, guess)
if useHeaderRow {
if header {
reader.Split(vTab.split(reader.ScanField))
}
if err = vTab.readRow(reader); err != nil || len(vTab.cols) == 0 {
if err == nil {
err = errors.New("no columns found")
}
if err = vTab.readRow(reader); err != nil {
return nil, err
}
named := header
if len(cols) > 0 { // headers ignored
// TODO check len(cols) == len(vTab.cols) ?
vTab.cols = cols
named = true
}
if len(vTab.cols) == 0 {
if len(types) == 0 {
return nil, errors.New("no column name/type specified")
}
vTab.cols = types
}
if guess {
vTab.sep = reader.Sep()
}
......@@ -89,13 +122,17 @@ func (m csvModule) Create(c *Conn, args []string) (VTab, error) {
if i == len(vTab.cols)-1 {
tail = ");"
}
if useHeaderRow {
colType := ""
if len(types) > i {
colType = " " + types[i]
}
if named {
if len(col) == 0 {
return nil, errors.New("no column name found")
}
sql = fmt.Sprintf("%s\"%s\"%s", sql, col, tail)
sql = fmt.Sprintf("%s\"%s\"%s%s", sql, col, colType, tail)
} else {
sql = fmt.Sprintf("%scol%d%s", sql, i+1, tail)
sql = fmt.Sprintf("%scol%d%s%s", sql, i+1, colType, tail)
}
}
if err = c.DeclareVTab(sql); err != nil {
......@@ -363,6 +400,7 @@ func (db *Conn) ImportCSV(in io.Reader, ic ImportConfig, dbName, table string) e
return err
}
}
var sql string
if len(dbName) == 0 {
sql = fmt.Sprintf(`INSERT INTO "%s" VALUES (?%s)`, escapeQuote(table), strings.Repeat(", ?", nCol-1))
......
......@@ -45,6 +45,120 @@ func TestCsvModule(t *testing.T) {
checkNoError(t, err, "couldn't drop CSV virtual table: %s")
}
// csvModuleTests enumerates the module-argument combinations exercised by
// TestCsvModuleArguments, together with the outcome expected for each one.
var csvModuleTests = []struct {
	Name  string   // label used as a prefix in failure messages
	Args  []string // module arguments placed, comma separated, inside csv(...)
	Count int      // expected number of rows; only checked when > 0
	Names []string // expected column names; only checked when non-empty
	Types []string // expected declared column types; only checked when non-empty
	Error string   // expected error substring; empty when creation must succeed
}{
	{Name: "No file", Args: []string{}, Error: "no CSV file specified"},
	{Name: "File not found", Args: []string{"blam.csv"}, Error: "error opening CSV file: 'blam.csv'"},
	{
		Name:  "No header",
		Args:  []string{"test.csv"},
		Count: 6,
		Names: []string{"col1", "col2", "col3"},
		Types: []string{"", "", ""},
	},
	{
		Name:  "Headers",
		Args:  []string{"test.csv", "USE_HEADER_ROW"},
		Count: 5,
		Names: []string{"colA", "colB", "colC"},
		Types: []string{"", "", ""},
	},
	{
		Name:  "Names",
		Args:  []string{"test.csv", "COL_NAMES", "C1", "C2", "C3"},
		Count: 6,
		Names: []string{"C1", "C2", "C3"},
		Types: []string{"", "", ""},
	},
	{
		Name:  "Names & Headers",
		Args:  []string{"test.csv", "HEADERS", "COL_NAMES", "C1", "C2", "C3"},
		Count: 5,
		Names: []string{"C1", "C2", "C3"},
		Types: []string{"", "", ""},
	},
	{
		// No Count here: the row count is not verified for this case.
		Name:  "Types",
		Args:  []string{"test.csv", "TYPES", "TEXT", "''", "TEXT"},
		Names: []string{"col1", "col2", "col3"},
		Types: []string{"TEXT", "", "TEXT"},
	},
}
// TestCsvModuleArguments creates a CSV virtual table named vtab for every
// entry of csvModuleTests and verifies, depending on which expectations the
// entry sets, the creation error, the row count, the column names and the
// declared column types. The table is dropped after each case so the name can
// be reused.
func TestCsvModuleArguments(t *testing.T) {
	db := open(t)
	defer checkClose(db, t)
	err := LoadCsvModule(db)
	checkNoError(t, err, "couldn't create CSV module: %s")
	for _, tt := range csvModuleTests {
		// Module arguments are passed verbatim, separated by commas.
		ddl := "CREATE VIRTUAL TABLE vtab USING csv(" + strings.Join(tt.Args, ", ") + ")"
		//println("DDL: ", ddl)
		err = db.Exec(ddl)
		if tt.Error != "" {
			if err == nil || !strings.Contains(err.Error(), tt.Error) {
				t.Errorf("%s: error %v, want error %q", tt.Name, err, tt.Error)
			}
			if err == nil {
				// The table was unexpectedly created: drop it, otherwise every
				// following CREATE would fail because vtab already exists.
				checkNoError(t, db.Exec("DROP TABLE vtab"), "couldn't drop CSV virtual table: %s")
			}
			continue
		}
		checkNoError(t, err, "couldn't create CSV virtual table: %s")

		if tt.Count > 0 {
			var count int
			err = db.OneValue("SELECT count(1) FROM vtab", &count)
			checkNoError(t, err, "couldn't select from CSV virtual table: %s")
			assert.Equalf(t, tt.Count, count, "%s: got %d rows, want %d", tt.Name, count, tt.Count)
		}

		/*var schema string
		err = db.OneValue("SELECT sql FROM sqlite_master WHERE name like ? and type = ?", &schema, "vtab", "table")
		checkNoError(t, err, "couldn't get schema of CSV virtual table: %s")
		println("SCHEMA:", schema)*/

		if len(tt.Names) > 0 || len(tt.Types) > 0 {
			// Names and types come from the same metadata: fetch it once.
			cols, err := db.Columns("", "vtab")
			checkNoError(t, err, "couldn't get columns of CSV virtual table: %s")

			// assert.Equalf is not fatal: only compare element-wise when the
			// lengths agree, so a mismatch is reported instead of panicking
			// with an index out of range.
			if len(tt.Names) > 0 &&
				assert.Equalf(t, len(tt.Names), len(cols), "%s: got %d columns, want %d", tt.Name, len(cols), len(tt.Names)) {
				for i, col := range cols {
					assert.Equalf(t, tt.Names[i], col.Name, "%s: got %s, want %s as column name at %d", tt.Name, col.Name, tt.Names[i], i+1)
				}
			}
			if len(tt.Types) > 0 &&
				assert.Equalf(t, len(tt.Types), len(cols), "%s: got %d columns, want %d", tt.Name, len(cols), len(tt.Types)) {
				for i, col := range cols {
					assert.Equalf(t, tt.Types[i], col.DataType, "%s: got %s, want %s as column type at %d", tt.Name, col.DataType, tt.Types[i], i+1)
				}
			}
		}

		err = db.Exec("DROP TABLE vtab")
		checkNoError(t, err, "couldn't drop CSV virtual table: %s")
	}
}
func TestImportCSV(t *testing.T) {
db := open(t)
defer checkClose(db, t)
......
......@@ -94,7 +94,7 @@ type intArray struct {
// CreateIntArray create a specific instance of an intarray object.
//
// Each intarray object corresponds to a virtual table in the TEMP table
// Each intarray object corresponds to a virtual table in the TEMP database
// with the specified name.
//
// Destroy the intarray object by dropping the virtual table. If not done
......
......@@ -202,25 +202,29 @@ func (s *Stmt) ColumnTypeAffinity(index int) Affinity {
}
}
declType := s.ColumnDeclaredType(index)
affinity := typeAffinity(declType)
s.affinities[index] = affinity
return affinity
}
// Affinity reports the type affinity derived from the column's declared data
// type (c.DataType), as computed by typeAffinity.
func (c Column) Affinity() Affinity {
	declared := c.DataType
	return typeAffinity(declared)
}
// typeAffinity maps a declared column type to its affinity by
// case-insensitive substring matching.
//
// The checks are ordered and the first match wins:
//   - empty declaration            => None
//   - contains "INT"               => Integral
//   - contains "TEXT"/"CHAR"/"CLOB" => Textual
//   - contains "BLOB"              => None
//   - contains "REAL"/"FLOA"/"DOUB" => Real
//   - anything else                => Numerical
//
// Because "INT" is tested first, a declaration such as "CHARINT" is Integral.
// The function is pure: it does not cache anything, so callers that want to
// memoize the result must do so themselves.
func typeAffinity(declType string) Affinity {
	if declType == "" {
		return None
	}
	declType = strings.ToUpper(declType)
	switch {
	case strings.Contains(declType, "INT"):
		return Integral
	case strings.Contains(declType, "TEXT"),
		strings.Contains(declType, "CHAR"),
		strings.Contains(declType, "CLOB"):
		return Textual
	case strings.Contains(declType, "BLOB"):
		return None
	case strings.Contains(declType, "REAL"),
		strings.Contains(declType, "FLOA"),
		strings.Contains(declType, "DOUB"):
		return Real
	}
	return Numerical
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment